qemu/qemu-img.c

3888 lines
112 KiB
C
Raw Normal View History

/*
* QEMU disk image utility
*
* Copyright (c) 2003-2008 Fabrice Bellard
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include "qemu/osdep.h"
#include "qemu-version.h"
2016-03-14 08:01:28 +00:00
#include "qapi/error.h"
#include "qapi-visit.h"
#include "qapi/qmp-output-visitor.h"
#include "qapi/qmp/qerror.h"
#include "qapi/qmp/qjson.h"
#include "qemu/cutils.h"
#include "qemu/config-file.h"
#include "qemu/option.h"
#include "qemu/error-report.h"
#include "qemu/log.h"
#include "qom/object_interfaces.h"
#include "sysemu/sysemu.h"
block: New BlockBackend A block device consists of a frontend device model and a backend. A block backend has a tree of block drivers doing the actual work. The tree is managed by the block layer. We currently use a single abstraction BlockDriverState both for tree nodes and the backend as a whole. Drawbacks: * Its API includes both stuff that makes sense only at the block backend level (root of the tree) and stuff that's only for use within the block layer. This makes the API bigger and more complex than necessary. Moreover, it's not obvious which interfaces are meant for device models, and which really aren't. * Since device models keep a reference to their backend, the backend object can't just be destroyed. But for media change, we need to replace the tree. Our solution is to make the BlockDriverState generic, with actual driver state in a separate object, pointed to by member opaque. That lets us replace the tree by deinitializing and reinitializing its root. This special need of the root makes the data structure awkward everywhere in the tree. The general plan is to separate the APIs into "block backend", for use by device models, monitor and whatever other code dealing with block backends, and "block driver", for use by the block layer and whatever other code (if any) dealing with trees and tree nodes. Code dealing with block backends, device models in particular, should become completely oblivious of BlockDriverState. This should let us clean up both APIs, and the tree data structures. This commit is a first step. It creates a minimal "block backend" API: type BlockBackend and functions to create, destroy and find them. BlockBackend objects are created and destroyed exactly when root BlockDriverState objects are created and destroyed. "Root" in the sense of "in bdrv_states". They're not yet used for anything; that'll come shortly. A root BlockDriverState is created with bdrv_new_root(), so where to create a BlockBackend is obvious. Where these roots get destroyed isn't always as obvious. It is obvious in qemu-img.c, qemu-io.c and qemu-nbd.c, and in error paths of blockdev_init(), blk_connect(). That leaves destruction of objects successfully created by blockdev_init() and blk_connect(). blockdev_init() is used only by drive_new() and qmp_blockdev_add(). Objects created by the latter are currently indestructible (see commit 48f364d "blockdev: Refuse to drive_del something added with blockdev-add" and commit 2d246f0 "blockdev: Introduce DriveInfo.enable_auto_del"). Objects created by the former get destroyed by drive_del(). Objects created by blk_connect() get destroyed by blk_disconnect(). BlockBackend is reference-counted. Its reference count never exceeds one so far, but that's going to change. In drive_del(), the BB's reference count is surely one now. The BDS's reference count is greater than one when something else is holding a reference, such as a block job. In this case, the BB is destroyed right away, but the BDS lives on until all extra references get dropped. Signed-off-by: Markus Armbruster <armbru@redhat.com> Reviewed-by: Max Reitz <mreitz@redhat.com> Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2014-10-07 11:59:04 +00:00
#include "sysemu/block-backend.h"
#include "block/block_int.h"
#include "block/blockjob.h"
#include "block/qapi.h"
#include "crypto/init.h"
#include "trace/control.h"
#include <getopt.h>
#define QEMU_IMG_VERSION "qemu-img version " QEMU_VERSION QEMU_PKGVERSION \
", " QEMU_COPYRIGHT "\n"
typedef struct img_cmd_t {
const char *name;
int (*handler)(int argc, char **argv);
} img_cmd_t;
enum {
OPTION_OUTPUT = 256,
OPTION_BACKING_CHAIN = 257,
OPTION_OBJECT = 258,
OPTION_IMAGE_OPTS = 259,
OPTION_PATTERN = 260,
OPTION_FLUSH_INTERVAL = 261,
OPTION_NO_DRAIN = 262,
};
typedef enum OutputFormat {
OFORMAT_JSON,
OFORMAT_HUMAN,
} OutputFormat;
/* Default to cache=writeback as data integrity is not important for qemu-img */
#define BDRV_DEFAULT_CACHE "writeback"
static void format_print(void *opaque, const char *name)
{
printf(" %s", name);
}
static void QEMU_NORETURN GCC_FMT_ATTR(1, 2) error_exit(const char *fmt, ...)
{
va_list ap;
error_printf("qemu-img: ");
va_start(ap, fmt);
error_vprintf(fmt, ap);
va_end(ap);
error_printf("\nTry 'qemu-img --help' for more information\n");
exit(EXIT_FAILURE);
}
/* Please keep in synch with qemu-img.texi */
static void QEMU_NORETURN help(void)
{
const char *help_msg =
QEMU_IMG_VERSION
"usage: qemu-img [standard options] command [command options]\n"
"QEMU disk image utility\n"
"\n"
" '-h', '--help' display this help and exit\n"
" '-V', '--version' output version information and exit\n"
" '-T', '--trace' [[enable=]<pattern>][,events=<file>][,file=<file>]\n"
" specify tracing options\n"
"\n"
"Command syntax:\n"
#define DEF(option, callback, arg_string) \
" " arg_string "\n"
#include "qemu-img-cmds.h"
#undef DEF
#undef GEN_DOCS
"\n"
"Command parameters:\n"
" 'filename' is a disk image filename\n"
" 'objectdef' is a QEMU user creatable object definition. See the qemu(1)\n"
" manual page for a description of the object properties. The most common\n"
" object type is a 'secret', which is used to supply passwords and/or\n"
" encryption keys.\n"
" 'fmt' is the disk image format. It is guessed automatically in most cases\n"
" 'cache' is the cache mode used to write the output disk image, the valid\n"
" options are: 'none', 'writeback' (default, except for convert), 'writethrough',\n"
" 'directsync' and 'unsafe' (default for convert)\n"
" 'src_cache' is the cache mode used to read input disk images, the valid\n"
" options are the same as for the 'cache' option\n"
" 'size' is the disk image size in bytes. Optional suffixes\n"
" 'k' or 'K' (kilobyte, 1024), 'M' (megabyte, 1024k), 'G' (gigabyte, 1024M),\n"
" 'T' (terabyte, 1024G), 'P' (petabyte, 1024T) and 'E' (exabyte, 1024P) are\n"
" supported. 'b' is ignored.\n"
" 'output_filename' is the destination disk image filename\n"
" 'output_fmt' is the destination format\n"
" 'options' is a comma separated list of format specific options in a\n"
" name=value format. Use -o ? for an overview of the options supported by the\n"
" used format\n"
" 'snapshot_param' is param used for internal snapshot, format\n"
" is 'snapshot.id=[ID],snapshot.name=[NAME]', or\n"
" '[ID_OR_NAME]'\n"
" 'snapshot_id_or_name' is deprecated, use 'snapshot_param'\n"
" instead\n"
" '-c' indicates that target image must be compressed (qcow format only)\n"
" '-u' enables unsafe rebasing. It is assumed that old and new backing file\n"
" match exactly. The image doesn't need a working backing file before\n"
" rebasing in this case (useful for renaming the backing file)\n"
" '-h' with or without a command shows this help and lists the supported formats\n"
" '-p' show progress of command (only certain commands)\n"
" '-q' use Quiet mode - do not print any output (except errors)\n"
" '-S' indicates the consecutive number of bytes (defaults to 4k) that must\n"
" contain only zeros for qemu-img to create a sparse image during\n"
" conversion. If the number of bytes is 0, the source will not be scanned for\n"
" unallocated or zero sectors, and the destination image will always be\n"
" fully allocated\n"
" '--output' takes the format in which the output must be done (human or json)\n"
" '-n' skips the target volume creation (useful if the volume is created\n"
" prior to running qemu-img)\n"
"\n"
"Parameters to check subcommand:\n"
" '-r' tries to repair any inconsistencies that are found during the check.\n"
" '-r leaks' repairs only cluster leaks, whereas '-r all' fixes all\n"
" kinds of errors, with a higher risk of choosing the wrong fix or\n"
" hiding corruption that has already occurred.\n"
"\n"
"Parameters to snapshot subcommand:\n"
" 'snapshot' is the name of the snapshot to create, apply or delete\n"
" '-a' applies a snapshot (revert disk to saved state)\n"
" '-c' creates a snapshot\n"
" '-d' deletes a snapshot\n"
" '-l' lists all snapshots in the given image\n"
"\n"
"Parameters to compare subcommand:\n"
" '-f' first image format\n"
" '-F' second image format\n"
" '-s' run in Strict mode - fail on different image size or sector allocation\n";
printf("%s\nSupported formats:", help_msg);
bdrv_iterate_format(format_print, NULL);
printf("\n");
exit(EXIT_SUCCESS);
}
static QemuOptsList qemu_object_opts = {
.name = "object",
.implied_opt_name = "qom-type",
.head = QTAILQ_HEAD_INITIALIZER(qemu_object_opts.head),
.desc = {
{ }
},
};
static QemuOptsList qemu_source_opts = {
.name = "source",
.implied_opt_name = "file",
.head = QTAILQ_HEAD_INITIALIZER(qemu_source_opts.head),
.desc = {
{ }
},
};
static int GCC_FMT_ATTR(2, 3) qprintf(bool quiet, const char *fmt, ...)
{
int ret = 0;
if (!quiet) {
va_list args;
va_start(args, fmt);
ret = vprintf(fmt, args);
va_end(args);
}
return ret;
}
static int print_block_option_help(const char *filename, const char *fmt)
{
BlockDriver *drv, *proto_drv;
QemuOptsList *create_opts = NULL;
Error *local_err = NULL;
/* Find driver and parse its options */
drv = bdrv_find_format(fmt);
if (!drv) {
error_report("Unknown file format '%s'", fmt);
return 1;
}
create_opts = qemu_opts_append(create_opts, drv->create_opts);
if (filename) {
proto_drv = bdrv_find_protocol(filename, true, &local_err);
if (!proto_drv) {
error_report_err(local_err);
qemu_opts_free(create_opts);
return 1;
}
create_opts = qemu_opts_append(create_opts, proto_drv->create_opts);
}
qemu_opts_print_help(create_opts);
qemu_opts_free(create_opts);
return 0;
}
static int img_open_password(BlockBackend *blk, const char *filename,
int flags, bool quiet)
{
BlockDriverState *bs;
char password[256];
bs = blk_bs(blk);
if (bdrv_is_encrypted(bs) && bdrv_key_required(bs) &&
!(flags & BDRV_O_NO_IO)) {
qprintf(quiet, "Disk image '%s' is encrypted.\n", filename);
if (qemu_read_password(password, sizeof(password)) < 0) {
error_report("No password given");
return -1;
}
if (bdrv_set_key(bs, password) < 0) {
error_report("invalid password");
return -1;
}
}
return 0;
}
static BlockBackend *img_open_opts(const char *optstr,
QemuOpts *opts, int flags, bool writethrough,
bool quiet)
{
QDict *options;
Error *local_err = NULL;
BlockBackend *blk;
options = qemu_opts_to_qdict(opts, NULL);
blk = blk_new_open(NULL, NULL, options, flags, &local_err);
if (!blk) {
error_reportf_err(local_err, "Could not open '%s': ", optstr);
return NULL;
}
blk_set_enable_write_cache(blk, !writethrough);
if (img_open_password(blk, optstr, flags, quiet) < 0) {
blk_unref(blk);
return NULL;
}
return blk;
}
static BlockBackend *img_open_file(const char *filename,
const char *fmt, int flags,
bool writethrough, bool quiet)
{
BlockBackend *blk;
Error *local_err = NULL;
QDict *options = NULL;
if (fmt) {
options = qdict_new();
qdict_put(options, "driver", qstring_from_str(fmt));
}
blk = blk_new_open(filename, NULL, options, flags, &local_err);
if (!blk) {
error_reportf_err(local_err, "Could not open '%s': ", filename);
return NULL;
}
blk_set_enable_write_cache(blk, !writethrough);
if (img_open_password(blk, filename, flags, quiet) < 0) {
blk_unref(blk);
return NULL;
}
return blk;
}
static BlockBackend *img_open(bool image_opts,
const char *filename,
const char *fmt, int flags, bool writethrough,
bool quiet)
{
BlockBackend *blk;
if (image_opts) {
QemuOpts *opts;
if (fmt) {
error_report("--image-opts and --format are mutually exclusive");
return NULL;
}
opts = qemu_opts_parse_noisily(qemu_find_opts("source"),
filename, true);
if (!opts) {
return NULL;
}
blk = img_open_opts(filename, opts, flags, writethrough, quiet);
} else {
blk = img_open_file(filename, fmt, flags, writethrough, quiet);
}
return blk;
}
static int add_old_style_options(const char *fmt, QemuOpts *opts,
const char *base_filename,
const char *base_fmt)
{
Error *err = NULL;
if (base_filename) {
qemu_opt_set(opts, BLOCK_OPT_BACKING_FILE, base_filename, &err);
if (err) {
error_report("Backing file not supported for file format '%s'",
fmt);
error_free(err);
return -1;
}
}
if (base_fmt) {
qemu_opt_set(opts, BLOCK_OPT_BACKING_FMT, base_fmt, &err);
if (err) {
error_report("Backing file format not supported for file "
"format '%s'", fmt);
error_free(err);
return -1;
}
}
return 0;
}
static int img_create(int argc, char **argv)
{
int c;
uint64_t img_size = -1;
const char *fmt = "raw";
const char *base_fmt = NULL;
const char *filename;
const char *base_filename = NULL;
char *options = NULL;
Error *local_err = NULL;
bool quiet = false;
for(;;) {
static const struct option long_options[] = {
{"help", no_argument, 0, 'h'},
{"object", required_argument, 0, OPTION_OBJECT},
{0, 0, 0, 0}
};
c = getopt_long(argc, argv, "F:b:f:he6o:q",
long_options, NULL);
if (c == -1) {
break;
}
switch(c) {
case '?':
case 'h':
help();
break;
case 'F':
base_fmt = optarg;
break;
case 'b':
base_filename = optarg;
break;
case 'f':
fmt = optarg;
break;
case 'e':
error_report("option -e is deprecated, please use \'-o "
"encryption\' instead!");
goto fail;
case '6':
error_report("option -6 is deprecated, please use \'-o "
"compat6\' instead!");
goto fail;
case 'o':
if (!is_valid_option_list(optarg)) {
error_report("Invalid option list: %s", optarg);
goto fail;
}
if (!options) {
options = g_strdup(optarg);
} else {
char *old_options = options;
options = g_strdup_printf("%s,%s", options, optarg);
g_free(old_options);
}
break;
case 'q':
quiet = true;
break;
case OPTION_OBJECT: {
QemuOpts *opts;
opts = qemu_opts_parse_noisily(&qemu_object_opts,
optarg, true);
if (!opts) {
goto fail;
}
} break;
}
}
/* Get the filename */
filename = (optind < argc) ? argv[optind] : NULL;
if (options && has_help_option(options)) {
g_free(options);
return print_block_option_help(filename, fmt);
}
if (optind >= argc) {
error_exit("Expecting image file name");
}
optind++;
if (qemu_opts_foreach(&qemu_object_opts,
user_creatable_add_opts_foreach,
qom: -object error messages lost location, restore it qemu_opts_foreach() runs its callback with the error location set to the option's location. Any errors the callback reports use the option's location automatically. Commit 90998d5 moved the actual error reporting from "inside" qemu_opts_foreach() to after it. Here's a typical hunk: if (qemu_opts_foreach(qemu_find_opts("object"), - object_create, - object_create_initial, NULL)) { + user_creatable_add_opts_foreach, + object_create_initial, &err)) { + error_report_err(err); exit(1); } Before, object_create() reports from within qemu_opts_foreach(), using the option's location. Afterwards, we do it after qemu_opts_foreach(), using whatever location happens to be current there. Commonly a "none" location. This is because Error objects don't have location information. Problematic. Reproducer: $ qemu-system-x86_64 -nodefaults -display none -object secret,id=foo,foo=bar qemu-system-x86_64: Property '.foo' not found Note no location. This commit restores it: qemu-system-x86_64: -object secret,id=foo,foo=bar: Property '.foo' not found Note that the qemu_opts_foreach() bug just fixed could mask the bug here: if the location it leaves dangling hasn't been clobbered, yet, it's the correct one. Reported-by: Eric Blake <eblake@redhat.com> Cc: Daniel P. Berrange <berrange@redhat.com> Signed-off-by: Markus Armbruster <armbru@redhat.com> Message-Id: <1461767349-15329-4-git-send-email-armbru@redhat.com> Reviewed-by: Daniel P. Berrange <berrange@redhat.com> Reviewed-by: Eric Blake <eblake@redhat.com> [Paragraph on Error added to commit message]
2016-04-27 14:29:09 +00:00
NULL, NULL)) {
goto fail;
}
/* Get image size, if specified */
if (optind < argc) {
int64_t sval;
char *end;
sval = qemu_strtosz_suffix(argv[optind++], &end,
QEMU_STRTOSZ_DEFSUFFIX_B);
if (sval < 0 || *end) {
if (sval == -ERANGE) {
error_report("Image size must be less than 8 EiB!");
} else {
error_report("Invalid image size specified! You may use k, M, "
"G, T, P or E suffixes for ");
error_report("kilobytes, megabytes, gigabytes, terabytes, "
"petabytes and exabytes.");
}
goto fail;
}
img_size = (uint64_t)sval;
}
if (optind != argc) {
error_exit("Unexpected argument: %s", argv[optind]);
}
bdrv_img_create(filename, fmt, base_filename, base_fmt,
options, img_size, 0, &local_err, quiet);
if (local_err) {
error_reportf_err(local_err, "%s: ", filename);
goto fail;
}
g_free(options);
return 0;
fail:
g_free(options);
return 1;
}
static void dump_json_image_check(ImageCheck *check, bool quiet)
{
QString *str;
QObject *obj;
qapi: Add new visit_complete() function Making each output visitor provide its own output collection function was the only remaining reason for exposing visitor sub-types to the rest of the code base. Add a polymorphic visit_complete() function which is a no-op for input visitors, and which populates an opaque pointer for output visitors. For maximum type-safety, also add a parameter to the output visitor constructors with a type-correct version of the output pointer, and assert that the two uses match. This approach was considered superior to either passing the output parameter only during construction (action at a distance during visit_free() feels awkward) or only during visit_complete() (defeating type safety makes it easier to use incorrectly). Most callers were function-local, and therefore a mechanical conversion; the testsuite was a bit trickier, but the previous cleanup patch minimized the churn here. The visit_complete() function may be called at most once; doing so lets us use transfer semantics rather than duplication or ref-count semantics to get the just-built output back to the caller, even though it means our behavior is not idempotent. Generated code is simplified as follows for events: |@@ -26,7 +26,7 @@ void qapi_event_send_acpi_device_ost(ACP | QDict *qmp; | Error *err = NULL; | QMPEventFuncEmit emit; |- QmpOutputVisitor *qov; |+ QObject *obj; | Visitor *v; | q_obj_ACPI_DEVICE_OST_arg param = { | info |@@ -39,8 +39,7 @@ void qapi_event_send_acpi_device_ost(ACP | | qmp = qmp_event_build_dict("ACPI_DEVICE_OST"); | |- qov = qmp_output_visitor_new(); |- v = qmp_output_get_visitor(qov); |+ v = qmp_output_visitor_new(&obj); | | visit_start_struct(v, "ACPI_DEVICE_OST", NULL, 0, &err); | if (err) { |@@ -55,7 +54,8 @@ void qapi_event_send_acpi_device_ost(ACP | goto out; | } | |- qdict_put_obj(qmp, "data", qmp_output_get_qobject(qov)); |+ visit_complete(v, &obj); |+ qdict_put_obj(qmp, "data", obj); | emit(QAPI_EVENT_ACPI_DEVICE_OST, qmp, &err); and for commands: | { | Error *err = NULL; |- QmpOutputVisitor *qov = qmp_output_visitor_new(); | Visitor *v; | |- v = qmp_output_get_visitor(qov); |+ v = qmp_output_visitor_new(ret_out); | visit_type_AddfdInfo(v, "unused", &ret_in, &err); |- if (err) { |- goto out; |+ if (!err) { |+ visit_complete(v, ret_out); | } |- *ret_out = qmp_output_get_qobject(qov); |- |-out: | error_propagate(errp, err); Signed-off-by: Eric Blake <eblake@redhat.com> Message-Id: <1465490926-28625-13-git-send-email-eblake@redhat.com> Reviewed-by: Markus Armbruster <armbru@redhat.com> Signed-off-by: Markus Armbruster <armbru@redhat.com>
2016-06-09 16:48:43 +00:00
Visitor *v = qmp_output_visitor_new(&obj);
visit_type_ImageCheck(v, NULL, &check, &error_abort);
visit_complete(v, &obj);
str = qobject_to_json_pretty(obj);
assert(str != NULL);
qprintf(quiet, "%s\n", qstring_get_str(str));
qobject_decref(obj);
qapi: Add new visit_complete() function Making each output visitor provide its own output collection function was the only remaining reason for exposing visitor sub-types to the rest of the code base. Add a polymorphic visit_complete() function which is a no-op for input visitors, and which populates an opaque pointer for output visitors. For maximum type-safety, also add a parameter to the output visitor constructors with a type-correct version of the output pointer, and assert that the two uses match. This approach was considered superior to either passing the output parameter only during construction (action at a distance during visit_free() feels awkward) or only during visit_complete() (defeating type safety makes it easier to use incorrectly). Most callers were function-local, and therefore a mechanical conversion; the testsuite was a bit trickier, but the previous cleanup patch minimized the churn here. The visit_complete() function may be called at most once; doing so lets us use transfer semantics rather than duplication or ref-count semantics to get the just-built output back to the caller, even though it means our behavior is not idempotent. Generated code is simplified as follows for events: |@@ -26,7 +26,7 @@ void qapi_event_send_acpi_device_ost(ACP | QDict *qmp; | Error *err = NULL; | QMPEventFuncEmit emit; |- QmpOutputVisitor *qov; |+ QObject *obj; | Visitor *v; | q_obj_ACPI_DEVICE_OST_arg param = { | info |@@ -39,8 +39,7 @@ void qapi_event_send_acpi_device_ost(ACP | | qmp = qmp_event_build_dict("ACPI_DEVICE_OST"); | |- qov = qmp_output_visitor_new(); |- v = qmp_output_get_visitor(qov); |+ v = qmp_output_visitor_new(&obj); | | visit_start_struct(v, "ACPI_DEVICE_OST", NULL, 0, &err); | if (err) { |@@ -55,7 +54,8 @@ void qapi_event_send_acpi_device_ost(ACP | goto out; | } | |- qdict_put_obj(qmp, "data", qmp_output_get_qobject(qov)); |+ visit_complete(v, &obj); |+ qdict_put_obj(qmp, "data", obj); | emit(QAPI_EVENT_ACPI_DEVICE_OST, qmp, &err); and for commands: | { | Error *err = NULL; |- QmpOutputVisitor *qov = qmp_output_visitor_new(); | Visitor *v; | |- v = qmp_output_get_visitor(qov); |+ v = qmp_output_visitor_new(ret_out); | visit_type_AddfdInfo(v, "unused", &ret_in, &err); |- if (err) { |- goto out; |+ if (!err) { |+ visit_complete(v, ret_out); | } |- *ret_out = qmp_output_get_qobject(qov); |- |-out: | error_propagate(errp, err); Signed-off-by: Eric Blake <eblake@redhat.com> Message-Id: <1465490926-28625-13-git-send-email-eblake@redhat.com> Reviewed-by: Markus Armbruster <armbru@redhat.com> Signed-off-by: Markus Armbruster <armbru@redhat.com>
2016-06-09 16:48:43 +00:00
visit_free(v);
QDECREF(str);
}
static void dump_human_image_check(ImageCheck *check, bool quiet)
{
if (!(check->corruptions || check->leaks || check->check_errors)) {
qprintf(quiet, "No errors were found on the image.\n");
} else {
if (check->corruptions) {
qprintf(quiet, "\n%" PRId64 " errors were found on the image.\n"
"Data may be corrupted, or further writes to the image "
"may corrupt it.\n",
check->corruptions);
}
if (check->leaks) {
qprintf(quiet,
"\n%" PRId64 " leaked clusters were found on the image.\n"
"This means waste of disk space, but no harm to data.\n",
check->leaks);
}
if (check->check_errors) {
qprintf(quiet,
"\n%" PRId64
" internal errors have occurred during the check.\n",
check->check_errors);
}
}
if (check->total_clusters != 0 && check->allocated_clusters != 0) {
qprintf(quiet, "%" PRId64 "/%" PRId64 " = %0.2f%% allocated, "
"%0.2f%% fragmented, %0.2f%% compressed clusters\n",
check->allocated_clusters, check->total_clusters,
check->allocated_clusters * 100.0 / check->total_clusters,
check->fragmented_clusters * 100.0 / check->allocated_clusters,
check->compressed_clusters * 100.0 /
check->allocated_clusters);
}
if (check->image_end_offset) {
qprintf(quiet,
"Image end offset: %" PRId64 "\n", check->image_end_offset);
}
}
static int collect_image_check(BlockDriverState *bs,
ImageCheck *check,
const char *filename,
const char *fmt,
int fix)
{
int ret;
BdrvCheckResult result;
ret = bdrv_check(bs, &result, fix);
if (ret < 0) {
return ret;
}
check->filename = g_strdup(filename);
check->format = g_strdup(bdrv_get_format_name(bs));
check->check_errors = result.check_errors;
check->corruptions = result.corruptions;
check->has_corruptions = result.corruptions != 0;
check->leaks = result.leaks;
check->has_leaks = result.leaks != 0;
check->corruptions_fixed = result.corruptions_fixed;
check->has_corruptions_fixed = result.corruptions != 0;
check->leaks_fixed = result.leaks_fixed;
check->has_leaks_fixed = result.leaks != 0;
check->image_end_offset = result.image_end_offset;
check->has_image_end_offset = result.image_end_offset != 0;
check->total_clusters = result.bfi.total_clusters;
check->has_total_clusters = result.bfi.total_clusters != 0;
check->allocated_clusters = result.bfi.allocated_clusters;
check->has_allocated_clusters = result.bfi.allocated_clusters != 0;
check->fragmented_clusters = result.bfi.fragmented_clusters;
check->has_fragmented_clusters = result.bfi.fragmented_clusters != 0;
check->compressed_clusters = result.bfi.compressed_clusters;
check->has_compressed_clusters = result.bfi.compressed_clusters != 0;
return 0;
}
/*
* Checks an image for consistency. Exit codes:
*
* 0 - Check completed, image is good
* 1 - Check not completed because of internal errors
* 2 - Check completed, image is corrupted
* 3 - Check completed, image has leaked clusters, but is good otherwise
* 63 - Checks are not supported by the image format
*/
static int img_check(int argc, char **argv)
{
int c, ret;
OutputFormat output_format = OFORMAT_HUMAN;
const char *filename, *fmt, *output, *cache;
block: New BlockBackend A block device consists of a frontend device model and a backend. A block backend has a tree of block drivers doing the actual work. The tree is managed by the block layer. We currently use a single abstraction BlockDriverState both for tree nodes and the backend as a whole. Drawbacks: * Its API includes both stuff that makes sense only at the block backend level (root of the tree) and stuff that's only for use within the block layer. This makes the API bigger and more complex than necessary. Moreover, it's not obvious which interfaces are meant for device models, and which really aren't. * Since device models keep a reference to their backend, the backend object can't just be destroyed. But for media change, we need to replace the tree. Our solution is to make the BlockDriverState generic, with actual driver state in a separate object, pointed to by member opaque. That lets us replace the tree by deinitializing and reinitializing its root. This special need of the root makes the data structure awkward everywhere in the tree. The general plan is to separate the APIs into "block backend", for use by device models, monitor and whatever other code dealing with block backends, and "block driver", for use by the block layer and whatever other code (if any) dealing with trees and tree nodes. Code dealing with block backends, device models in particular, should become completely oblivious of BlockDriverState. This should let us clean up both APIs, and the tree data structures. This commit is a first step. It creates a minimal "block backend" API: type BlockBackend and functions to create, destroy and find them. BlockBackend objects are created and destroyed exactly when root BlockDriverState objects are created and destroyed. "Root" in the sense of "in bdrv_states". They're not yet used for anything; that'll come shortly. A root BlockDriverState is created with bdrv_new_root(), so where to create a BlockBackend is obvious. Where these roots get destroyed isn't always as obvious. It is obvious in qemu-img.c, qemu-io.c and qemu-nbd.c, and in error paths of blockdev_init(), blk_connect(). That leaves destruction of objects successfully created by blockdev_init() and blk_connect(). blockdev_init() is used only by drive_new() and qmp_blockdev_add(). Objects created by the latter are currently indestructible (see commit 48f364d "blockdev: Refuse to drive_del something added with blockdev-add" and commit 2d246f0 "blockdev: Introduce DriveInfo.enable_auto_del"). Objects created by the former get destroyed by drive_del(). Objects created by blk_connect() get destroyed by blk_disconnect(). BlockBackend is reference-counted. Its reference count never exceeds one so far, but that's going to change. In drive_del(), the BB's reference count is surely one now. The BDS's reference count is greater than one when something else is holding a reference, such as a block job. In this case, the BB is destroyed right away, but the BDS lives on until all extra references get dropped. Signed-off-by: Markus Armbruster <armbru@redhat.com> Reviewed-by: Max Reitz <mreitz@redhat.com> Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2014-10-07 11:59:04 +00:00
BlockBackend *blk;
BlockDriverState *bs;
int fix = 0;
int flags = BDRV_O_CHECK;
bool writethrough;
ImageCheck *check;
bool quiet = false;
bool image_opts = false;
fmt = NULL;
output = NULL;
cache = BDRV_DEFAULT_CACHE;
for(;;) {
int option_index = 0;
static const struct option long_options[] = {
{"help", no_argument, 0, 'h'},
{"format", required_argument, 0, 'f'},
{"repair", required_argument, 0, 'r'},
{"output", required_argument, 0, OPTION_OUTPUT},
{"object", required_argument, 0, OPTION_OBJECT},
{"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
{0, 0, 0, 0}
};
c = getopt_long(argc, argv, "hf:r:T:q",
long_options, &option_index);
if (c == -1) {
break;
}
switch(c) {
case '?':
case 'h':
help();
break;
case 'f':
fmt = optarg;
break;
case 'r':
flags |= BDRV_O_RDWR;
if (!strcmp(optarg, "leaks")) {
fix = BDRV_FIX_LEAKS;
} else if (!strcmp(optarg, "all")) {
fix = BDRV_FIX_LEAKS | BDRV_FIX_ERRORS;
} else {
error_exit("Unknown option value for -r "
"(expecting 'leaks' or 'all'): %s", optarg);
}
break;
case OPTION_OUTPUT:
output = optarg;
break;
case 'T':
cache = optarg;
break;
case 'q':
quiet = true;
break;
case OPTION_OBJECT: {
QemuOpts *opts;
opts = qemu_opts_parse_noisily(&qemu_object_opts,
optarg, true);
if (!opts) {
return 1;
}
} break;
case OPTION_IMAGE_OPTS:
image_opts = true;
break;
}
}
if (optind != argc - 1) {
error_exit("Expecting one image file name");
}
filename = argv[optind++];
if (output && !strcmp(output, "json")) {
output_format = OFORMAT_JSON;
} else if (output && !strcmp(output, "human")) {
output_format = OFORMAT_HUMAN;
} else if (output) {
error_report("--output must be used with human or json as argument.");
return 1;
}
if (qemu_opts_foreach(&qemu_object_opts,
user_creatable_add_opts_foreach,
qom: -object error messages lost location, restore it qemu_opts_foreach() runs its callback with the error location set to the option's location. Any errors the callback reports use the option's location automatically. Commit 90998d5 moved the actual error reporting from "inside" qemu_opts_foreach() to after it. Here's a typical hunk: if (qemu_opts_foreach(qemu_find_opts("object"), - object_create, - object_create_initial, NULL)) { + user_creatable_add_opts_foreach, + object_create_initial, &err)) { + error_report_err(err); exit(1); } Before, object_create() reports from within qemu_opts_foreach(), using the option's location. Afterwards, we do it after qemu_opts_foreach(), using whatever location happens to be current there. Commonly a "none" location. This is because Error objects don't have location information. Problematic. Reproducer: $ qemu-system-x86_64 -nodefaults -display none -object secret,id=foo,foo=bar qemu-system-x86_64: Property '.foo' not found Note no location. This commit restores it: qemu-system-x86_64: -object secret,id=foo,foo=bar: Property '.foo' not found Note that the qemu_opts_foreach() bug just fixed could mask the bug here: if the location it leaves dangling hasn't been clobbered, yet, it's the correct one. Reported-by: Eric Blake <eblake@redhat.com> Cc: Daniel P. Berrange <berrange@redhat.com> Signed-off-by: Markus Armbruster <armbru@redhat.com> Message-Id: <1461767349-15329-4-git-send-email-armbru@redhat.com> Reviewed-by: Daniel P. Berrange <berrange@redhat.com> Reviewed-by: Eric Blake <eblake@redhat.com> [Paragraph on Error added to commit message]
2016-04-27 14:29:09 +00:00
NULL, NULL)) {
return 1;
}
ret = bdrv_parse_cache_mode(cache, &flags, &writethrough);
if (ret < 0) {
error_report("Invalid source cache option: %s", cache);
return 1;
}
blk = img_open(image_opts, filename, fmt, flags, writethrough, quiet);
if (!blk) {
return 1;
}
bs = blk_bs(blk);
check = g_new0(ImageCheck, 1);
ret = collect_image_check(bs, check, filename, fmt, fix);
if (ret == -ENOTSUP) {
error_report("This image format does not support checks");
ret = 63;
goto fail;
}
if (check->corruptions_fixed || check->leaks_fixed) {
int corruptions_fixed, leaks_fixed;
leaks_fixed = check->leaks_fixed;
corruptions_fixed = check->corruptions_fixed;
if (output_format == OFORMAT_HUMAN) {
qprintf(quiet,
"The following inconsistencies were found and repaired:\n\n"
" %" PRId64 " leaked clusters\n"
" %" PRId64 " corruptions\n\n"
"Double checking the fixed image now...\n",
check->leaks_fixed,
check->corruptions_fixed);
}
ret = collect_image_check(bs, check, filename, fmt, 0);
check->leaks_fixed = leaks_fixed;
check->corruptions_fixed = corruptions_fixed;
}
if (!ret) {
switch (output_format) {
case OFORMAT_HUMAN:
dump_human_image_check(check, quiet);
break;
case OFORMAT_JSON:
dump_json_image_check(check, quiet);
break;
}
}
if (ret || check->check_errors) {
if (ret) {
error_report("Check failed: %s", strerror(-ret));
} else {
error_report("Check failed");
}
ret = 1;
goto fail;
}
if (check->corruptions) {
ret = 2;
} else if (check->leaks) {
ret = 3;
} else {
ret = 0;
}
fail:
qapi_free_ImageCheck(check);
block: New BlockBackend A block device consists of a frontend device model and a backend. A block backend has a tree of block drivers doing the actual work. The tree is managed by the block layer. We currently use a single abstraction BlockDriverState both for tree nodes and the backend as a whole. Drawbacks: * Its API includes both stuff that makes sense only at the block backend level (root of the tree) and stuff that's only for use within the block layer. This makes the API bigger and more complex than necessary. Moreover, it's not obvious which interfaces are meant for device models, and which really aren't. * Since device models keep a reference to their backend, the backend object can't just be destroyed. But for media change, we need to replace the tree. Our solution is to make the BlockDriverState generic, with actual driver state in a separate object, pointed to by member opaque. That lets us replace the tree by deinitializing and reinitializing its root. This special need of the root makes the data structure awkward everywhere in the tree. The general plan is to separate the APIs into "block backend", for use by device models, monitor and whatever other code dealing with block backends, and "block driver", for use by the block layer and whatever other code (if any) dealing with trees and tree nodes. Code dealing with block backends, device models in particular, should become completely oblivious of BlockDriverState. This should let us clean up both APIs, and the tree data structures. This commit is a first step. It creates a minimal "block backend" API: type BlockBackend and functions to create, destroy and find them. BlockBackend objects are created and destroyed exactly when root BlockDriverState objects are created and destroyed. "Root" in the sense of "in bdrv_states". They're not yet used for anything; that'll come shortly. A root BlockDriverState is created with bdrv_new_root(), so where to create a BlockBackend is obvious. Where these roots get destroyed isn't always as obvious. It is obvious in qemu-img.c, qemu-io.c and qemu-nbd.c, and in error paths of blockdev_init(), blk_connect(). That leaves destruction of objects successfully created by blockdev_init() and blk_connect(). blockdev_init() is used only by drive_new() and qmp_blockdev_add(). Objects created by the latter are currently indestructible (see commit 48f364d "blockdev: Refuse to drive_del something added with blockdev-add" and commit 2d246f0 "blockdev: Introduce DriveInfo.enable_auto_del"). Objects created by the former get destroyed by drive_del(). Objects created by blk_connect() get destroyed by blk_disconnect(). BlockBackend is reference-counted. Its reference count never exceeds one so far, but that's going to change. In drive_del(), the BB's reference count is surely one now. The BDS's reference count is greater than one when something else is holding a reference, such as a block job. In this case, the BB is destroyed right away, but the BDS lives on until all extra references get dropped. Signed-off-by: Markus Armbruster <armbru@redhat.com> Reviewed-by: Max Reitz <mreitz@redhat.com> Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2014-10-07 11:59:04 +00:00
blk_unref(blk);
return ret;
}
typedef struct CommonBlockJobCBInfo {
BlockDriverState *bs;
Error **errp;
} CommonBlockJobCBInfo;
static void common_block_job_cb(void *opaque, int ret)
{
CommonBlockJobCBInfo *cbi = opaque;
if (ret < 0) {
error_setg_errno(cbi->errp, -ret, "Block job failed");
}
}
static void run_block_job(BlockJob *job, Error **errp)
{
AioContext *aio_context = blk_get_aio_context(job->blk);
do {
aio_poll(aio_context, true);
qemu_progress_print(job->len ?
((float)job->offset / job->len * 100.f) : 0.0f, 0);
} while (!job->ready);
block_job_complete_sync(job, errp);
/* A block job may finish instantaneously without publishing any progress,
* so just signal completion here */
qemu_progress_print(100.f, 0);
}
static int img_commit(int argc, char **argv)
{
int c, ret, flags;
const char *filename, *fmt, *cache, *base;
block: New BlockBackend A block device consists of a frontend device model and a backend. A block backend has a tree of block drivers doing the actual work. The tree is managed by the block layer. We currently use a single abstraction BlockDriverState both for tree nodes and the backend as a whole. Drawbacks: * Its API includes both stuff that makes sense only at the block backend level (root of the tree) and stuff that's only for use within the block layer. This makes the API bigger and more complex than necessary. Moreover, it's not obvious which interfaces are meant for device models, and which really aren't. * Since device models keep a reference to their backend, the backend object can't just be destroyed. But for media change, we need to replace the tree. Our solution is to make the BlockDriverState generic, with actual driver state in a separate object, pointed to by member opaque. That lets us replace the tree by deinitializing and reinitializing its root. This special need of the root makes the data structure awkward everywhere in the tree. The general plan is to separate the APIs into "block backend", for use by device models, monitor and whatever other code dealing with block backends, and "block driver", for use by the block layer and whatever other code (if any) dealing with trees and tree nodes. Code dealing with block backends, device models in particular, should become completely oblivious of BlockDriverState. This should let us clean up both APIs, and the tree data structures. This commit is a first step. It creates a minimal "block backend" API: type BlockBackend and functions to create, destroy and find them. BlockBackend objects are created and destroyed exactly when root BlockDriverState objects are created and destroyed. "Root" in the sense of "in bdrv_states". They're not yet used for anything; that'll come shortly. A root BlockDriverState is created with bdrv_new_root(), so where to create a BlockBackend is obvious. Where these roots get destroyed isn't always as obvious. It is obvious in qemu-img.c, qemu-io.c and qemu-nbd.c, and in error paths of blockdev_init(), blk_connect(). That leaves destruction of objects successfully created by blockdev_init() and blk_connect(). blockdev_init() is used only by drive_new() and qmp_blockdev_add(). Objects created by the latter are currently indestructible (see commit 48f364d "blockdev: Refuse to drive_del something added with blockdev-add" and commit 2d246f0 "blockdev: Introduce DriveInfo.enable_auto_del"). Objects created by the former get destroyed by drive_del(). Objects created by blk_connect() get destroyed by blk_disconnect(). BlockBackend is reference-counted. Its reference count never exceeds one so far, but that's going to change. In drive_del(), the BB's reference count is surely one now. The BDS's reference count is greater than one when something else is holding a reference, such as a block job. In this case, the BB is destroyed right away, but the BDS lives on until all extra references get dropped. Signed-off-by: Markus Armbruster <armbru@redhat.com> Reviewed-by: Max Reitz <mreitz@redhat.com> Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2014-10-07 11:59:04 +00:00
BlockBackend *blk;
BlockDriverState *bs, *base_bs;
bool progress = false, quiet = false, drop = false;
bool writethrough;
Error *local_err = NULL;
CommonBlockJobCBInfo cbi;
bool image_opts = false;
fmt = NULL;
cache = BDRV_DEFAULT_CACHE;
base = NULL;
for(;;) {
static const struct option long_options[] = {
{"help", no_argument, 0, 'h'},
{"object", required_argument, 0, OPTION_OBJECT},
{"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
{0, 0, 0, 0}
};
c = getopt_long(argc, argv, "f:ht:b:dpq",
long_options, NULL);
if (c == -1) {
break;
}
switch(c) {
case '?':
case 'h':
help();
break;
case 'f':
fmt = optarg;
break;
case 't':
cache = optarg;
break;
case 'b':
base = optarg;
/* -b implies -d */
drop = true;
break;
case 'd':
drop = true;
break;
case 'p':
progress = true;
break;
case 'q':
quiet = true;
break;
case OPTION_OBJECT: {
QemuOpts *opts;
opts = qemu_opts_parse_noisily(&qemu_object_opts,
optarg, true);
if (!opts) {
return 1;
}
} break;
case OPTION_IMAGE_OPTS:
image_opts = true;
break;
}
}
/* Progress is not shown in Quiet mode */
if (quiet) {
progress = false;
}
if (optind != argc - 1) {
error_exit("Expecting one image file name");
}
filename = argv[optind++];
if (qemu_opts_foreach(&qemu_object_opts,
user_creatable_add_opts_foreach,
qom: -object error messages lost location, restore it qemu_opts_foreach() runs its callback with the error location set to the option's location. Any errors the callback reports use the option's location automatically. Commit 90998d5 moved the actual error reporting from "inside" qemu_opts_foreach() to after it. Here's a typical hunk: if (qemu_opts_foreach(qemu_find_opts("object"), - object_create, - object_create_initial, NULL)) { + user_creatable_add_opts_foreach, + object_create_initial, &err)) { + error_report_err(err); exit(1); } Before, object_create() reports from within qemu_opts_foreach(), using the option's location. Afterwards, we do it after qemu_opts_foreach(), using whatever location happens to be current there. Commonly a "none" location. This is because Error objects don't have location information. Problematic. Reproducer: $ qemu-system-x86_64 -nodefaults -display none -object secret,id=foo,foo=bar qemu-system-x86_64: Property '.foo' not found Note no location. This commit restores it: qemu-system-x86_64: -object secret,id=foo,foo=bar: Property '.foo' not found Note that the qemu_opts_foreach() bug just fixed could mask the bug here: if the location it leaves dangling hasn't been clobbered, yet, it's the correct one. Reported-by: Eric Blake <eblake@redhat.com> Cc: Daniel P. Berrange <berrange@redhat.com> Signed-off-by: Markus Armbruster <armbru@redhat.com> Message-Id: <1461767349-15329-4-git-send-email-armbru@redhat.com> Reviewed-by: Daniel P. Berrange <berrange@redhat.com> Reviewed-by: Eric Blake <eblake@redhat.com> [Paragraph on Error added to commit message]
2016-04-27 14:29:09 +00:00
NULL, NULL)) {
return 1;
}
flags = BDRV_O_RDWR | BDRV_O_UNMAP;
ret = bdrv_parse_cache_mode(cache, &flags, &writethrough);
if (ret < 0) {
error_report("Invalid cache option: %s", cache);
return 1;
}
blk = img_open(image_opts, filename, fmt, flags, writethrough, quiet);
if (!blk) {
return 1;
}
bs = blk_bs(blk);
qemu_progress_init(progress, 1.f);
qemu_progress_print(0.f, 100);
if (base) {
base_bs = bdrv_find_backing_image(bs, base);
if (!base_bs) {
error_setg(&local_err, QERR_BASE_NOT_FOUND, base);
goto done;
}
} else {
/* This is different from QMP, which by default uses the deepest file in
* the backing chain (i.e., the very base); however, the traditional
* behavior of qemu-img commit is using the immediate backing file. */
base_bs = backing_bs(bs);
if (!base_bs) {
error_setg(&local_err, "Image does not have a backing file");
goto done;
}
}
cbi = (CommonBlockJobCBInfo){
.errp = &local_err,
.bs = bs,
};
commit_active_start("commit", bs, base_bs, 0, BLOCKDEV_ON_ERROR_REPORT,
common_block_job_cb, &cbi, &local_err);
if (local_err) {
goto done;
}
/* When the block job completes, the BlockBackend reference will point to
* the old backing file. In order to avoid that the top image is already
* deleted, so we can still empty it afterwards, increment the reference
* counter here preemptively. */
if (!drop) {
bdrv_ref(bs);
}
run_block_job(bs->job, &local_err);
if (local_err) {
goto unref_backing;
}
if (!drop && bs->drv->bdrv_make_empty) {
ret = bs->drv->bdrv_make_empty(bs);
if (ret) {
error_setg_errno(&local_err, -ret, "Could not empty %s",
filename);
goto unref_backing;
}
}
unref_backing:
if (!drop) {
bdrv_unref(bs);
}
done:
qemu_progress_end();
block: New BlockBackend A block device consists of a frontend device model and a backend. A block backend has a tree of block drivers doing the actual work. The tree is managed by the block layer. We currently use a single abstraction BlockDriverState both for tree nodes and the backend as a whole. Drawbacks: * Its API includes both stuff that makes sense only at the block backend level (root of the tree) and stuff that's only for use within the block layer. This makes the API bigger and more complex than necessary. Moreover, it's not obvious which interfaces are meant for device models, and which really aren't. * Since device models keep a reference to their backend, the backend object can't just be destroyed. But for media change, we need to replace the tree. Our solution is to make the BlockDriverState generic, with actual driver state in a separate object, pointed to by member opaque. That lets us replace the tree by deinitializing and reinitializing its root. This special need of the root makes the data structure awkward everywhere in the tree. The general plan is to separate the APIs into "block backend", for use by device models, monitor and whatever other code dealing with block backends, and "block driver", for use by the block layer and whatever other code (if any) dealing with trees and tree nodes. Code dealing with block backends, device models in particular, should become completely oblivious of BlockDriverState. This should let us clean up both APIs, and the tree data structures. This commit is a first step. It creates a minimal "block backend" API: type BlockBackend and functions to create, destroy and find them. BlockBackend objects are created and destroyed exactly when root BlockDriverState objects are created and destroyed. "Root" in the sense of "in bdrv_states". They're not yet used for anything; that'll come shortly. A root BlockDriverState is created with bdrv_new_root(), so where to create a BlockBackend is obvious. Where these roots get destroyed isn't always as obvious. It is obvious in qemu-img.c, qemu-io.c and qemu-nbd.c, and in error paths of blockdev_init(), blk_connect(). That leaves destruction of objects successfully created by blockdev_init() and blk_connect(). blockdev_init() is used only by drive_new() and qmp_blockdev_add(). Objects created by the latter are currently indestructible (see commit 48f364d "blockdev: Refuse to drive_del something added with blockdev-add" and commit 2d246f0 "blockdev: Introduce DriveInfo.enable_auto_del"). Objects created by the former get destroyed by drive_del(). Objects created by blk_connect() get destroyed by blk_disconnect(). BlockBackend is reference-counted. Its reference count never exceeds one so far, but that's going to change. In drive_del(), the BB's reference count is surely one now. The BDS's reference count is greater than one when something else is holding a reference, such as a block job. In this case, the BB is destroyed right away, but the BDS lives on until all extra references get dropped. Signed-off-by: Markus Armbruster <armbru@redhat.com> Reviewed-by: Max Reitz <mreitz@redhat.com> Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2014-10-07 11:59:04 +00:00
blk_unref(blk);
if (local_err) {
error_report_err(local_err);
return 1;
}
qprintf(quiet, "Image committed.\n");
return 0;
}
/*
* Returns true iff the first sector pointed to by 'buf' contains at least
* a non-NUL byte.
*
* 'pnum' is set to the number of sectors (including and immediately following
* the first one) that are known to be in the same allocated/unallocated state.
*/
static int is_allocated_sectors(const uint8_t *buf, int n, int *pnum)
{
bool is_zero;
int i;
if (n <= 0) {
*pnum = 0;
return 0;
}
is_zero = buffer_is_zero(buf, 512);
for(i = 1; i < n; i++) {
buf += 512;
if (is_zero != buffer_is_zero(buf, 512)) {
break;
}
}
*pnum = i;
return !is_zero;
}
/*
* Like is_allocated_sectors, but if the buffer starts with a used sector,
* up to 'min' consecutive sectors containing zeros are ignored. This avoids
* breaking up write requests for only small sparse areas.
*/
static int is_allocated_sectors_min(const uint8_t *buf, int n, int *pnum,
int min)
{
int ret;
int num_checked, num_used;
if (n < min) {
min = n;
}
ret = is_allocated_sectors(buf, n, pnum);
if (!ret) {
return ret;
}
num_used = *pnum;
buf += BDRV_SECTOR_SIZE * *pnum;
n -= *pnum;
num_checked = num_used;
while (n > 0) {
ret = is_allocated_sectors(buf, n, pnum);
buf += BDRV_SECTOR_SIZE * *pnum;
n -= *pnum;
num_checked += *pnum;
if (ret) {
num_used = num_checked;
} else if (*pnum >= min) {
break;
}
}
*pnum = num_used;
return 1;
}
/*
* Compares two buffers sector by sector. Returns 0 if the first sector of both
* buffers matches, non-zero otherwise.
*
* pnum is set to the number of sectors (including and immediately following
* the first one) that are known to have the same comparison result
*/
static int compare_sectors(const uint8_t *buf1, const uint8_t *buf2, int n,
int *pnum)
{
bool res;
int i;
if (n <= 0) {
*pnum = 0;
return 0;
}
res = !!memcmp(buf1, buf2, 512);
for(i = 1; i < n; i++) {
buf1 += 512;
buf2 += 512;
if (!!memcmp(buf1, buf2, 512) != res) {
break;
}
}
*pnum = i;
return res;
}
#define IO_BUF_SIZE (2 * 1024 * 1024)
static int64_t sectors_to_bytes(int64_t sectors)
{
return sectors << BDRV_SECTOR_BITS;
}
static int64_t sectors_to_process(int64_t total, int64_t from)
{
return MIN(total - from, IO_BUF_SIZE >> BDRV_SECTOR_BITS);
}
/*
* Check if passed sectors are empty (not allocated or contain only 0 bytes)
*
* Returns 0 in case sectors are filled with 0, 1 if sectors contain non-zero
* data and negative value on error.
*
* @param blk: BlockBackend for the image
* @param sect_num: Number of first sector to check
* @param sect_count: Number of sectors to check
* @param filename: Name of disk file we are checking (logging purpose)
* @param buffer: Allocated buffer for storing read data
* @param quiet: Flag for quiet mode
*/
static int check_empty_sectors(BlockBackend *blk, int64_t sect_num,
int sect_count, const char *filename,
uint8_t *buffer, bool quiet)
{
int pnum, ret = 0;
ret = blk_pread(blk, sect_num << BDRV_SECTOR_BITS, buffer,
sect_count << BDRV_SECTOR_BITS);
if (ret < 0) {
error_report("Error while reading offset %" PRId64 " of %s: %s",
sectors_to_bytes(sect_num), filename, strerror(-ret));
return ret;
}
ret = is_allocated_sectors(buffer, sect_count, &pnum);
if (ret || pnum != sect_count) {
qprintf(quiet, "Content mismatch at offset %" PRId64 "!\n",
sectors_to_bytes(ret ? sect_num : sect_num + pnum));
return 1;
}
return 0;
}
/*
* Compares two images. Exit codes:
*
* 0 - Images are identical
* 1 - Images differ
* >1 - Error occurred
*/
static int img_compare(int argc, char **argv)
{
const char *fmt1 = NULL, *fmt2 = NULL, *cache, *filename1, *filename2;
block: New BlockBackend A block device consists of a frontend device model and a backend. A block backend has a tree of block drivers doing the actual work. The tree is managed by the block layer. We currently use a single abstraction BlockDriverState both for tree nodes and the backend as a whole. Drawbacks: * Its API includes both stuff that makes sense only at the block backend level (root of the tree) and stuff that's only for use within the block layer. This makes the API bigger and more complex than necessary. Moreover, it's not obvious which interfaces are meant for device models, and which really aren't. * Since device models keep a reference to their backend, the backend object can't just be destroyed. But for media change, we need to replace the tree. Our solution is to make the BlockDriverState generic, with actual driver state in a separate object, pointed to by member opaque. That lets us replace the tree by deinitializing and reinitializing its root. This special need of the root makes the data structure awkward everywhere in the tree. The general plan is to separate the APIs into "block backend", for use by device models, monitor and whatever other code dealing with block backends, and "block driver", for use by the block layer and whatever other code (if any) dealing with trees and tree nodes. Code dealing with block backends, device models in particular, should become completely oblivious of BlockDriverState. This should let us clean up both APIs, and the tree data structures. This commit is a first step. It creates a minimal "block backend" API: type BlockBackend and functions to create, destroy and find them. BlockBackend objects are created and destroyed exactly when root BlockDriverState objects are created and destroyed. "Root" in the sense of "in bdrv_states". They're not yet used for anything; that'll come shortly. A root BlockDriverState is created with bdrv_new_root(), so where to create a BlockBackend is obvious. Where these roots get destroyed isn't always as obvious. It is obvious in qemu-img.c, qemu-io.c and qemu-nbd.c, and in error paths of blockdev_init(), blk_connect(). That leaves destruction of objects successfully created by blockdev_init() and blk_connect(). blockdev_init() is used only by drive_new() and qmp_blockdev_add(). Objects created by the latter are currently indestructible (see commit 48f364d "blockdev: Refuse to drive_del something added with blockdev-add" and commit 2d246f0 "blockdev: Introduce DriveInfo.enable_auto_del"). Objects created by the former get destroyed by drive_del(). Objects created by blk_connect() get destroyed by blk_disconnect(). BlockBackend is reference-counted. Its reference count never exceeds one so far, but that's going to change. In drive_del(), the BB's reference count is surely one now. The BDS's reference count is greater than one when something else is holding a reference, such as a block job. In this case, the BB is destroyed right away, but the BDS lives on until all extra references get dropped. Signed-off-by: Markus Armbruster <armbru@redhat.com> Reviewed-by: Max Reitz <mreitz@redhat.com> Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2014-10-07 11:59:04 +00:00
BlockBackend *blk1, *blk2;
BlockDriverState *bs1, *bs2;
int64_t total_sectors1, total_sectors2;
uint8_t *buf1 = NULL, *buf2 = NULL;
int pnum1, pnum2;
int allocated1, allocated2;
int ret = 0; /* return value - 0 Ident, 1 Different, >1 Error */
bool progress = false, quiet = false, strict = false;
int flags;
bool writethrough;
int64_t total_sectors;
int64_t sector_num = 0;
int64_t nb_sectors;
int c, pnum;
uint64_t progress_base;
bool image_opts = false;
cache = BDRV_DEFAULT_CACHE;
for (;;) {
static const struct option long_options[] = {
{"help", no_argument, 0, 'h'},
{"object", required_argument, 0, OPTION_OBJECT},
{"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
{0, 0, 0, 0}
};
c = getopt_long(argc, argv, "hf:F:T:pqs",
long_options, NULL);
if (c == -1) {
break;
}
switch (c) {
case '?':
case 'h':
help();
break;
case 'f':
fmt1 = optarg;
break;
case 'F':
fmt2 = optarg;
break;
case 'T':
cache = optarg;
break;
case 'p':
progress = true;
break;
case 'q':
quiet = true;
break;
case 's':
strict = true;
break;
case OPTION_OBJECT: {
QemuOpts *opts;
opts = qemu_opts_parse_noisily(&qemu_object_opts,
optarg, true);
if (!opts) {
ret = 2;
goto out4;
}
} break;
case OPTION_IMAGE_OPTS:
image_opts = true;
break;
}
}
/* Progress is not shown in Quiet mode */
if (quiet) {
progress = false;
}
if (optind != argc - 2) {
error_exit("Expecting two image file names");
}
filename1 = argv[optind++];
filename2 = argv[optind++];
if (qemu_opts_foreach(&qemu_object_opts,
user_creatable_add_opts_foreach,
qom: -object error messages lost location, restore it qemu_opts_foreach() runs its callback with the error location set to the option's location. Any errors the callback reports use the option's location automatically. Commit 90998d5 moved the actual error reporting from "inside" qemu_opts_foreach() to after it. Here's a typical hunk: if (qemu_opts_foreach(qemu_find_opts("object"), - object_create, - object_create_initial, NULL)) { + user_creatable_add_opts_foreach, + object_create_initial, &err)) { + error_report_err(err); exit(1); } Before, object_create() reports from within qemu_opts_foreach(), using the option's location. Afterwards, we do it after qemu_opts_foreach(), using whatever location happens to be current there. Commonly a "none" location. This is because Error objects don't have location information. Problematic. Reproducer: $ qemu-system-x86_64 -nodefaults -display none -object secret,id=foo,foo=bar qemu-system-x86_64: Property '.foo' not found Note no location. This commit restores it: qemu-system-x86_64: -object secret,id=foo,foo=bar: Property '.foo' not found Note that the qemu_opts_foreach() bug just fixed could mask the bug here: if the location it leaves dangling hasn't been clobbered, yet, it's the correct one. Reported-by: Eric Blake <eblake@redhat.com> Cc: Daniel P. Berrange <berrange@redhat.com> Signed-off-by: Markus Armbruster <armbru@redhat.com> Message-Id: <1461767349-15329-4-git-send-email-armbru@redhat.com> Reviewed-by: Daniel P. Berrange <berrange@redhat.com> Reviewed-by: Eric Blake <eblake@redhat.com> [Paragraph on Error added to commit message]
2016-04-27 14:29:09 +00:00
NULL, NULL)) {
ret = 2;
goto out4;
}
/* Initialize before goto out */
qemu_progress_init(progress, 2.0);
flags = 0;
ret = bdrv_parse_cache_mode(cache, &flags, &writethrough);
if (ret < 0) {
error_report("Invalid source cache option: %s", cache);
ret = 2;
goto out3;
}
blk1 = img_open(image_opts, filename1, fmt1, flags, writethrough, quiet);
if (!blk1) {
ret = 2;
goto out3;
}
blk2 = img_open(image_opts, filename2, fmt2, flags, writethrough, quiet);
if (!blk2) {
ret = 2;
goto out2;
}
bs1 = blk_bs(blk1);
bs2 = blk_bs(blk2);
buf1 = blk_blockalign(blk1, IO_BUF_SIZE);
buf2 = blk_blockalign(blk2, IO_BUF_SIZE);
total_sectors1 = blk_nb_sectors(blk1);
if (total_sectors1 < 0) {
error_report("Can't get size of %s: %s",
filename1, strerror(-total_sectors1));
ret = 4;
goto out;
}
total_sectors2 = blk_nb_sectors(blk2);
if (total_sectors2 < 0) {
error_report("Can't get size of %s: %s",
filename2, strerror(-total_sectors2));
ret = 4;
goto out;
}
total_sectors = MIN(total_sectors1, total_sectors2);
progress_base = MAX(total_sectors1, total_sectors2);
qemu_progress_print(0, 100);
if (strict && total_sectors1 != total_sectors2) {
ret = 1;
qprintf(quiet, "Strict mode: Image size mismatch!\n");
goto out;
}
for (;;) {
int64_t status1, status2;
BlockDriverState *file;
nb_sectors = sectors_to_process(total_sectors, sector_num);
if (nb_sectors <= 0) {
break;
}
status1 = bdrv_get_block_status_above(bs1, NULL, sector_num,
total_sectors1 - sector_num,
&pnum1, &file);
if (status1 < 0) {
ret = 3;
error_report("Sector allocation test failed for %s", filename1);
goto out;
}
allocated1 = status1 & BDRV_BLOCK_ALLOCATED;
status2 = bdrv_get_block_status_above(bs2, NULL, sector_num,
total_sectors2 - sector_num,
&pnum2, &file);
if (status2 < 0) {
ret = 3;
error_report("Sector allocation test failed for %s", filename2);
goto out;
}
allocated2 = status2 & BDRV_BLOCK_ALLOCATED;
if (pnum1) {
nb_sectors = MIN(nb_sectors, pnum1);
}
if (pnum2) {
nb_sectors = MIN(nb_sectors, pnum2);
}
if (strict) {
if ((status1 & ~BDRV_BLOCK_OFFSET_MASK) !=
(status2 & ~BDRV_BLOCK_OFFSET_MASK)) {
ret = 1;
qprintf(quiet, "Strict mode: Offset %" PRId64
" block status mismatch!\n",
sectors_to_bytes(sector_num));
goto out;
}
}
if ((status1 & BDRV_BLOCK_ZERO) && (status2 & BDRV_BLOCK_ZERO)) {
nb_sectors = MIN(pnum1, pnum2);
} else if (allocated1 == allocated2) {
if (allocated1) {
ret = blk_pread(blk1, sector_num << BDRV_SECTOR_BITS, buf1,
nb_sectors << BDRV_SECTOR_BITS);
if (ret < 0) {
error_report("Error while reading offset %" PRId64 " of %s:"
" %s", sectors_to_bytes(sector_num), filename1,
strerror(-ret));
ret = 4;
goto out;
}
ret = blk_pread(blk2, sector_num << BDRV_SECTOR_BITS, buf2,
nb_sectors << BDRV_SECTOR_BITS);
if (ret < 0) {
error_report("Error while reading offset %" PRId64
" of %s: %s", sectors_to_bytes(sector_num),
filename2, strerror(-ret));
ret = 4;
goto out;
}
ret = compare_sectors(buf1, buf2, nb_sectors, &pnum);
if (ret || pnum != nb_sectors) {
qprintf(quiet, "Content mismatch at offset %" PRId64 "!\n",
sectors_to_bytes(
ret ? sector_num : sector_num + pnum));
ret = 1;
goto out;
}
}
} else {
if (allocated1) {
ret = check_empty_sectors(blk1, sector_num, nb_sectors,
filename1, buf1, quiet);
} else {
ret = check_empty_sectors(blk2, sector_num, nb_sectors,
filename2, buf1, quiet);
}
if (ret) {
if (ret < 0) {
error_report("Error while reading offset %" PRId64 ": %s",
sectors_to_bytes(sector_num), strerror(-ret));
ret = 4;
}
goto out;
}
}
sector_num += nb_sectors;
qemu_progress_print(((float) nb_sectors / progress_base)*100, 100);
}
if (total_sectors1 != total_sectors2) {
BlockBackend *blk_over;
int64_t total_sectors_over;
const char *filename_over;
qprintf(quiet, "Warning: Image size mismatch!\n");
if (total_sectors1 > total_sectors2) {
total_sectors_over = total_sectors1;
blk_over = blk1;
filename_over = filename1;
} else {
total_sectors_over = total_sectors2;
blk_over = blk2;
filename_over = filename2;
}
for (;;) {
nb_sectors = sectors_to_process(total_sectors_over, sector_num);
if (nb_sectors <= 0) {
break;
}
ret = bdrv_is_allocated_above(blk_bs(blk_over), NULL, sector_num,
nb_sectors, &pnum);
if (ret < 0) {
ret = 3;
error_report("Sector allocation test failed for %s",
filename_over);
goto out;
}
nb_sectors = pnum;
if (ret) {
ret = check_empty_sectors(blk_over, sector_num, nb_sectors,
filename_over, buf1, quiet);
if (ret) {
if (ret < 0) {
error_report("Error while reading offset %" PRId64
" of %s: %s", sectors_to_bytes(sector_num),
filename_over, strerror(-ret));
ret = 4;
}
goto out;
}
}
sector_num += nb_sectors;
qemu_progress_print(((float) nb_sectors / progress_base)*100, 100);
}
}
qprintf(quiet, "Images are identical.\n");
ret = 0;
out:
qemu_vfree(buf1);
qemu_vfree(buf2);
block: New BlockBackend A block device consists of a frontend device model and a backend. A block backend has a tree of block drivers doing the actual work. The tree is managed by the block layer. We currently use a single abstraction BlockDriverState both for tree nodes and the backend as a whole. Drawbacks: * Its API includes both stuff that makes sense only at the block backend level (root of the tree) and stuff that's only for use within the block layer. This makes the API bigger and more complex than necessary. Moreover, it's not obvious which interfaces are meant for device models, and which really aren't. * Since device models keep a reference to their backend, the backend object can't just be destroyed. But for media change, we need to replace the tree. Our solution is to make the BlockDriverState generic, with actual driver state in a separate object, pointed to by member opaque. That lets us replace the tree by deinitializing and reinitializing its root. This special need of the root makes the data structure awkward everywhere in the tree. The general plan is to separate the APIs into "block backend", for use by device models, monitor and whatever other code dealing with block backends, and "block driver", for use by the block layer and whatever other code (if any) dealing with trees and tree nodes. Code dealing with block backends, device models in particular, should become completely oblivious of BlockDriverState. This should let us clean up both APIs, and the tree data structures. This commit is a first step. It creates a minimal "block backend" API: type BlockBackend and functions to create, destroy and find them. BlockBackend objects are created and destroyed exactly when root BlockDriverState objects are created and destroyed. "Root" in the sense of "in bdrv_states". They're not yet used for anything; that'll come shortly. A root BlockDriverState is created with bdrv_new_root(), so where to create a BlockBackend is obvious. Where these roots get destroyed isn't always as obvious. It is obvious in qemu-img.c, qemu-io.c and qemu-nbd.c, and in error paths of blockdev_init(), blk_connect(). That leaves destruction of objects successfully created by blockdev_init() and blk_connect(). blockdev_init() is used only by drive_new() and qmp_blockdev_add(). Objects created by the latter are currently indestructible (see commit 48f364d "blockdev: Refuse to drive_del something added with blockdev-add" and commit 2d246f0 "blockdev: Introduce DriveInfo.enable_auto_del"). Objects created by the former get destroyed by drive_del(). Objects created by blk_connect() get destroyed by blk_disconnect(). BlockBackend is reference-counted. Its reference count never exceeds one so far, but that's going to change. In drive_del(), the BB's reference count is surely one now. The BDS's reference count is greater than one when something else is holding a reference, such as a block job. In this case, the BB is destroyed right away, but the BDS lives on until all extra references get dropped. Signed-off-by: Markus Armbruster <armbru@redhat.com> Reviewed-by: Max Reitz <mreitz@redhat.com> Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2014-10-07 11:59:04 +00:00
blk_unref(blk2);
out2:
block: New BlockBackend A block device consists of a frontend device model and a backend. A block backend has a tree of block drivers doing the actual work. The tree is managed by the block layer. We currently use a single abstraction BlockDriverState both for tree nodes and the backend as a whole. Drawbacks: * Its API includes both stuff that makes sense only at the block backend level (root of the tree) and stuff that's only for use within the block layer. This makes the API bigger and more complex than necessary. Moreover, it's not obvious which interfaces are meant for device models, and which really aren't. * Since device models keep a reference to their backend, the backend object can't just be destroyed. But for media change, we need to replace the tree. Our solution is to make the BlockDriverState generic, with actual driver state in a separate object, pointed to by member opaque. That lets us replace the tree by deinitializing and reinitializing its root. This special need of the root makes the data structure awkward everywhere in the tree. The general plan is to separate the APIs into "block backend", for use by device models, monitor and whatever other code dealing with block backends, and "block driver", for use by the block layer and whatever other code (if any) dealing with trees and tree nodes. Code dealing with block backends, device models in particular, should become completely oblivious of BlockDriverState. This should let us clean up both APIs, and the tree data structures. This commit is a first step. It creates a minimal "block backend" API: type BlockBackend and functions to create, destroy and find them. BlockBackend objects are created and destroyed exactly when root BlockDriverState objects are created and destroyed. "Root" in the sense of "in bdrv_states". They're not yet used for anything; that'll come shortly. A root BlockDriverState is created with bdrv_new_root(), so where to create a BlockBackend is obvious. Where these roots get destroyed isn't always as obvious. It is obvious in qemu-img.c, qemu-io.c and qemu-nbd.c, and in error paths of blockdev_init(), blk_connect(). That leaves destruction of objects successfully created by blockdev_init() and blk_connect(). blockdev_init() is used only by drive_new() and qmp_blockdev_add(). Objects created by the latter are currently indestructible (see commit 48f364d "blockdev: Refuse to drive_del something added with blockdev-add" and commit 2d246f0 "blockdev: Introduce DriveInfo.enable_auto_del"). Objects created by the former get destroyed by drive_del(). Objects created by blk_connect() get destroyed by blk_disconnect(). BlockBackend is reference-counted. Its reference count never exceeds one so far, but that's going to change. In drive_del(), the BB's reference count is surely one now. The BDS's reference count is greater than one when something else is holding a reference, such as a block job. In this case, the BB is destroyed right away, but the BDS lives on until all extra references get dropped. Signed-off-by: Markus Armbruster <armbru@redhat.com> Reviewed-by: Max Reitz <mreitz@redhat.com> Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2014-10-07 11:59:04 +00:00
blk_unref(blk1);
out3:
qemu_progress_end();
out4:
return ret;
}
qemu-img convert: Rewrite copying logic The implementation of qemu-img convert is (a) messy, (b) buggy, and (c) less efficient than possible. The changes required to beat some sense into it are massive enough that incremental changes would only make my and the reviewers' life harder. So throw it away and reimplement it from scratch. Let me give some examples what I mean by messy, buggy and inefficient: (a) The copying logic of qemu-img convert has two separate branches for compressed and normal target images, which roughly do the same - except for a little code that handles actual differences between compressed and uncompressed images, and much more code that implements just a different set of optimisations and bugs. This is unnecessary code duplication, and makes the code for compressed output (unsurprisingly) suffer from bitrot. The code for uncompressed ouput is run twice to count the the total length for the progress bar. In the first run it just takes a shortcut and runs only half the loop, and when it's done, it toggles a boolean, jumps out of the loop with a backwards goto and starts over. Works, but pretty is something different. (b) Converting while keeping a backing file (-B option) is broken in several ways. This includes not writing to the image file if the input has zero clusters or data filled with zeros (ignoring that the backing file will be visible instead). It also doesn't correctly limit every iteration of the copy loop to sectors of the same status so that too many sectors may be copied to in the target image. For -B this gives an unexpected result, for other images it just does more work than necessary. Conversion with a compressed target completely ignores any target backing file. (c) qemu-img convert skips reading and writing an area if it knows from metadata that copying isn't needed (except for the bug mentioned above that ignores a status change in some cases). It does, however, read from the source even if it knows that it will read zeros, and then search for non-zero bytes in the read buffer, if it's possible that a write might be needed. This reimplementation of the copying core reorganises the code to remove the duplication and have a much more obvious code flow, by essentially splitting the copy iteration loop into three parts: 1. Find the number of contiguous sectors of the same status at the current offset (This can also be called in a separate loop before the copying loop in order to determine the total sectors for the progress bar.) 2. Read sectors. If the status implies that there is no data there to read (zero or unallocated cluster), don't do anything. 3. Write sectors depending on the status. If it's data, write it. If we want the backing file to be visible (with -B), don't write it. If it's zeroed, skip it if you can, otherwise use bdrv_write_zeroes() to optimise the write at least where possible. Signed-off-by: Kevin Wolf <kwolf@redhat.com> Reviewed-by: Max Reitz <mreitz@redhat.com>
2015-03-19 12:33:32 +00:00
enum ImgConvertBlockStatus {
BLK_DATA,
BLK_ZERO,
BLK_BACKING_FILE,
};
typedef struct ImgConvertState {
BlockBackend **src;
int64_t *src_sectors;
int src_cur, src_num;
int64_t src_cur_offset;
int64_t total_sectors;
int64_t allocated_sectors;
enum ImgConvertBlockStatus status;
int64_t sector_next_status;
BlockBackend *target;
bool has_zero_init;
bool compressed;
bool target_has_backing;
int min_sparse;
size_t cluster_sectors;
size_t buf_sectors;
} ImgConvertState;
static void convert_select_part(ImgConvertState *s, int64_t sector_num)
{
assert(sector_num >= s->src_cur_offset);
while (sector_num - s->src_cur_offset >= s->src_sectors[s->src_cur]) {
s->src_cur_offset += s->src_sectors[s->src_cur];
s->src_cur++;
assert(s->src_cur < s->src_num);
}
}
static int convert_iteration_sectors(ImgConvertState *s, int64_t sector_num)
{
int64_t ret;
int n;
convert_select_part(s, sector_num);
assert(s->total_sectors > sector_num);
n = MIN(s->total_sectors - sector_num, BDRV_REQUEST_MAX_SECTORS);
if (s->sector_next_status <= sector_num) {
BlockDriverState *file;
qemu-img convert: Rewrite copying logic The implementation of qemu-img convert is (a) messy, (b) buggy, and (c) less efficient than possible. The changes required to beat some sense into it are massive enough that incremental changes would only make my and the reviewers' life harder. So throw it away and reimplement it from scratch. Let me give some examples what I mean by messy, buggy and inefficient: (a) The copying logic of qemu-img convert has two separate branches for compressed and normal target images, which roughly do the same - except for a little code that handles actual differences between compressed and uncompressed images, and much more code that implements just a different set of optimisations and bugs. This is unnecessary code duplication, and makes the code for compressed output (unsurprisingly) suffer from bitrot. The code for uncompressed ouput is run twice to count the the total length for the progress bar. In the first run it just takes a shortcut and runs only half the loop, and when it's done, it toggles a boolean, jumps out of the loop with a backwards goto and starts over. Works, but pretty is something different. (b) Converting while keeping a backing file (-B option) is broken in several ways. This includes not writing to the image file if the input has zero clusters or data filled with zeros (ignoring that the backing file will be visible instead). It also doesn't correctly limit every iteration of the copy loop to sectors of the same status so that too many sectors may be copied to in the target image. For -B this gives an unexpected result, for other images it just does more work than necessary. Conversion with a compressed target completely ignores any target backing file. (c) qemu-img convert skips reading and writing an area if it knows from metadata that copying isn't needed (except for the bug mentioned above that ignores a status change in some cases). It does, however, read from the source even if it knows that it will read zeros, and then search for non-zero bytes in the read buffer, if it's possible that a write might be needed. This reimplementation of the copying core reorganises the code to remove the duplication and have a much more obvious code flow, by essentially splitting the copy iteration loop into three parts: 1. Find the number of contiguous sectors of the same status at the current offset (This can also be called in a separate loop before the copying loop in order to determine the total sectors for the progress bar.) 2. Read sectors. If the status implies that there is no data there to read (zero or unallocated cluster), don't do anything. 3. Write sectors depending on the status. If it's data, write it. If we want the backing file to be visible (with -B), don't write it. If it's zeroed, skip it if you can, otherwise use bdrv_write_zeroes() to optimise the write at least where possible. Signed-off-by: Kevin Wolf <kwolf@redhat.com> Reviewed-by: Max Reitz <mreitz@redhat.com>
2015-03-19 12:33:32 +00:00
ret = bdrv_get_block_status(blk_bs(s->src[s->src_cur]),
sector_num - s->src_cur_offset,
n, &n, &file);
qemu-img convert: Rewrite copying logic The implementation of qemu-img convert is (a) messy, (b) buggy, and (c) less efficient than possible. The changes required to beat some sense into it are massive enough that incremental changes would only make my and the reviewers' life harder. So throw it away and reimplement it from scratch. Let me give some examples what I mean by messy, buggy and inefficient: (a) The copying logic of qemu-img convert has two separate branches for compressed and normal target images, which roughly do the same - except for a little code that handles actual differences between compressed and uncompressed images, and much more code that implements just a different set of optimisations and bugs. This is unnecessary code duplication, and makes the code for compressed output (unsurprisingly) suffer from bitrot. The code for uncompressed ouput is run twice to count the the total length for the progress bar. In the first run it just takes a shortcut and runs only half the loop, and when it's done, it toggles a boolean, jumps out of the loop with a backwards goto and starts over. Works, but pretty is something different. (b) Converting while keeping a backing file (-B option) is broken in several ways. This includes not writing to the image file if the input has zero clusters or data filled with zeros (ignoring that the backing file will be visible instead). It also doesn't correctly limit every iteration of the copy loop to sectors of the same status so that too many sectors may be copied to in the target image. For -B this gives an unexpected result, for other images it just does more work than necessary. Conversion with a compressed target completely ignores any target backing file. (c) qemu-img convert skips reading and writing an area if it knows from metadata that copying isn't needed (except for the bug mentioned above that ignores a status change in some cases). It does, however, read from the source even if it knows that it will read zeros, and then search for non-zero bytes in the read buffer, if it's possible that a write might be needed. This reimplementation of the copying core reorganises the code to remove the duplication and have a much more obvious code flow, by essentially splitting the copy iteration loop into three parts: 1. Find the number of contiguous sectors of the same status at the current offset (This can also be called in a separate loop before the copying loop in order to determine the total sectors for the progress bar.) 2. Read sectors. If the status implies that there is no data there to read (zero or unallocated cluster), don't do anything. 3. Write sectors depending on the status. If it's data, write it. If we want the backing file to be visible (with -B), don't write it. If it's zeroed, skip it if you can, otherwise use bdrv_write_zeroes() to optimise the write at least where possible. Signed-off-by: Kevin Wolf <kwolf@redhat.com> Reviewed-by: Max Reitz <mreitz@redhat.com>
2015-03-19 12:33:32 +00:00
if (ret < 0) {
return ret;
}
if (ret & BDRV_BLOCK_ZERO) {
s->status = BLK_ZERO;
} else if (ret & BDRV_BLOCK_DATA) {
s->status = BLK_DATA;
} else if (!s->target_has_backing) {
/* Without a target backing file we must copy over the contents of
* the backing file as well. */
/* Check block status of the backing file chain to avoid
qemu-img convert: Rewrite copying logic The implementation of qemu-img convert is (a) messy, (b) buggy, and (c) less efficient than possible. The changes required to beat some sense into it are massive enough that incremental changes would only make my and the reviewers' life harder. So throw it away and reimplement it from scratch. Let me give some examples what I mean by messy, buggy and inefficient: (a) The copying logic of qemu-img convert has two separate branches for compressed and normal target images, which roughly do the same - except for a little code that handles actual differences between compressed and uncompressed images, and much more code that implements just a different set of optimisations and bugs. This is unnecessary code duplication, and makes the code for compressed output (unsurprisingly) suffer from bitrot. The code for uncompressed ouput is run twice to count the the total length for the progress bar. In the first run it just takes a shortcut and runs only half the loop, and when it's done, it toggles a boolean, jumps out of the loop with a backwards goto and starts over. Works, but pretty is something different. (b) Converting while keeping a backing file (-B option) is broken in several ways. This includes not writing to the image file if the input has zero clusters or data filled with zeros (ignoring that the backing file will be visible instead). It also doesn't correctly limit every iteration of the copy loop to sectors of the same status so that too many sectors may be copied to in the target image. For -B this gives an unexpected result, for other images it just does more work than necessary. Conversion with a compressed target completely ignores any target backing file. (c) qemu-img convert skips reading and writing an area if it knows from metadata that copying isn't needed (except for the bug mentioned above that ignores a status change in some cases). It does, however, read from the source even if it knows that it will read zeros, and then search for non-zero bytes in the read buffer, if it's possible that a write might be needed. This reimplementation of the copying core reorganises the code to remove the duplication and have a much more obvious code flow, by essentially splitting the copy iteration loop into three parts: 1. Find the number of contiguous sectors of the same status at the current offset (This can also be called in a separate loop before the copying loop in order to determine the total sectors for the progress bar.) 2. Read sectors. If the status implies that there is no data there to read (zero or unallocated cluster), don't do anything. 3. Write sectors depending on the status. If it's data, write it. If we want the backing file to be visible (with -B), don't write it. If it's zeroed, skip it if you can, otherwise use bdrv_write_zeroes() to optimise the write at least where possible. Signed-off-by: Kevin Wolf <kwolf@redhat.com> Reviewed-by: Max Reitz <mreitz@redhat.com>
2015-03-19 12:33:32 +00:00
* needlessly reading zeroes and limiting the iteration to the
* buffer size */
ret = bdrv_get_block_status_above(blk_bs(s->src[s->src_cur]), NULL,
sector_num - s->src_cur_offset,
n, &n, &file);
if (ret < 0) {
return ret;
}
if (ret & BDRV_BLOCK_ZERO) {
s->status = BLK_ZERO;
} else {
s->status = BLK_DATA;
}
qemu-img convert: Rewrite copying logic The implementation of qemu-img convert is (a) messy, (b) buggy, and (c) less efficient than possible. The changes required to beat some sense into it are massive enough that incremental changes would only make my and the reviewers' life harder. So throw it away and reimplement it from scratch. Let me give some examples what I mean by messy, buggy and inefficient: (a) The copying logic of qemu-img convert has two separate branches for compressed and normal target images, which roughly do the same - except for a little code that handles actual differences between compressed and uncompressed images, and much more code that implements just a different set of optimisations and bugs. This is unnecessary code duplication, and makes the code for compressed output (unsurprisingly) suffer from bitrot. The code for uncompressed ouput is run twice to count the the total length for the progress bar. In the first run it just takes a shortcut and runs only half the loop, and when it's done, it toggles a boolean, jumps out of the loop with a backwards goto and starts over. Works, but pretty is something different. (b) Converting while keeping a backing file (-B option) is broken in several ways. This includes not writing to the image file if the input has zero clusters or data filled with zeros (ignoring that the backing file will be visible instead). It also doesn't correctly limit every iteration of the copy loop to sectors of the same status so that too many sectors may be copied to in the target image. For -B this gives an unexpected result, for other images it just does more work than necessary. Conversion with a compressed target completely ignores any target backing file. (c) qemu-img convert skips reading and writing an area if it knows from metadata that copying isn't needed (except for the bug mentioned above that ignores a status change in some cases). It does, however, read from the source even if it knows that it will read zeros, and then search for non-zero bytes in the read buffer, if it's possible that a write might be needed. This reimplementation of the copying core reorganises the code to remove the duplication and have a much more obvious code flow, by essentially splitting the copy iteration loop into three parts: 1. Find the number of contiguous sectors of the same status at the current offset (This can also be called in a separate loop before the copying loop in order to determine the total sectors for the progress bar.) 2. Read sectors. If the status implies that there is no data there to read (zero or unallocated cluster), don't do anything. 3. Write sectors depending on the status. If it's data, write it. If we want the backing file to be visible (with -B), don't write it. If it's zeroed, skip it if you can, otherwise use bdrv_write_zeroes() to optimise the write at least where possible. Signed-off-by: Kevin Wolf <kwolf@redhat.com> Reviewed-by: Max Reitz <mreitz@redhat.com>
2015-03-19 12:33:32 +00:00
} else {
s->status = BLK_BACKING_FILE;
}
s->sector_next_status = sector_num + n;
}
n = MIN(n, s->sector_next_status - sector_num);
if (s->status == BLK_DATA) {
n = MIN(n, s->buf_sectors);
}
/* We need to write complete clusters for compressed images, so if an
* unallocated area is shorter than that, we must consider the whole
* cluster allocated. */
if (s->compressed) {
if (n < s->cluster_sectors) {
n = MIN(s->cluster_sectors, s->total_sectors - sector_num);
s->status = BLK_DATA;
} else {
n = QEMU_ALIGN_DOWN(n, s->cluster_sectors);
}
}
return n;
}
static int convert_read(ImgConvertState *s, int64_t sector_num, int nb_sectors,
uint8_t *buf)
{
int n;
int ret;
assert(nb_sectors <= s->buf_sectors);
while (nb_sectors > 0) {
BlockBackend *blk;
int64_t bs_sectors;
/* In the case of compression with multiple source files, we can get a
* nb_sectors that spreads into the next part. So we must be able to
* read across multiple BDSes for one convert_read() call. */
convert_select_part(s, sector_num);
blk = s->src[s->src_cur];
bs_sectors = s->src_sectors[s->src_cur];
n = MIN(nb_sectors, bs_sectors - (sector_num - s->src_cur_offset));
ret = blk_pread(blk,
(sector_num - s->src_cur_offset) << BDRV_SECTOR_BITS,
buf, n << BDRV_SECTOR_BITS);
qemu-img convert: Rewrite copying logic The implementation of qemu-img convert is (a) messy, (b) buggy, and (c) less efficient than possible. The changes required to beat some sense into it are massive enough that incremental changes would only make my and the reviewers' life harder. So throw it away and reimplement it from scratch. Let me give some examples what I mean by messy, buggy and inefficient: (a) The copying logic of qemu-img convert has two separate branches for compressed and normal target images, which roughly do the same - except for a little code that handles actual differences between compressed and uncompressed images, and much more code that implements just a different set of optimisations and bugs. This is unnecessary code duplication, and makes the code for compressed output (unsurprisingly) suffer from bitrot. The code for uncompressed ouput is run twice to count the the total length for the progress bar. In the first run it just takes a shortcut and runs only half the loop, and when it's done, it toggles a boolean, jumps out of the loop with a backwards goto and starts over. Works, but pretty is something different. (b) Converting while keeping a backing file (-B option) is broken in several ways. This includes not writing to the image file if the input has zero clusters or data filled with zeros (ignoring that the backing file will be visible instead). It also doesn't correctly limit every iteration of the copy loop to sectors of the same status so that too many sectors may be copied to in the target image. For -B this gives an unexpected result, for other images it just does more work than necessary. Conversion with a compressed target completely ignores any target backing file. (c) qemu-img convert skips reading and writing an area if it knows from metadata that copying isn't needed (except for the bug mentioned above that ignores a status change in some cases). It does, however, read from the source even if it knows that it will read zeros, and then search for non-zero bytes in the read buffer, if it's possible that a write might be needed. This reimplementation of the copying core reorganises the code to remove the duplication and have a much more obvious code flow, by essentially splitting the copy iteration loop into three parts: 1. Find the number of contiguous sectors of the same status at the current offset (This can also be called in a separate loop before the copying loop in order to determine the total sectors for the progress bar.) 2. Read sectors. If the status implies that there is no data there to read (zero or unallocated cluster), don't do anything. 3. Write sectors depending on the status. If it's data, write it. If we want the backing file to be visible (with -B), don't write it. If it's zeroed, skip it if you can, otherwise use bdrv_write_zeroes() to optimise the write at least where possible. Signed-off-by: Kevin Wolf <kwolf@redhat.com> Reviewed-by: Max Reitz <mreitz@redhat.com>
2015-03-19 12:33:32 +00:00
if (ret < 0) {
return ret;
}
sector_num += n;
nb_sectors -= n;
buf += n * BDRV_SECTOR_SIZE;
}
return 0;
}
static int convert_write(ImgConvertState *s, int64_t sector_num, int nb_sectors,
const uint8_t *buf)
{
int ret;
while (nb_sectors > 0) {
int n = nb_sectors;
switch (s->status) {
case BLK_BACKING_FILE:
/* If we have a backing file, leave clusters unallocated that are
* unallocated in the source image, so that the backing file is
* visible at the respective offset. */
assert(s->target_has_backing);
break;
case BLK_DATA:
/* We must always write compressed clusters as a whole, so don't
* try to find zeroed parts in the buffer. We can only save the
* write if the buffer is completely zeroed and we're allowed to
* keep the target sparse. */
if (s->compressed) {
if (s->has_zero_init && s->min_sparse &&
buffer_is_zero(buf, n * BDRV_SECTOR_SIZE))
{
assert(!s->target_has_backing);
break;
}
ret = blk_pwrite_compressed(s->target,
sector_num << BDRV_SECTOR_BITS,
buf, n << BDRV_SECTOR_BITS);
qemu-img convert: Rewrite copying logic The implementation of qemu-img convert is (a) messy, (b) buggy, and (c) less efficient than possible. The changes required to beat some sense into it are massive enough that incremental changes would only make my and the reviewers' life harder. So throw it away and reimplement it from scratch. Let me give some examples what I mean by messy, buggy and inefficient: (a) The copying logic of qemu-img convert has two separate branches for compressed and normal target images, which roughly do the same - except for a little code that handles actual differences between compressed and uncompressed images, and much more code that implements just a different set of optimisations and bugs. This is unnecessary code duplication, and makes the code for compressed output (unsurprisingly) suffer from bitrot. The code for uncompressed ouput is run twice to count the the total length for the progress bar. In the first run it just takes a shortcut and runs only half the loop, and when it's done, it toggles a boolean, jumps out of the loop with a backwards goto and starts over. Works, but pretty is something different. (b) Converting while keeping a backing file (-B option) is broken in several ways. This includes not writing to the image file if the input has zero clusters or data filled with zeros (ignoring that the backing file will be visible instead). It also doesn't correctly limit every iteration of the copy loop to sectors of the same status so that too many sectors may be copied to in the target image. For -B this gives an unexpected result, for other images it just does more work than necessary. Conversion with a compressed target completely ignores any target backing file. (c) qemu-img convert skips reading and writing an area if it knows from metadata that copying isn't needed (except for the bug mentioned above that ignores a status change in some cases). It does, however, read from the source even if it knows that it will read zeros, and then search for non-zero bytes in the read buffer, if it's possible that a write might be needed. This reimplementation of the copying core reorganises the code to remove the duplication and have a much more obvious code flow, by essentially splitting the copy iteration loop into three parts: 1. Find the number of contiguous sectors of the same status at the current offset (This can also be called in a separate loop before the copying loop in order to determine the total sectors for the progress bar.) 2. Read sectors. If the status implies that there is no data there to read (zero or unallocated cluster), don't do anything. 3. Write sectors depending on the status. If it's data, write it. If we want the backing file to be visible (with -B), don't write it. If it's zeroed, skip it if you can, otherwise use bdrv_write_zeroes() to optimise the write at least where possible. Signed-off-by: Kevin Wolf <kwolf@redhat.com> Reviewed-by: Max Reitz <mreitz@redhat.com>
2015-03-19 12:33:32 +00:00
if (ret < 0) {
return ret;
}
break;
}
/* If there is real non-zero data or we're told to keep the target
* fully allocated (-S 0), we must write it. Otherwise we can treat
* it as zero sectors. */
if (!s->min_sparse ||
is_allocated_sectors_min(buf, n, &n, s->min_sparse))
{
ret = blk_pwrite(s->target, sector_num << BDRV_SECTOR_BITS,
buf, n << BDRV_SECTOR_BITS, 0);
qemu-img convert: Rewrite copying logic The implementation of qemu-img convert is (a) messy, (b) buggy, and (c) less efficient than possible. The changes required to beat some sense into it are massive enough that incremental changes would only make my and the reviewers' life harder. So throw it away and reimplement it from scratch. Let me give some examples what I mean by messy, buggy and inefficient: (a) The copying logic of qemu-img convert has two separate branches for compressed and normal target images, which roughly do the same - except for a little code that handles actual differences between compressed and uncompressed images, and much more code that implements just a different set of optimisations and bugs. This is unnecessary code duplication, and makes the code for compressed output (unsurprisingly) suffer from bitrot. The code for uncompressed ouput is run twice to count the the total length for the progress bar. In the first run it just takes a shortcut and runs only half the loop, and when it's done, it toggles a boolean, jumps out of the loop with a backwards goto and starts over. Works, but pretty is something different. (b) Converting while keeping a backing file (-B option) is broken in several ways. This includes not writing to the image file if the input has zero clusters or data filled with zeros (ignoring that the backing file will be visible instead). It also doesn't correctly limit every iteration of the copy loop to sectors of the same status so that too many sectors may be copied to in the target image. For -B this gives an unexpected result, for other images it just does more work than necessary. Conversion with a compressed target completely ignores any target backing file. (c) qemu-img convert skips reading and writing an area if it knows from metadata that copying isn't needed (except for the bug mentioned above that ignores a status change in some cases). It does, however, read from the source even if it knows that it will read zeros, and then search for non-zero bytes in the read buffer, if it's possible that a write might be needed. This reimplementation of the copying core reorganises the code to remove the duplication and have a much more obvious code flow, by essentially splitting the copy iteration loop into three parts: 1. Find the number of contiguous sectors of the same status at the current offset (This can also be called in a separate loop before the copying loop in order to determine the total sectors for the progress bar.) 2. Read sectors. If the status implies that there is no data there to read (zero or unallocated cluster), don't do anything. 3. Write sectors depending on the status. If it's data, write it. If we want the backing file to be visible (with -B), don't write it. If it's zeroed, skip it if you can, otherwise use bdrv_write_zeroes() to optimise the write at least where possible. Signed-off-by: Kevin Wolf <kwolf@redhat.com> Reviewed-by: Max Reitz <mreitz@redhat.com>
2015-03-19 12:33:32 +00:00
if (ret < 0) {
return ret;
}
break;
}
/* fall-through */
case BLK_ZERO:
if (s->has_zero_init) {
break;
}
ret = blk_pwrite_zeroes(s->target, sector_num << BDRV_SECTOR_BITS,
n << BDRV_SECTOR_BITS, 0);
qemu-img convert: Rewrite copying logic The implementation of qemu-img convert is (a) messy, (b) buggy, and (c) less efficient than possible. The changes required to beat some sense into it are massive enough that incremental changes would only make my and the reviewers' life harder. So throw it away and reimplement it from scratch. Let me give some examples what I mean by messy, buggy and inefficient: (a) The copying logic of qemu-img convert has two separate branches for compressed and normal target images, which roughly do the same - except for a little code that handles actual differences between compressed and uncompressed images, and much more code that implements just a different set of optimisations and bugs. This is unnecessary code duplication, and makes the code for compressed output (unsurprisingly) suffer from bitrot. The code for uncompressed ouput is run twice to count the the total length for the progress bar. In the first run it just takes a shortcut and runs only half the loop, and when it's done, it toggles a boolean, jumps out of the loop with a backwards goto and starts over. Works, but pretty is something different. (b) Converting while keeping a backing file (-B option) is broken in several ways. This includes not writing to the image file if the input has zero clusters or data filled with zeros (ignoring that the backing file will be visible instead). It also doesn't correctly limit every iteration of the copy loop to sectors of the same status so that too many sectors may be copied to in the target image. For -B this gives an unexpected result, for other images it just does more work than necessary. Conversion with a compressed target completely ignores any target backing file. (c) qemu-img convert skips reading and writing an area if it knows from metadata that copying isn't needed (except for the bug mentioned above that ignores a status change in some cases). It does, however, read from the source even if it knows that it will read zeros, and then search for non-zero bytes in the read buffer, if it's possible that a write might be needed. This reimplementation of the copying core reorganises the code to remove the duplication and have a much more obvious code flow, by essentially splitting the copy iteration loop into three parts: 1. Find the number of contiguous sectors of the same status at the current offset (This can also be called in a separate loop before the copying loop in order to determine the total sectors for the progress bar.) 2. Read sectors. If the status implies that there is no data there to read (zero or unallocated cluster), don't do anything. 3. Write sectors depending on the status. If it's data, write it. If we want the backing file to be visible (with -B), don't write it. If it's zeroed, skip it if you can, otherwise use bdrv_write_zeroes() to optimise the write at least where possible. Signed-off-by: Kevin Wolf <kwolf@redhat.com> Reviewed-by: Max Reitz <mreitz@redhat.com>
2015-03-19 12:33:32 +00:00
if (ret < 0) {
return ret;
}
break;
}
sector_num += n;
nb_sectors -= n;
buf += n * BDRV_SECTOR_SIZE;
}
return 0;
}
static int convert_do_copy(ImgConvertState *s)
{
uint8_t *buf = NULL;
int64_t sector_num, allocated_done;
int ret;
int n;
/* Check whether we have zero initialisation or can get it efficiently */
s->has_zero_init = s->min_sparse && !s->target_has_backing
? bdrv_has_zero_init(blk_bs(s->target))
: false;
if (!s->has_zero_init && !s->target_has_backing &&
bdrv_can_write_zeroes_with_unmap(blk_bs(s->target)))
{
ret = blk_make_zero(s->target, BDRV_REQ_MAY_UNMAP);
qemu-img convert: Rewrite copying logic The implementation of qemu-img convert is (a) messy, (b) buggy, and (c) less efficient than possible. The changes required to beat some sense into it are massive enough that incremental changes would only make my and the reviewers' life harder. So throw it away and reimplement it from scratch. Let me give some examples what I mean by messy, buggy and inefficient: (a) The copying logic of qemu-img convert has two separate branches for compressed and normal target images, which roughly do the same - except for a little code that handles actual differences between compressed and uncompressed images, and much more code that implements just a different set of optimisations and bugs. This is unnecessary code duplication, and makes the code for compressed output (unsurprisingly) suffer from bitrot. The code for uncompressed ouput is run twice to count the the total length for the progress bar. In the first run it just takes a shortcut and runs only half the loop, and when it's done, it toggles a boolean, jumps out of the loop with a backwards goto and starts over. Works, but pretty is something different. (b) Converting while keeping a backing file (-B option) is broken in several ways. This includes not writing to the image file if the input has zero clusters or data filled with zeros (ignoring that the backing file will be visible instead). It also doesn't correctly limit every iteration of the copy loop to sectors of the same status so that too many sectors may be copied to in the target image. For -B this gives an unexpected result, for other images it just does more work than necessary. Conversion with a compressed target completely ignores any target backing file. (c) qemu-img convert skips reading and writing an area if it knows from metadata that copying isn't needed (except for the bug mentioned above that ignores a status change in some cases). It does, however, read from the source even if it knows that it will read zeros, and then search for non-zero bytes in the read buffer, if it's possible that a write might be needed. This reimplementation of the copying core reorganises the code to remove the duplication and have a much more obvious code flow, by essentially splitting the copy iteration loop into three parts: 1. Find the number of contiguous sectors of the same status at the current offset (This can also be called in a separate loop before the copying loop in order to determine the total sectors for the progress bar.) 2. Read sectors. If the status implies that there is no data there to read (zero or unallocated cluster), don't do anything. 3. Write sectors depending on the status. If it's data, write it. If we want the backing file to be visible (with -B), don't write it. If it's zeroed, skip it if you can, otherwise use bdrv_write_zeroes() to optimise the write at least where possible. Signed-off-by: Kevin Wolf <kwolf@redhat.com> Reviewed-by: Max Reitz <mreitz@redhat.com>
2015-03-19 12:33:32 +00:00
if (ret == 0) {
s->has_zero_init = true;
}
}
/* Allocate buffer for copied data. For compressed images, only one cluster
* can be copied at a time. */
if (s->compressed) {
if (s->cluster_sectors <= 0 || s->cluster_sectors > s->buf_sectors) {
error_report("invalid cluster size");
ret = -EINVAL;
goto fail;
}
s->buf_sectors = s->cluster_sectors;
}
buf = blk_blockalign(s->target, s->buf_sectors * BDRV_SECTOR_SIZE);
/* Calculate allocated sectors for progress */
s->allocated_sectors = 0;
sector_num = 0;
while (sector_num < s->total_sectors) {
n = convert_iteration_sectors(s, sector_num);
if (n < 0) {
ret = n;
goto fail;
}
if (s->status == BLK_DATA || (!s->min_sparse && s->status == BLK_ZERO))
{
qemu-img convert: Rewrite copying logic The implementation of qemu-img convert is (a) messy, (b) buggy, and (c) less efficient than possible. The changes required to beat some sense into it are massive enough that incremental changes would only make my and the reviewers' life harder. So throw it away and reimplement it from scratch. Let me give some examples what I mean by messy, buggy and inefficient: (a) The copying logic of qemu-img convert has two separate branches for compressed and normal target images, which roughly do the same - except for a little code that handles actual differences between compressed and uncompressed images, and much more code that implements just a different set of optimisations and bugs. This is unnecessary code duplication, and makes the code for compressed output (unsurprisingly) suffer from bitrot. The code for uncompressed ouput is run twice to count the the total length for the progress bar. In the first run it just takes a shortcut and runs only half the loop, and when it's done, it toggles a boolean, jumps out of the loop with a backwards goto and starts over. Works, but pretty is something different. (b) Converting while keeping a backing file (-B option) is broken in several ways. This includes not writing to the image file if the input has zero clusters or data filled with zeros (ignoring that the backing file will be visible instead). It also doesn't correctly limit every iteration of the copy loop to sectors of the same status so that too many sectors may be copied to in the target image. For -B this gives an unexpected result, for other images it just does more work than necessary. Conversion with a compressed target completely ignores any target backing file. (c) qemu-img convert skips reading and writing an area if it knows from metadata that copying isn't needed (except for the bug mentioned above that ignores a status change in some cases). It does, however, read from the source even if it knows that it will read zeros, and then search for non-zero bytes in the read buffer, if it's possible that a write might be needed. This reimplementation of the copying core reorganises the code to remove the duplication and have a much more obvious code flow, by essentially splitting the copy iteration loop into three parts: 1. Find the number of contiguous sectors of the same status at the current offset (This can also be called in a separate loop before the copying loop in order to determine the total sectors for the progress bar.) 2. Read sectors. If the status implies that there is no data there to read (zero or unallocated cluster), don't do anything. 3. Write sectors depending on the status. If it's data, write it. If we want the backing file to be visible (with -B), don't write it. If it's zeroed, skip it if you can, otherwise use bdrv_write_zeroes() to optimise the write at least where possible. Signed-off-by: Kevin Wolf <kwolf@redhat.com> Reviewed-by: Max Reitz <mreitz@redhat.com>
2015-03-19 12:33:32 +00:00
s->allocated_sectors += n;
}
sector_num += n;
}
/* Do the copy */
s->src_cur = 0;
s->src_cur_offset = 0;
s->sector_next_status = 0;
sector_num = 0;
allocated_done = 0;
while (sector_num < s->total_sectors) {
n = convert_iteration_sectors(s, sector_num);
if (n < 0) {
ret = n;
goto fail;
}
if (s->status == BLK_DATA || (!s->min_sparse && s->status == BLK_ZERO))
{
qemu-img convert: Rewrite copying logic The implementation of qemu-img convert is (a) messy, (b) buggy, and (c) less efficient than possible. The changes required to beat some sense into it are massive enough that incremental changes would only make my and the reviewers' life harder. So throw it away and reimplement it from scratch. Let me give some examples what I mean by messy, buggy and inefficient: (a) The copying logic of qemu-img convert has two separate branches for compressed and normal target images, which roughly do the same - except for a little code that handles actual differences between compressed and uncompressed images, and much more code that implements just a different set of optimisations and bugs. This is unnecessary code duplication, and makes the code for compressed output (unsurprisingly) suffer from bitrot. The code for uncompressed ouput is run twice to count the the total length for the progress bar. In the first run it just takes a shortcut and runs only half the loop, and when it's done, it toggles a boolean, jumps out of the loop with a backwards goto and starts over. Works, but pretty is something different. (b) Converting while keeping a backing file (-B option) is broken in several ways. This includes not writing to the image file if the input has zero clusters or data filled with zeros (ignoring that the backing file will be visible instead). It also doesn't correctly limit every iteration of the copy loop to sectors of the same status so that too many sectors may be copied to in the target image. For -B this gives an unexpected result, for other images it just does more work than necessary. Conversion with a compressed target completely ignores any target backing file. (c) qemu-img convert skips reading and writing an area if it knows from metadata that copying isn't needed (except for the bug mentioned above that ignores a status change in some cases). It does, however, read from the source even if it knows that it will read zeros, and then search for non-zero bytes in the read buffer, if it's possible that a write might be needed. This reimplementation of the copying core reorganises the code to remove the duplication and have a much more obvious code flow, by essentially splitting the copy iteration loop into three parts: 1. Find the number of contiguous sectors of the same status at the current offset (This can also be called in a separate loop before the copying loop in order to determine the total sectors for the progress bar.) 2. Read sectors. If the status implies that there is no data there to read (zero or unallocated cluster), don't do anything. 3. Write sectors depending on the status. If it's data, write it. If we want the backing file to be visible (with -B), don't write it. If it's zeroed, skip it if you can, otherwise use bdrv_write_zeroes() to optimise the write at least where possible. Signed-off-by: Kevin Wolf <kwolf@redhat.com> Reviewed-by: Max Reitz <mreitz@redhat.com>
2015-03-19 12:33:32 +00:00
allocated_done += n;
qemu_progress_print(100.0 * allocated_done / s->allocated_sectors,
0);
}
if (s->status == BLK_DATA) {
ret = convert_read(s, sector_num, n, buf);
if (ret < 0) {
error_report("error while reading sector %" PRId64
": %s", sector_num, strerror(-ret));
goto fail;
}
} else if (!s->min_sparse && s->status == BLK_ZERO) {
n = MIN(n, s->buf_sectors);
memset(buf, 0, n * BDRV_SECTOR_SIZE);
s->status = BLK_DATA;
qemu-img convert: Rewrite copying logic The implementation of qemu-img convert is (a) messy, (b) buggy, and (c) less efficient than possible. The changes required to beat some sense into it are massive enough that incremental changes would only make my and the reviewers' life harder. So throw it away and reimplement it from scratch. Let me give some examples what I mean by messy, buggy and inefficient: (a) The copying logic of qemu-img convert has two separate branches for compressed and normal target images, which roughly do the same - except for a little code that handles actual differences between compressed and uncompressed images, and much more code that implements just a different set of optimisations and bugs. This is unnecessary code duplication, and makes the code for compressed output (unsurprisingly) suffer from bitrot. The code for uncompressed ouput is run twice to count the the total length for the progress bar. In the first run it just takes a shortcut and runs only half the loop, and when it's done, it toggles a boolean, jumps out of the loop with a backwards goto and starts over. Works, but pretty is something different. (b) Converting while keeping a backing file (-B option) is broken in several ways. This includes not writing to the image file if the input has zero clusters or data filled with zeros (ignoring that the backing file will be visible instead). It also doesn't correctly limit every iteration of the copy loop to sectors of the same status so that too many sectors may be copied to in the target image. For -B this gives an unexpected result, for other images it just does more work than necessary. Conversion with a compressed target completely ignores any target backing file. (c) qemu-img convert skips reading and writing an area if it knows from metadata that copying isn't needed (except for the bug mentioned above that ignores a status change in some cases). It does, however, read from the source even if it knows that it will read zeros, and then search for non-zero bytes in the read buffer, if it's possible that a write might be needed. This reimplementation of the copying core reorganises the code to remove the duplication and have a much more obvious code flow, by essentially splitting the copy iteration loop into three parts: 1. Find the number of contiguous sectors of the same status at the current offset (This can also be called in a separate loop before the copying loop in order to determine the total sectors for the progress bar.) 2. Read sectors. If the status implies that there is no data there to read (zero or unallocated cluster), don't do anything. 3. Write sectors depending on the status. If it's data, write it. If we want the backing file to be visible (with -B), don't write it. If it's zeroed, skip it if you can, otherwise use bdrv_write_zeroes() to optimise the write at least where possible. Signed-off-by: Kevin Wolf <kwolf@redhat.com> Reviewed-by: Max Reitz <mreitz@redhat.com>
2015-03-19 12:33:32 +00:00
}
ret = convert_write(s, sector_num, n, buf);
if (ret < 0) {
error_report("error while writing sector %" PRId64
": %s", sector_num, strerror(-ret));
goto fail;
}
sector_num += n;
}
if (s->compressed) {
/* signal EOF to align */
ret = blk_pwrite_compressed(s->target, 0, NULL, 0);
qemu-img convert: Rewrite copying logic The implementation of qemu-img convert is (a) messy, (b) buggy, and (c) less efficient than possible. The changes required to beat some sense into it are massive enough that incremental changes would only make my and the reviewers' life harder. So throw it away and reimplement it from scratch. Let me give some examples what I mean by messy, buggy and inefficient: (a) The copying logic of qemu-img convert has two separate branches for compressed and normal target images, which roughly do the same - except for a little code that handles actual differences between compressed and uncompressed images, and much more code that implements just a different set of optimisations and bugs. This is unnecessary code duplication, and makes the code for compressed output (unsurprisingly) suffer from bitrot. The code for uncompressed ouput is run twice to count the the total length for the progress bar. In the first run it just takes a shortcut and runs only half the loop, and when it's done, it toggles a boolean, jumps out of the loop with a backwards goto and starts over. Works, but pretty is something different. (b) Converting while keeping a backing file (-B option) is broken in several ways. This includes not writing to the image file if the input has zero clusters or data filled with zeros (ignoring that the backing file will be visible instead). It also doesn't correctly limit every iteration of the copy loop to sectors of the same status so that too many sectors may be copied to in the target image. For -B this gives an unexpected result, for other images it just does more work than necessary. Conversion with a compressed target completely ignores any target backing file. (c) qemu-img convert skips reading and writing an area if it knows from metadata that copying isn't needed (except for the bug mentioned above that ignores a status change in some cases). It does, however, read from the source even if it knows that it will read zeros, and then search for non-zero bytes in the read buffer, if it's possible that a write might be needed. This reimplementation of the copying core reorganises the code to remove the duplication and have a much more obvious code flow, by essentially splitting the copy iteration loop into three parts: 1. Find the number of contiguous sectors of the same status at the current offset (This can also be called in a separate loop before the copying loop in order to determine the total sectors for the progress bar.) 2. Read sectors. If the status implies that there is no data there to read (zero or unallocated cluster), don't do anything. 3. Write sectors depending on the status. If it's data, write it. If we want the backing file to be visible (with -B), don't write it. If it's zeroed, skip it if you can, otherwise use bdrv_write_zeroes() to optimise the write at least where possible. Signed-off-by: Kevin Wolf <kwolf@redhat.com> Reviewed-by: Max Reitz <mreitz@redhat.com>
2015-03-19 12:33:32 +00:00
if (ret < 0) {
goto fail;
}
}
ret = 0;
fail:
qemu_vfree(buf);
return ret;
}
static int img_convert(int argc, char **argv)
{
qemu-img convert: Rewrite copying logic The implementation of qemu-img convert is (a) messy, (b) buggy, and (c) less efficient than possible. The changes required to beat some sense into it are massive enough that incremental changes would only make my and the reviewers' life harder. So throw it away and reimplement it from scratch. Let me give some examples what I mean by messy, buggy and inefficient: (a) The copying logic of qemu-img convert has two separate branches for compressed and normal target images, which roughly do the same - except for a little code that handles actual differences between compressed and uncompressed images, and much more code that implements just a different set of optimisations and bugs. This is unnecessary code duplication, and makes the code for compressed output (unsurprisingly) suffer from bitrot. The code for uncompressed ouput is run twice to count the the total length for the progress bar. In the first run it just takes a shortcut and runs only half the loop, and when it's done, it toggles a boolean, jumps out of the loop with a backwards goto and starts over. Works, but pretty is something different. (b) Converting while keeping a backing file (-B option) is broken in several ways. This includes not writing to the image file if the input has zero clusters or data filled with zeros (ignoring that the backing file will be visible instead). It also doesn't correctly limit every iteration of the copy loop to sectors of the same status so that too many sectors may be copied to in the target image. For -B this gives an unexpected result, for other images it just does more work than necessary. Conversion with a compressed target completely ignores any target backing file. (c) qemu-img convert skips reading and writing an area if it knows from metadata that copying isn't needed (except for the bug mentioned above that ignores a status change in some cases). It does, however, read from the source even if it knows that it will read zeros, and then search for non-zero bytes in the read buffer, if it's possible that a write might be needed. This reimplementation of the copying core reorganises the code to remove the duplication and have a much more obvious code flow, by essentially splitting the copy iteration loop into three parts: 1. Find the number of contiguous sectors of the same status at the current offset (This can also be called in a separate loop before the copying loop in order to determine the total sectors for the progress bar.) 2. Read sectors. If the status implies that there is no data there to read (zero or unallocated cluster), don't do anything. 3. Write sectors depending on the status. If it's data, write it. If we want the backing file to be visible (with -B), don't write it. If it's zeroed, skip it if you can, otherwise use bdrv_write_zeroes() to optimise the write at least where possible. Signed-off-by: Kevin Wolf <kwolf@redhat.com> Reviewed-by: Max Reitz <mreitz@redhat.com>
2015-03-19 12:33:32 +00:00
int c, bs_n, bs_i, compress, cluster_sectors, skip_create;
int64_t ret = 0;
int progress = 0, flags, src_flags;
bool writethrough, src_writethrough;
const char *fmt, *out_fmt, *cache, *src_cache, *out_baseimg, *out_filename;
BlockDriver *drv, *proto_drv;
block: New BlockBackend A block device consists of a frontend device model and a backend. A block backend has a tree of block drivers doing the actual work. The tree is managed by the block layer. We currently use a single abstraction BlockDriverState both for tree nodes and the backend as a whole. Drawbacks: * Its API includes both stuff that makes sense only at the block backend level (root of the tree) and stuff that's only for use within the block layer. This makes the API bigger and more complex than necessary. Moreover, it's not obvious which interfaces are meant for device models, and which really aren't. * Since device models keep a reference to their backend, the backend object can't just be destroyed. But for media change, we need to replace the tree. Our solution is to make the BlockDriverState generic, with actual driver state in a separate object, pointed to by member opaque. That lets us replace the tree by deinitializing and reinitializing its root. This special need of the root makes the data structure awkward everywhere in the tree. The general plan is to separate the APIs into "block backend", for use by device models, monitor and whatever other code dealing with block backends, and "block driver", for use by the block layer and whatever other code (if any) dealing with trees and tree nodes. Code dealing with block backends, device models in particular, should become completely oblivious of BlockDriverState. This should let us clean up both APIs, and the tree data structures. This commit is a first step. It creates a minimal "block backend" API: type BlockBackend and functions to create, destroy and find them. BlockBackend objects are created and destroyed exactly when root BlockDriverState objects are created and destroyed. "Root" in the sense of "in bdrv_states". They're not yet used for anything; that'll come shortly. A root BlockDriverState is created with bdrv_new_root(), so where to create a BlockBackend is obvious. Where these roots get destroyed isn't always as obvious. It is obvious in qemu-img.c, qemu-io.c and qemu-nbd.c, and in error paths of blockdev_init(), blk_connect(). That leaves destruction of objects successfully created by blockdev_init() and blk_connect(). blockdev_init() is used only by drive_new() and qmp_blockdev_add(). Objects created by the latter are currently indestructible (see commit 48f364d "blockdev: Refuse to drive_del something added with blockdev-add" and commit 2d246f0 "blockdev: Introduce DriveInfo.enable_auto_del"). Objects created by the former get destroyed by drive_del(). Objects created by blk_connect() get destroyed by blk_disconnect(). BlockBackend is reference-counted. Its reference count never exceeds one so far, but that's going to change. In drive_del(), the BB's reference count is surely one now. The BDS's reference count is greater than one when something else is holding a reference, such as a block job. In this case, the BB is destroyed right away, but the BDS lives on until all extra references get dropped. Signed-off-by: Markus Armbruster <armbru@redhat.com> Reviewed-by: Max Reitz <mreitz@redhat.com> Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2014-10-07 11:59:04 +00:00
BlockBackend **blk = NULL, *out_blk = NULL;
BlockDriverState **bs = NULL, *out_bs = NULL;
qemu-img convert: Rewrite copying logic The implementation of qemu-img convert is (a) messy, (b) buggy, and (c) less efficient than possible. The changes required to beat some sense into it are massive enough that incremental changes would only make my and the reviewers' life harder. So throw it away and reimplement it from scratch. Let me give some examples what I mean by messy, buggy and inefficient: (a) The copying logic of qemu-img convert has two separate branches for compressed and normal target images, which roughly do the same - except for a little code that handles actual differences between compressed and uncompressed images, and much more code that implements just a different set of optimisations and bugs. This is unnecessary code duplication, and makes the code for compressed output (unsurprisingly) suffer from bitrot. The code for uncompressed ouput is run twice to count the the total length for the progress bar. In the first run it just takes a shortcut and runs only half the loop, and when it's done, it toggles a boolean, jumps out of the loop with a backwards goto and starts over. Works, but pretty is something different. (b) Converting while keeping a backing file (-B option) is broken in several ways. This includes not writing to the image file if the input has zero clusters or data filled with zeros (ignoring that the backing file will be visible instead). It also doesn't correctly limit every iteration of the copy loop to sectors of the same status so that too many sectors may be copied to in the target image. For -B this gives an unexpected result, for other images it just does more work than necessary. Conversion with a compressed target completely ignores any target backing file. (c) qemu-img convert skips reading and writing an area if it knows from metadata that copying isn't needed (except for the bug mentioned above that ignores a status change in some cases). It does, however, read from the source even if it knows that it will read zeros, and then search for non-zero bytes in the read buffer, if it's possible that a write might be needed. This reimplementation of the copying core reorganises the code to remove the duplication and have a much more obvious code flow, by essentially splitting the copy iteration loop into three parts: 1. Find the number of contiguous sectors of the same status at the current offset (This can also be called in a separate loop before the copying loop in order to determine the total sectors for the progress bar.) 2. Read sectors. If the status implies that there is no data there to read (zero or unallocated cluster), don't do anything. 3. Write sectors depending on the status. If it's data, write it. If we want the backing file to be visible (with -B), don't write it. If it's zeroed, skip it if you can, otherwise use bdrv_write_zeroes() to optimise the write at least where possible. Signed-off-by: Kevin Wolf <kwolf@redhat.com> Reviewed-by: Max Reitz <mreitz@redhat.com>
2015-03-19 12:33:32 +00:00
int64_t total_sectors;
int64_t *bs_sectors = NULL;
size_t bufsectors = IO_BUF_SIZE / BDRV_SECTOR_SIZE;
BlockDriverInfo bdi;
QemuOpts *opts = NULL;
QemuOptsList *create_opts = NULL;
const char *out_baseimg_param;
char *options = NULL;
const char *snapshot_name = NULL;
int min_sparse = 8; /* Need at least 4k of zeros for sparse detection */
bool quiet = false;
Error *local_err = NULL;
QemuOpts *sn_opts = NULL;
qemu-img convert: Rewrite copying logic The implementation of qemu-img convert is (a) messy, (b) buggy, and (c) less efficient than possible. The changes required to beat some sense into it are massive enough that incremental changes would only make my and the reviewers' life harder. So throw it away and reimplement it from scratch. Let me give some examples what I mean by messy, buggy and inefficient: (a) The copying logic of qemu-img convert has two separate branches for compressed and normal target images, which roughly do the same - except for a little code that handles actual differences between compressed and uncompressed images, and much more code that implements just a different set of optimisations and bugs. This is unnecessary code duplication, and makes the code for compressed output (unsurprisingly) suffer from bitrot. The code for uncompressed ouput is run twice to count the the total length for the progress bar. In the first run it just takes a shortcut and runs only half the loop, and when it's done, it toggles a boolean, jumps out of the loop with a backwards goto and starts over. Works, but pretty is something different. (b) Converting while keeping a backing file (-B option) is broken in several ways. This includes not writing to the image file if the input has zero clusters or data filled with zeros (ignoring that the backing file will be visible instead). It also doesn't correctly limit every iteration of the copy loop to sectors of the same status so that too many sectors may be copied to in the target image. For -B this gives an unexpected result, for other images it just does more work than necessary. Conversion with a compressed target completely ignores any target backing file. (c) qemu-img convert skips reading and writing an area if it knows from metadata that copying isn't needed (except for the bug mentioned above that ignores a status change in some cases). It does, however, read from the source even if it knows that it will read zeros, and then search for non-zero bytes in the read buffer, if it's possible that a write might be needed. This reimplementation of the copying core reorganises the code to remove the duplication and have a much more obvious code flow, by essentially splitting the copy iteration loop into three parts: 1. Find the number of contiguous sectors of the same status at the current offset (This can also be called in a separate loop before the copying loop in order to determine the total sectors for the progress bar.) 2. Read sectors. If the status implies that there is no data there to read (zero or unallocated cluster), don't do anything. 3. Write sectors depending on the status. If it's data, write it. If we want the backing file to be visible (with -B), don't write it. If it's zeroed, skip it if you can, otherwise use bdrv_write_zeroes() to optimise the write at least where possible. Signed-off-by: Kevin Wolf <kwolf@redhat.com> Reviewed-by: Max Reitz <mreitz@redhat.com>
2015-03-19 12:33:32 +00:00
ImgConvertState state;
bool image_opts = false;
fmt = NULL;
out_fmt = "raw";
cache = "unsafe";
src_cache = BDRV_DEFAULT_CACHE;
out_baseimg = NULL;
compress = 0;
skip_create = 0;
for(;;) {
static const struct option long_options[] = {
{"help", no_argument, 0, 'h'},
{"object", required_argument, 0, OPTION_OBJECT},
{"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
{0, 0, 0, 0}
};
c = getopt_long(argc, argv, "hf:O:B:ce6o:s:l:S:pt:T:qn",
long_options, NULL);
if (c == -1) {
break;
}
switch(c) {
case '?':
case 'h':
help();
break;
case 'f':
fmt = optarg;
break;
case 'O':
out_fmt = optarg;
break;
case 'B':
out_baseimg = optarg;
break;
case 'c':
compress = 1;
break;
case 'e':
error_report("option -e is deprecated, please use \'-o "
"encryption\' instead!");
ret = -1;
goto fail_getopt;
case '6':
error_report("option -6 is deprecated, please use \'-o "
"compat6\' instead!");
ret = -1;
goto fail_getopt;
case 'o':
if (!is_valid_option_list(optarg)) {
error_report("Invalid option list: %s", optarg);
ret = -1;
goto fail_getopt;
}
if (!options) {
options = g_strdup(optarg);
} else {
char *old_options = options;
options = g_strdup_printf("%s,%s", options, optarg);
g_free(old_options);
}
break;
case 's':
snapshot_name = optarg;
break;
case 'l':
if (strstart(optarg, SNAPSHOT_OPT_BASE, NULL)) {
QemuOpts: Wean off qerror_report_err() qerror_report_err() is a transitional interface to help with converting existing monitor commands to QMP. It should not be used elsewhere. The only remaining user in qemu-option.c is qemu_opts_parse(). Is it used in QMP context? If not, we can simply replace qerror_report_err() by error_report_err(). The uses in qemu-img.c, qemu-io.c, qemu-nbd.c and under tests/ are clearly not in QMP context. The uses in vl.c aren't either, because the only QMP command handlers there are qmp_query_status() and qmp_query_machines(), and they don't call it. Remaining uses: * drive_def(): Command line -drive and such, HMP drive_add and pci_add * hmp_chardev_add(): HMP chardev-add * monitor_parse_command(): HMP core * tmp_config_parse(): Command line -tpmdev * net_host_device_add(): HMP host_net_add * net_client_parse(): Command line -net and -netdev * qemu_global_option(): Command line -global * vnc_parse_func(): Command line -display, -vnc, default display, HMP change, QMP change. Bummer. * qemu_pci_hot_add_nic(): HMP pci_add * usb_net_init(): Command line -usbdevice, HMP usb_add Propagate errors through qemu_opts_parse(). Create a convenience function qemu_opts_parse_noisily() that passes errors to error_report_err(). Switch all non-QMP users outside tests to it. That leaves vnc_parse_func(). Propagate errors through it. Since I'm touching it anyway, rename it to vnc_parse(). Signed-off-by: Markus Armbruster <armbru@redhat.com> Reviewed-by: Eric Blake <eblake@redhat.com> Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com> Reviewed-by: Luiz Capitulino <lcapitulino@redhat.com>
2015-02-13 11:50:26 +00:00
sn_opts = qemu_opts_parse_noisily(&internal_snapshot_opts,
optarg, false);
if (!sn_opts) {
error_report("Failed in parsing snapshot param '%s'",
optarg);
ret = -1;
goto fail_getopt;
}
} else {
snapshot_name = optarg;
}
break;
case 'S':
{
int64_t sval;
char *end;
sval = qemu_strtosz_suffix(optarg, &end, QEMU_STRTOSZ_DEFSUFFIX_B);
if (sval < 0 || *end) {
error_report("Invalid minimum zero buffer size for sparse output specified");
ret = -1;
goto fail_getopt;
}
min_sparse = sval / BDRV_SECTOR_SIZE;
break;
}
case 'p':
progress = 1;
break;
case 't':
cache = optarg;
break;
case 'T':
src_cache = optarg;
break;
case 'q':
quiet = true;
break;
case 'n':
skip_create = 1;
break;
case OPTION_OBJECT:
opts = qemu_opts_parse_noisily(&qemu_object_opts,
optarg, true);
if (!opts) {
goto fail_getopt;
}
break;
case OPTION_IMAGE_OPTS:
image_opts = true;
break;
}
}
if (qemu_opts_foreach(&qemu_object_opts,
user_creatable_add_opts_foreach,
qom: -object error messages lost location, restore it qemu_opts_foreach() runs its callback with the error location set to the option's location. Any errors the callback reports use the option's location automatically. Commit 90998d5 moved the actual error reporting from "inside" qemu_opts_foreach() to after it. Here's a typical hunk: if (qemu_opts_foreach(qemu_find_opts("object"), - object_create, - object_create_initial, NULL)) { + user_creatable_add_opts_foreach, + object_create_initial, &err)) { + error_report_err(err); exit(1); } Before, object_create() reports from within qemu_opts_foreach(), using the option's location. Afterwards, we do it after qemu_opts_foreach(), using whatever location happens to be current there. Commonly a "none" location. This is because Error objects don't have location information. Problematic. Reproducer: $ qemu-system-x86_64 -nodefaults -display none -object secret,id=foo,foo=bar qemu-system-x86_64: Property '.foo' not found Note no location. This commit restores it: qemu-system-x86_64: -object secret,id=foo,foo=bar: Property '.foo' not found Note that the qemu_opts_foreach() bug just fixed could mask the bug here: if the location it leaves dangling hasn't been clobbered, yet, it's the correct one. Reported-by: Eric Blake <eblake@redhat.com> Cc: Daniel P. Berrange <berrange@redhat.com> Signed-off-by: Markus Armbruster <armbru@redhat.com> Message-Id: <1461767349-15329-4-git-send-email-armbru@redhat.com> Reviewed-by: Daniel P. Berrange <berrange@redhat.com> Reviewed-by: Eric Blake <eblake@redhat.com> [Paragraph on Error added to commit message]
2016-04-27 14:29:09 +00:00
NULL, NULL)) {
goto fail_getopt;
}
/* Initialize before goto out */
if (quiet) {
progress = 0;
}
qemu_progress_init(progress, 1.0);
bs_n = argc - optind - 1;
out_filename = bs_n >= 1 ? argv[argc - 1] : NULL;
if (options && has_help_option(options)) {
ret = print_block_option_help(out_filename, out_fmt);
goto out;
}
if (bs_n < 1) {
error_exit("Must specify image file name");
}
if (bs_n > 1 && out_baseimg) {
error_report("-B makes no sense when concatenating multiple input "
"images");
ret = -1;
goto out;
}
src_flags = 0;
ret = bdrv_parse_cache_mode(src_cache, &src_flags, &src_writethrough);
if (ret < 0) {
error_report("Invalid source cache option: %s", src_cache);
goto out;
}
qemu_progress_print(0, 100);
block: New BlockBackend A block device consists of a frontend device model and a backend. A block backend has a tree of block drivers doing the actual work. The tree is managed by the block layer. We currently use a single abstraction BlockDriverState both for tree nodes and the backend as a whole. Drawbacks: * Its API includes both stuff that makes sense only at the block backend level (root of the tree) and stuff that's only for use within the block layer. This makes the API bigger and more complex than necessary. Moreover, it's not obvious which interfaces are meant for device models, and which really aren't. * Since device models keep a reference to their backend, the backend object can't just be destroyed. But for media change, we need to replace the tree. Our solution is to make the BlockDriverState generic, with actual driver state in a separate object, pointed to by member opaque. That lets us replace the tree by deinitializing and reinitializing its root. This special need of the root makes the data structure awkward everywhere in the tree. The general plan is to separate the APIs into "block backend", for use by device models, monitor and whatever other code dealing with block backends, and "block driver", for use by the block layer and whatever other code (if any) dealing with trees and tree nodes. Code dealing with block backends, device models in particular, should become completely oblivious of BlockDriverState. This should let us clean up both APIs, and the tree data structures. This commit is a first step. It creates a minimal "block backend" API: type BlockBackend and functions to create, destroy and find them. BlockBackend objects are created and destroyed exactly when root BlockDriverState objects are created and destroyed. "Root" in the sense of "in bdrv_states". They're not yet used for anything; that'll come shortly. A root BlockDriverState is created with bdrv_new_root(), so where to create a BlockBackend is obvious. Where these roots get destroyed isn't always as obvious. It is obvious in qemu-img.c, qemu-io.c and qemu-nbd.c, and in error paths of blockdev_init(), blk_connect(). That leaves destruction of objects successfully created by blockdev_init() and blk_connect(). blockdev_init() is used only by drive_new() and qmp_blockdev_add(). Objects created by the latter are currently indestructible (see commit 48f364d "blockdev: Refuse to drive_del something added with blockdev-add" and commit 2d246f0 "blockdev: Introduce DriveInfo.enable_auto_del"). Objects created by the former get destroyed by drive_del(). Objects created by blk_connect() get destroyed by blk_disconnect(). BlockBackend is reference-counted. Its reference count never exceeds one so far, but that's going to change. In drive_del(), the BB's reference count is surely one now. The BDS's reference count is greater than one when something else is holding a reference, such as a block job. In this case, the BB is destroyed right away, but the BDS lives on until all extra references get dropped. Signed-off-by: Markus Armbruster <armbru@redhat.com> Reviewed-by: Max Reitz <mreitz@redhat.com> Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2014-10-07 11:59:04 +00:00
blk = g_new0(BlockBackend *, bs_n);
bs = g_new0(BlockDriverState *, bs_n);
bs_sectors = g_new(int64_t, bs_n);
total_sectors = 0;
for (bs_i = 0; bs_i < bs_n; bs_i++) {
blk[bs_i] = img_open(image_opts, argv[optind + bs_i],
fmt, src_flags, src_writethrough, quiet);
if (!blk[bs_i]) {
ret = -1;
goto out;
}
bs[bs_i] = blk_bs(blk[bs_i]);
bs_sectors[bs_i] = blk_nb_sectors(blk[bs_i]);
if (bs_sectors[bs_i] < 0) {
error_report("Could not get size of %s: %s",
argv[optind + bs_i], strerror(-bs_sectors[bs_i]));
ret = -1;
goto out;
}
total_sectors += bs_sectors[bs_i];
}
if (sn_opts) {
ret = bdrv_snapshot_load_tmp(bs[0],
qemu_opt_get(sn_opts, SNAPSHOT_OPT_ID),
qemu_opt_get(sn_opts, SNAPSHOT_OPT_NAME),
&local_err);
} else if (snapshot_name != NULL) {
if (bs_n > 1) {
error_report("No support for concatenating multiple snapshot");
ret = -1;
goto out;
}
bdrv_snapshot_load_tmp_by_id_or_name(bs[0], snapshot_name, &local_err);
}
if (local_err) {
error_reportf_err(local_err, "Failed to load snapshot: ");
ret = -1;
goto out;
}
/* Find driver and parse its options */
drv = bdrv_find_format(out_fmt);
if (!drv) {
error_report("Unknown file format '%s'", out_fmt);
ret = -1;
goto out;
}
proto_drv = bdrv_find_protocol(out_filename, true, &local_err);
if (!proto_drv) {
error_report_err(local_err);
ret = -1;
goto out;
}
if (!skip_create) {
if (!drv->create_opts) {
error_report("Format driver '%s' does not support image creation",
drv->format_name);
ret = -1;
goto out;
}
if (!proto_drv->create_opts) {
error_report("Protocol driver '%s' does not support image creation",
proto_drv->format_name);
ret = -1;
goto out;
}
create_opts = qemu_opts_append(create_opts, drv->create_opts);
create_opts = qemu_opts_append(create_opts, proto_drv->create_opts);
opts = qemu_opts_create(create_opts, NULL, 0, &error_abort);
if (options) {
qemu_opts_do_parse(opts, options, NULL, &local_err);
if (local_err) {
error_report_err(local_err);
ret = -1;
goto out;
}
}
qemu_opt_set_number(opts, BLOCK_OPT_SIZE, total_sectors * 512,
&error_abort);
ret = add_old_style_options(out_fmt, opts, out_baseimg, NULL);
if (ret < 0) {
goto out;
}
}
/* Get backing file name if -o backing_file was used */
out_baseimg_param = qemu_opt_get(opts, BLOCK_OPT_BACKING_FILE);
if (out_baseimg_param) {
out_baseimg = out_baseimg_param;
}
/* Check if compression is supported */
if (compress) {
bool encryption =
qemu_opt_get_bool(opts, BLOCK_OPT_ENCRYPT, false);
const char *preallocation =
qemu_opt_get(opts, BLOCK_OPT_PREALLOC);
if (!drv->bdrv_co_pwritev_compressed) {
error_report("Compression not supported for this file format");
ret = -1;
goto out;
}
if (encryption) {
error_report("Compression and encryption not supported at "
"the same time");
ret = -1;
goto out;
}
if (preallocation
&& strcmp(preallocation, "off"))
{
error_report("Compression and preallocation not supported at "
"the same time");
ret = -1;
goto out;
}
}
if (!skip_create) {
/* Create the new image */
ret = bdrv_create(drv, out_filename, opts, &local_err);
if (ret < 0) {
error_reportf_err(local_err, "%s: error while converting %s: ",
out_filename, out_fmt);
goto out;
}
}
flags = min_sparse ? (BDRV_O_RDWR | BDRV_O_UNMAP) : BDRV_O_RDWR;
ret = bdrv_parse_cache_mode(cache, &flags, &writethrough);
if (ret < 0) {
error_report("Invalid cache option: %s", cache);
goto out;
}
/* XXX we should allow --image-opts to trigger use of
* img_open() here, but then we have trouble with
* the bdrv_create() call which takes different params.
* Not critical right now, so fix can wait...
*/
out_blk = img_open_file(out_filename, out_fmt, flags, writethrough, quiet);
if (!out_blk) {
ret = -1;
goto out;
}
out_bs = blk_bs(out_blk);
/* increase bufsectors from the default 4096 (2M) if opt_transfer
* or discard_alignment of the out_bs is greater. Limit to 32768 (16MB)
* as maximum. */
bufsectors = MIN(32768,
MAX(bufsectors,
MAX(out_bs->bl.opt_transfer >> BDRV_SECTOR_BITS,
out_bs->bl.pdiscard_alignment >>
BDRV_SECTOR_BITS)));
if (skip_create) {
int64_t output_sectors = blk_nb_sectors(out_blk);
if (output_sectors < 0) {
error_report("unable to get output image length: %s",
strerror(-output_sectors));
ret = -1;
goto out;
} else if (output_sectors < total_sectors) {
error_report("output file is smaller than input file");
ret = -1;
goto out;
}
}
cluster_sectors = 0;
ret = bdrv_get_info(out_bs, &bdi);
if (ret < 0) {
if (compress) {
error_report("could not get block driver info");
goto out;
}
} else {
compress = compress || bdi.needs_compressed_writes;
cluster_sectors = bdi.cluster_size / BDRV_SECTOR_SIZE;
}
qemu-img convert: Rewrite copying logic The implementation of qemu-img convert is (a) messy, (b) buggy, and (c) less efficient than possible. The changes required to beat some sense into it are massive enough that incremental changes would only make my and the reviewers' life harder. So throw it away and reimplement it from scratch. Let me give some examples what I mean by messy, buggy and inefficient: (a) The copying logic of qemu-img convert has two separate branches for compressed and normal target images, which roughly do the same - except for a little code that handles actual differences between compressed and uncompressed images, and much more code that implements just a different set of optimisations and bugs. This is unnecessary code duplication, and makes the code for compressed output (unsurprisingly) suffer from bitrot. The code for uncompressed ouput is run twice to count the the total length for the progress bar. In the first run it just takes a shortcut and runs only half the loop, and when it's done, it toggles a boolean, jumps out of the loop with a backwards goto and starts over. Works, but pretty is something different. (b) Converting while keeping a backing file (-B option) is broken in several ways. This includes not writing to the image file if the input has zero clusters or data filled with zeros (ignoring that the backing file will be visible instead). It also doesn't correctly limit every iteration of the copy loop to sectors of the same status so that too many sectors may be copied to in the target image. For -B this gives an unexpected result, for other images it just does more work than necessary. Conversion with a compressed target completely ignores any target backing file. (c) qemu-img convert skips reading and writing an area if it knows from metadata that copying isn't needed (except for the bug mentioned above that ignores a status change in some cases). It does, however, read from the source even if it knows that it will read zeros, and then search for non-zero bytes in the read buffer, if it's possible that a write might be needed. This reimplementation of the copying core reorganises the code to remove the duplication and have a much more obvious code flow, by essentially splitting the copy iteration loop into three parts: 1. Find the number of contiguous sectors of the same status at the current offset (This can also be called in a separate loop before the copying loop in order to determine the total sectors for the progress bar.) 2. Read sectors. If the status implies that there is no data there to read (zero or unallocated cluster), don't do anything. 3. Write sectors depending on the status. If it's data, write it. If we want the backing file to be visible (with -B), don't write it. If it's zeroed, skip it if you can, otherwise use bdrv_write_zeroes() to optimise the write at least where possible. Signed-off-by: Kevin Wolf <kwolf@redhat.com> Reviewed-by: Max Reitz <mreitz@redhat.com>
2015-03-19 12:33:32 +00:00
state = (ImgConvertState) {
.src = blk,
.src_sectors = bs_sectors,
.src_num = bs_n,
.total_sectors = total_sectors,
.target = out_blk,
.compressed = compress,
.target_has_backing = (bool) out_baseimg,
.min_sparse = min_sparse,
.cluster_sectors = cluster_sectors,
.buf_sectors = bufsectors,
};
ret = convert_do_copy(&state);
out:
if (!ret) {
qemu_progress_print(100, 0);
}
qemu_progress_end();
qemu_opts_del(opts);
qemu_opts_free(create_opts);
qemu_opts_del(sn_opts);
block: New BlockBackend A block device consists of a frontend device model and a backend. A block backend has a tree of block drivers doing the actual work. The tree is managed by the block layer. We currently use a single abstraction BlockDriverState both for tree nodes and the backend as a whole. Drawbacks: * Its API includes both stuff that makes sense only at the block backend level (root of the tree) and stuff that's only for use within the block layer. This makes the API bigger and more complex than necessary. Moreover, it's not obvious which interfaces are meant for device models, and which really aren't. * Since device models keep a reference to their backend, the backend object can't just be destroyed. But for media change, we need to replace the tree. Our solution is to make the BlockDriverState generic, with actual driver state in a separate object, pointed to by member opaque. That lets us replace the tree by deinitializing and reinitializing its root. This special need of the root makes the data structure awkward everywhere in the tree. The general plan is to separate the APIs into "block backend", for use by device models, monitor and whatever other code dealing with block backends, and "block driver", for use by the block layer and whatever other code (if any) dealing with trees and tree nodes. Code dealing with block backends, device models in particular, should become completely oblivious of BlockDriverState. This should let us clean up both APIs, and the tree data structures. This commit is a first step. It creates a minimal "block backend" API: type BlockBackend and functions to create, destroy and find them. BlockBackend objects are created and destroyed exactly when root BlockDriverState objects are created and destroyed. "Root" in the sense of "in bdrv_states". They're not yet used for anything; that'll come shortly. A root BlockDriverState is created with bdrv_new_root(), so where to create a BlockBackend is obvious. Where these roots get destroyed isn't always as obvious. It is obvious in qemu-img.c, qemu-io.c and qemu-nbd.c, and in error paths of blockdev_init(), blk_connect(). That leaves destruction of objects successfully created by blockdev_init() and blk_connect(). blockdev_init() is used only by drive_new() and qmp_blockdev_add(). Objects created by the latter are currently indestructible (see commit 48f364d "blockdev: Refuse to drive_del something added with blockdev-add" and commit 2d246f0 "blockdev: Introduce DriveInfo.enable_auto_del"). Objects created by the former get destroyed by drive_del(). Objects created by blk_connect() get destroyed by blk_disconnect(). BlockBackend is reference-counted. Its reference count never exceeds one so far, but that's going to change. In drive_del(), the BB's reference count is surely one now. The BDS's reference count is greater than one when something else is holding a reference, such as a block job. In this case, the BB is destroyed right away, but the BDS lives on until all extra references get dropped. Signed-off-by: Markus Armbruster <armbru@redhat.com> Reviewed-by: Max Reitz <mreitz@redhat.com> Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2014-10-07 11:59:04 +00:00
blk_unref(out_blk);
g_free(bs);
block: New BlockBackend A block device consists of a frontend device model and a backend. A block backend has a tree of block drivers doing the actual work. The tree is managed by the block layer. We currently use a single abstraction BlockDriverState both for tree nodes and the backend as a whole. Drawbacks: * Its API includes both stuff that makes sense only at the block backend level (root of the tree) and stuff that's only for use within the block layer. This makes the API bigger and more complex than necessary. Moreover, it's not obvious which interfaces are meant for device models, and which really aren't. * Since device models keep a reference to their backend, the backend object can't just be destroyed. But for media change, we need to replace the tree. Our solution is to make the BlockDriverState generic, with actual driver state in a separate object, pointed to by member opaque. That lets us replace the tree by deinitializing and reinitializing its root. This special need of the root makes the data structure awkward everywhere in the tree. The general plan is to separate the APIs into "block backend", for use by device models, monitor and whatever other code dealing with block backends, and "block driver", for use by the block layer and whatever other code (if any) dealing with trees and tree nodes. Code dealing with block backends, device models in particular, should become completely oblivious of BlockDriverState. This should let us clean up both APIs, and the tree data structures. This commit is a first step. It creates a minimal "block backend" API: type BlockBackend and functions to create, destroy and find them. BlockBackend objects are created and destroyed exactly when root BlockDriverState objects are created and destroyed. "Root" in the sense of "in bdrv_states". They're not yet used for anything; that'll come shortly. A root BlockDriverState is created with bdrv_new_root(), so where to create a BlockBackend is obvious. Where these roots get destroyed isn't always as obvious. It is obvious in qemu-img.c, qemu-io.c and qemu-nbd.c, and in error paths of blockdev_init(), blk_connect(). That leaves destruction of objects successfully created by blockdev_init() and blk_connect(). blockdev_init() is used only by drive_new() and qmp_blockdev_add(). Objects created by the latter are currently indestructible (see commit 48f364d "blockdev: Refuse to drive_del something added with blockdev-add" and commit 2d246f0 "blockdev: Introduce DriveInfo.enable_auto_del"). Objects created by the former get destroyed by drive_del(). Objects created by blk_connect() get destroyed by blk_disconnect(). BlockBackend is reference-counted. Its reference count never exceeds one so far, but that's going to change. In drive_del(), the BB's reference count is surely one now. The BDS's reference count is greater than one when something else is holding a reference, such as a block job. In this case, the BB is destroyed right away, but the BDS lives on until all extra references get dropped. Signed-off-by: Markus Armbruster <armbru@redhat.com> Reviewed-by: Max Reitz <mreitz@redhat.com> Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2014-10-07 11:59:04 +00:00
if (blk) {
for (bs_i = 0; bs_i < bs_n; bs_i++) {
blk_unref(blk[bs_i]);
}
g_free(blk);
}
g_free(bs_sectors);
fail_getopt:
g_free(options);
if (ret) {
return 1;
}
return 0;
}
static void dump_snapshots(BlockDriverState *bs)
{
QEMUSnapshotInfo *sn_tab, *sn;
int nb_sns, i;
nb_sns = bdrv_snapshot_list(bs, &sn_tab);
if (nb_sns <= 0)
return;
printf("Snapshot list:\n");
bdrv_snapshot_dump(fprintf, stdout, NULL);
printf("\n");
for(i = 0; i < nb_sns; i++) {
sn = &sn_tab[i];
bdrv_snapshot_dump(fprintf, stdout, sn);
printf("\n");
}
g_free(sn_tab);
}
static void dump_json_image_info_list(ImageInfoList *list)
{
QString *str;
QObject *obj;
qapi: Add new visit_complete() function Making each output visitor provide its own output collection function was the only remaining reason for exposing visitor sub-types to the rest of the code base. Add a polymorphic visit_complete() function which is a no-op for input visitors, and which populates an opaque pointer for output visitors. For maximum type-safety, also add a parameter to the output visitor constructors with a type-correct version of the output pointer, and assert that the two uses match. This approach was considered superior to either passing the output parameter only during construction (action at a distance during visit_free() feels awkward) or only during visit_complete() (defeating type safety makes it easier to use incorrectly). Most callers were function-local, and therefore a mechanical conversion; the testsuite was a bit trickier, but the previous cleanup patch minimized the churn here. The visit_complete() function may be called at most once; doing so lets us use transfer semantics rather than duplication or ref-count semantics to get the just-built output back to the caller, even though it means our behavior is not idempotent. Generated code is simplified as follows for events: |@@ -26,7 +26,7 @@ void qapi_event_send_acpi_device_ost(ACP | QDict *qmp; | Error *err = NULL; | QMPEventFuncEmit emit; |- QmpOutputVisitor *qov; |+ QObject *obj; | Visitor *v; | q_obj_ACPI_DEVICE_OST_arg param = { | info |@@ -39,8 +39,7 @@ void qapi_event_send_acpi_device_ost(ACP | | qmp = qmp_event_build_dict("ACPI_DEVICE_OST"); | |- qov = qmp_output_visitor_new(); |- v = qmp_output_get_visitor(qov); |+ v = qmp_output_visitor_new(&obj); | | visit_start_struct(v, "ACPI_DEVICE_OST", NULL, 0, &err); | if (err) { |@@ -55,7 +54,8 @@ void qapi_event_send_acpi_device_ost(ACP | goto out; | } | |- qdict_put_obj(qmp, "data", qmp_output_get_qobject(qov)); |+ visit_complete(v, &obj); |+ qdict_put_obj(qmp, "data", obj); | emit(QAPI_EVENT_ACPI_DEVICE_OST, qmp, &err); and for commands: | { | Error *err = NULL; |- QmpOutputVisitor *qov = qmp_output_visitor_new(); | Visitor *v; | |- v = qmp_output_get_visitor(qov); |+ v = qmp_output_visitor_new(ret_out); | visit_type_AddfdInfo(v, "unused", &ret_in, &err); |- if (err) { |- goto out; |+ if (!err) { |+ visit_complete(v, ret_out); | } |- *ret_out = qmp_output_get_qobject(qov); |- |-out: | error_propagate(errp, err); Signed-off-by: Eric Blake <eblake@redhat.com> Message-Id: <1465490926-28625-13-git-send-email-eblake@redhat.com> Reviewed-by: Markus Armbruster <armbru@redhat.com> Signed-off-by: Markus Armbruster <armbru@redhat.com>
2016-06-09 16:48:43 +00:00
Visitor *v = qmp_output_visitor_new(&obj);
visit_type_ImageInfoList(v, NULL, &list, &error_abort);
visit_complete(v, &obj);
str = qobject_to_json_pretty(obj);
assert(str != NULL);
printf("%s\n", qstring_get_str(str));
qobject_decref(obj);
qapi: Add new visit_complete() function Making each output visitor provide its own output collection function was the only remaining reason for exposing visitor sub-types to the rest of the code base. Add a polymorphic visit_complete() function which is a no-op for input visitors, and which populates an opaque pointer for output visitors. For maximum type-safety, also add a parameter to the output visitor constructors with a type-correct version of the output pointer, and assert that the two uses match. This approach was considered superior to either passing the output parameter only during construction (action at a distance during visit_free() feels awkward) or only during visit_complete() (defeating type safety makes it easier to use incorrectly). Most callers were function-local, and therefore a mechanical conversion; the testsuite was a bit trickier, but the previous cleanup patch minimized the churn here. The visit_complete() function may be called at most once; doing so lets us use transfer semantics rather than duplication or ref-count semantics to get the just-built output back to the caller, even though it means our behavior is not idempotent. Generated code is simplified as follows for events: |@@ -26,7 +26,7 @@ void qapi_event_send_acpi_device_ost(ACP | QDict *qmp; | Error *err = NULL; | QMPEventFuncEmit emit; |- QmpOutputVisitor *qov; |+ QObject *obj; | Visitor *v; | q_obj_ACPI_DEVICE_OST_arg param = { | info |@@ -39,8 +39,7 @@ void qapi_event_send_acpi_device_ost(ACP | | qmp = qmp_event_build_dict("ACPI_DEVICE_OST"); | |- qov = qmp_output_visitor_new(); |- v = qmp_output_get_visitor(qov); |+ v = qmp_output_visitor_new(&obj); | | visit_start_struct(v, "ACPI_DEVICE_OST", NULL, 0, &err); | if (err) { |@@ -55,7 +54,8 @@ void qapi_event_send_acpi_device_ost(ACP | goto out; | } | |- qdict_put_obj(qmp, "data", qmp_output_get_qobject(qov)); |+ visit_complete(v, &obj); |+ qdict_put_obj(qmp, "data", obj); | emit(QAPI_EVENT_ACPI_DEVICE_OST, qmp, &err); and for commands: | { | Error *err = NULL; |- QmpOutputVisitor *qov = qmp_output_visitor_new(); | Visitor *v; | |- v = qmp_output_get_visitor(qov); |+ v = qmp_output_visitor_new(ret_out); | visit_type_AddfdInfo(v, "unused", &ret_in, &err); |- if (err) { |- goto out; |+ if (!err) { |+ visit_complete(v, ret_out); | } |- *ret_out = qmp_output_get_qobject(qov); |- |-out: | error_propagate(errp, err); Signed-off-by: Eric Blake <eblake@redhat.com> Message-Id: <1465490926-28625-13-git-send-email-eblake@redhat.com> Reviewed-by: Markus Armbruster <armbru@redhat.com> Signed-off-by: Markus Armbruster <armbru@redhat.com>
2016-06-09 16:48:43 +00:00
visit_free(v);
QDECREF(str);
}
static void dump_json_image_info(ImageInfo *info)
{
QString *str;
QObject *obj;
qapi: Add new visit_complete() function Making each output visitor provide its own output collection function was the only remaining reason for exposing visitor sub-types to the rest of the code base. Add a polymorphic visit_complete() function which is a no-op for input visitors, and which populates an opaque pointer for output visitors. For maximum type-safety, also add a parameter to the output visitor constructors with a type-correct version of the output pointer, and assert that the two uses match. This approach was considered superior to either passing the output parameter only during construction (action at a distance during visit_free() feels awkward) or only during visit_complete() (defeating type safety makes it easier to use incorrectly). Most callers were function-local, and therefore a mechanical conversion; the testsuite was a bit trickier, but the previous cleanup patch minimized the churn here. The visit_complete() function may be called at most once; doing so lets us use transfer semantics rather than duplication or ref-count semantics to get the just-built output back to the caller, even though it means our behavior is not idempotent. Generated code is simplified as follows for events: |@@ -26,7 +26,7 @@ void qapi_event_send_acpi_device_ost(ACP | QDict *qmp; | Error *err = NULL; | QMPEventFuncEmit emit; |- QmpOutputVisitor *qov; |+ QObject *obj; | Visitor *v; | q_obj_ACPI_DEVICE_OST_arg param = { | info |@@ -39,8 +39,7 @@ void qapi_event_send_acpi_device_ost(ACP | | qmp = qmp_event_build_dict("ACPI_DEVICE_OST"); | |- qov = qmp_output_visitor_new(); |- v = qmp_output_get_visitor(qov); |+ v = qmp_output_visitor_new(&obj); | | visit_start_struct(v, "ACPI_DEVICE_OST", NULL, 0, &err); | if (err) { |@@ -55,7 +54,8 @@ void qapi_event_send_acpi_device_ost(ACP | goto out; | } | |- qdict_put_obj(qmp, "data", qmp_output_get_qobject(qov)); |+ visit_complete(v, &obj); |+ qdict_put_obj(qmp, "data", obj); | emit(QAPI_EVENT_ACPI_DEVICE_OST, qmp, &err); and for commands: | { | Error *err = NULL; |- QmpOutputVisitor *qov = qmp_output_visitor_new(); | Visitor *v; | |- v = qmp_output_get_visitor(qov); |+ v = qmp_output_visitor_new(ret_out); | visit_type_AddfdInfo(v, "unused", &ret_in, &err); |- if (err) { |- goto out; |+ if (!err) { |+ visit_complete(v, ret_out); | } |- *ret_out = qmp_output_get_qobject(qov); |- |-out: | error_propagate(errp, err); Signed-off-by: Eric Blake <eblake@redhat.com> Message-Id: <1465490926-28625-13-git-send-email-eblake@redhat.com> Reviewed-by: Markus Armbruster <armbru@redhat.com> Signed-off-by: Markus Armbruster <armbru@redhat.com>
2016-06-09 16:48:43 +00:00
Visitor *v = qmp_output_visitor_new(&obj);
visit_type_ImageInfo(v, NULL, &info, &error_abort);
visit_complete(v, &obj);
str = qobject_to_json_pretty(obj);
assert(str != NULL);
printf("%s\n", qstring_get_str(str));
qobject_decref(obj);
qapi: Add new visit_complete() function Making each output visitor provide its own output collection function was the only remaining reason for exposing visitor sub-types to the rest of the code base. Add a polymorphic visit_complete() function which is a no-op for input visitors, and which populates an opaque pointer for output visitors. For maximum type-safety, also add a parameter to the output visitor constructors with a type-correct version of the output pointer, and assert that the two uses match. This approach was considered superior to either passing the output parameter only during construction (action at a distance during visit_free() feels awkward) or only during visit_complete() (defeating type safety makes it easier to use incorrectly). Most callers were function-local, and therefore a mechanical conversion; the testsuite was a bit trickier, but the previous cleanup patch minimized the churn here. The visit_complete() function may be called at most once; doing so lets us use transfer semantics rather than duplication or ref-count semantics to get the just-built output back to the caller, even though it means our behavior is not idempotent. Generated code is simplified as follows for events: |@@ -26,7 +26,7 @@ void qapi_event_send_acpi_device_ost(ACP | QDict *qmp; | Error *err = NULL; | QMPEventFuncEmit emit; |- QmpOutputVisitor *qov; |+ QObject *obj; | Visitor *v; | q_obj_ACPI_DEVICE_OST_arg param = { | info |@@ -39,8 +39,7 @@ void qapi_event_send_acpi_device_ost(ACP | | qmp = qmp_event_build_dict("ACPI_DEVICE_OST"); | |- qov = qmp_output_visitor_new(); |- v = qmp_output_get_visitor(qov); |+ v = qmp_output_visitor_new(&obj); | | visit_start_struct(v, "ACPI_DEVICE_OST", NULL, 0, &err); | if (err) { |@@ -55,7 +54,8 @@ void qapi_event_send_acpi_device_ost(ACP | goto out; | } | |- qdict_put_obj(qmp, "data", qmp_output_get_qobject(qov)); |+ visit_complete(v, &obj); |+ qdict_put_obj(qmp, "data", obj); | emit(QAPI_EVENT_ACPI_DEVICE_OST, qmp, &err); and for commands: | { | Error *err = NULL; |- QmpOutputVisitor *qov = qmp_output_visitor_new(); | Visitor *v; | |- v = qmp_output_get_visitor(qov); |+ v = qmp_output_visitor_new(ret_out); | visit_type_AddfdInfo(v, "unused", &ret_in, &err); |- if (err) { |- goto out; |+ if (!err) { |+ visit_complete(v, ret_out); | } |- *ret_out = qmp_output_get_qobject(qov); |- |-out: | error_propagate(errp, err); Signed-off-by: Eric Blake <eblake@redhat.com> Message-Id: <1465490926-28625-13-git-send-email-eblake@redhat.com> Reviewed-by: Markus Armbruster <armbru@redhat.com> Signed-off-by: Markus Armbruster <armbru@redhat.com>
2016-06-09 16:48:43 +00:00
visit_free(v);
QDECREF(str);
}
static void dump_human_image_info_list(ImageInfoList *list)
{
ImageInfoList *elem;
bool delim = false;
for (elem = list; elem; elem = elem->next) {
if (delim) {
printf("\n");
}
delim = true;
bdrv_image_info_dump(fprintf, stdout, elem->value);
}
}
static gboolean str_equal_func(gconstpointer a, gconstpointer b)
{
return strcmp(a, b) == 0;
}
/**
* Open an image file chain and return an ImageInfoList
*
* @filename: topmost image filename
* @fmt: topmost image format (may be NULL to autodetect)
* @chain: true - enumerate entire backing file chain
* false - only topmost image file
*
* Returns a list of ImageInfo objects or NULL if there was an error opening an
* image file. If there was an error a message will have been printed to
* stderr.
*/
static ImageInfoList *collect_image_info_list(bool image_opts,
const char *filename,
const char *fmt,
bool chain)
{
ImageInfoList *head = NULL;
ImageInfoList **last = &head;
GHashTable *filenames;
Error *err = NULL;
filenames = g_hash_table_new_full(g_str_hash, str_equal_func, NULL, NULL);
while (filename) {
block: New BlockBackend A block device consists of a frontend device model and a backend. A block backend has a tree of block drivers doing the actual work. The tree is managed by the block layer. We currently use a single abstraction BlockDriverState both for tree nodes and the backend as a whole. Drawbacks: * Its API includes both stuff that makes sense only at the block backend level (root of the tree) and stuff that's only for use within the block layer. This makes the API bigger and more complex than necessary. Moreover, it's not obvious which interfaces are meant for device models, and which really aren't. * Since device models keep a reference to their backend, the backend object can't just be destroyed. But for media change, we need to replace the tree. Our solution is to make the BlockDriverState generic, with actual driver state in a separate object, pointed to by member opaque. That lets us replace the tree by deinitializing and reinitializing its root. This special need of the root makes the data structure awkward everywhere in the tree. The general plan is to separate the APIs into "block backend", for use by device models, monitor and whatever other code dealing with block backends, and "block driver", for use by the block layer and whatever other code (if any) dealing with trees and tree nodes. Code dealing with block backends, device models in particular, should become completely oblivious of BlockDriverState. This should let us clean up both APIs, and the tree data structures. This commit is a first step. It creates a minimal "block backend" API: type BlockBackend and functions to create, destroy and find them. BlockBackend objects are created and destroyed exactly when root BlockDriverState objects are created and destroyed. "Root" in the sense of "in bdrv_states". They're not yet used for anything; that'll come shortly. A root BlockDriverState is created with bdrv_new_root(), so where to create a BlockBackend is obvious. Where these roots get destroyed isn't always as obvious. It is obvious in qemu-img.c, qemu-io.c and qemu-nbd.c, and in error paths of blockdev_init(), blk_connect(). That leaves destruction of objects successfully created by blockdev_init() and blk_connect(). blockdev_init() is used only by drive_new() and qmp_blockdev_add(). Objects created by the latter are currently indestructible (see commit 48f364d "blockdev: Refuse to drive_del something added with blockdev-add" and commit 2d246f0 "blockdev: Introduce DriveInfo.enable_auto_del"). Objects created by the former get destroyed by drive_del(). Objects created by blk_connect() get destroyed by blk_disconnect(). BlockBackend is reference-counted. Its reference count never exceeds one so far, but that's going to change. In drive_del(), the BB's reference count is surely one now. The BDS's reference count is greater than one when something else is holding a reference, such as a block job. In this case, the BB is destroyed right away, but the BDS lives on until all extra references get dropped. Signed-off-by: Markus Armbruster <armbru@redhat.com> Reviewed-by: Max Reitz <mreitz@redhat.com> Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2014-10-07 11:59:04 +00:00
BlockBackend *blk;
BlockDriverState *bs;
ImageInfo *info;
ImageInfoList *elem;
if (g_hash_table_lookup_extended(filenames, filename, NULL, NULL)) {
error_report("Backing file '%s' creates an infinite loop.",
filename);
goto err;
}
g_hash_table_insert(filenames, (gpointer)filename, NULL);
blk = img_open(image_opts, filename, fmt,
BDRV_O_NO_BACKING | BDRV_O_NO_IO, false, false);
if (!blk) {
goto err;
}
bs = blk_bs(blk);
bdrv_query_image_info(bs, &info, &err);
if (err) {
error_report_err(err);
block: New BlockBackend A block device consists of a frontend device model and a backend. A block backend has a tree of block drivers doing the actual work. The tree is managed by the block layer. We currently use a single abstraction BlockDriverState both for tree nodes and the backend as a whole. Drawbacks: * Its API includes both stuff that makes sense only at the block backend level (root of the tree) and stuff that's only for use within the block layer. This makes the API bigger and more complex than necessary. Moreover, it's not obvious which interfaces are meant for device models, and which really aren't. * Since device models keep a reference to their backend, the backend object can't just be destroyed. But for media change, we need to replace the tree. Our solution is to make the BlockDriverState generic, with actual driver state in a separate object, pointed to by member opaque. That lets us replace the tree by deinitializing and reinitializing its root. This special need of the root makes the data structure awkward everywhere in the tree. The general plan is to separate the APIs into "block backend", for use by device models, monitor and whatever other code dealing with block backends, and "block driver", for use by the block layer and whatever other code (if any) dealing with trees and tree nodes. Code dealing with block backends, device models in particular, should become completely oblivious of BlockDriverState. This should let us clean up both APIs, and the tree data structures. This commit is a first step. It creates a minimal "block backend" API: type BlockBackend and functions to create, destroy and find them. BlockBackend objects are created and destroyed exactly when root BlockDriverState objects are created and destroyed. "Root" in the sense of "in bdrv_states". They're not yet used for anything; that'll come shortly. A root BlockDriverState is created with bdrv_new_root(), so where to create a BlockBackend is obvious. Where these roots get destroyed isn't always as obvious. It is obvious in qemu-img.c, qemu-io.c and qemu-nbd.c, and in error paths of blockdev_init(), blk_connect(). That leaves destruction of objects successfully created by blockdev_init() and blk_connect(). blockdev_init() is used only by drive_new() and qmp_blockdev_add(). Objects created by the latter are currently indestructible (see commit 48f364d "blockdev: Refuse to drive_del something added with blockdev-add" and commit 2d246f0 "blockdev: Introduce DriveInfo.enable_auto_del"). Objects created by the former get destroyed by drive_del(). Objects created by blk_connect() get destroyed by blk_disconnect(). BlockBackend is reference-counted. Its reference count never exceeds one so far, but that's going to change. In drive_del(), the BB's reference count is surely one now. The BDS's reference count is greater than one when something else is holding a reference, such as a block job. In this case, the BB is destroyed right away, but the BDS lives on until all extra references get dropped. Signed-off-by: Markus Armbruster <armbru@redhat.com> Reviewed-by: Max Reitz <mreitz@redhat.com> Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2014-10-07 11:59:04 +00:00
blk_unref(blk);
goto err;
}
elem = g_new0(ImageInfoList, 1);
elem->value = info;
*last = elem;
last = &elem->next;
block: New BlockBackend A block device consists of a frontend device model and a backend. A block backend has a tree of block drivers doing the actual work. The tree is managed by the block layer. We currently use a single abstraction BlockDriverState both for tree nodes and the backend as a whole. Drawbacks: * Its API includes both stuff that makes sense only at the block backend level (root of the tree) and stuff that's only for use within the block layer. This makes the API bigger and more complex than necessary. Moreover, it's not obvious which interfaces are meant for device models, and which really aren't. * Since device models keep a reference to their backend, the backend object can't just be destroyed. But for media change, we need to replace the tree. Our solution is to make the BlockDriverState generic, with actual driver state in a separate object, pointed to by member opaque. That lets us replace the tree by deinitializing and reinitializing its root. This special need of the root makes the data structure awkward everywhere in the tree. The general plan is to separate the APIs into "block backend", for use by device models, monitor and whatever other code dealing with block backends, and "block driver", for use by the block layer and whatever other code (if any) dealing with trees and tree nodes. Code dealing with block backends, device models in particular, should become completely oblivious of BlockDriverState. This should let us clean up both APIs, and the tree data structures. This commit is a first step. It creates a minimal "block backend" API: type BlockBackend and functions to create, destroy and find them. BlockBackend objects are created and destroyed exactly when root BlockDriverState objects are created and destroyed. "Root" in the sense of "in bdrv_states". They're not yet used for anything; that'll come shortly. A root BlockDriverState is created with bdrv_new_root(), so where to create a BlockBackend is obvious. Where these roots get destroyed isn't always as obvious. It is obvious in qemu-img.c, qemu-io.c and qemu-nbd.c, and in error paths of blockdev_init(), blk_connect(). That leaves destruction of objects successfully created by blockdev_init() and blk_connect(). blockdev_init() is used only by drive_new() and qmp_blockdev_add(). Objects created by the latter are currently indestructible (see commit 48f364d "blockdev: Refuse to drive_del something added with blockdev-add" and commit 2d246f0 "blockdev: Introduce DriveInfo.enable_auto_del"). Objects created by the former get destroyed by drive_del(). Objects created by blk_connect() get destroyed by blk_disconnect(). BlockBackend is reference-counted. Its reference count never exceeds one so far, but that's going to change. In drive_del(), the BB's reference count is surely one now. The BDS's reference count is greater than one when something else is holding a reference, such as a block job. In this case, the BB is destroyed right away, but the BDS lives on until all extra references get dropped. Signed-off-by: Markus Armbruster <armbru@redhat.com> Reviewed-by: Max Reitz <mreitz@redhat.com> Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2014-10-07 11:59:04 +00:00
blk_unref(blk);
filename = fmt = NULL;
if (chain) {
if (info->has_full_backing_filename) {
filename = info->full_backing_filename;
} else if (info->has_backing_filename) {
error_report("Could not determine absolute backing filename,"
" but backing filename '%s' present",
info->backing_filename);
goto err;
}
if (info->has_backing_filename_format) {
fmt = info->backing_filename_format;
}
}
}
g_hash_table_destroy(filenames);
return head;
err:
qapi_free_ImageInfoList(head);
g_hash_table_destroy(filenames);
return NULL;
}
static int img_info(int argc, char **argv)
{
int c;
OutputFormat output_format = OFORMAT_HUMAN;
bool chain = false;
const char *filename, *fmt, *output;
ImageInfoList *list;
bool image_opts = false;
fmt = NULL;
output = NULL;
for(;;) {
int option_index = 0;
static const struct option long_options[] = {
{"help", no_argument, 0, 'h'},
{"format", required_argument, 0, 'f'},
{"output", required_argument, 0, OPTION_OUTPUT},
{"backing-chain", no_argument, 0, OPTION_BACKING_CHAIN},
{"object", required_argument, 0, OPTION_OBJECT},
{"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
{0, 0, 0, 0}
};
c = getopt_long(argc, argv, "f:h",
long_options, &option_index);
if (c == -1) {
break;
}
switch(c) {
case '?':
case 'h':
help();
break;
case 'f':
fmt = optarg;
break;
case OPTION_OUTPUT:
output = optarg;
break;
case OPTION_BACKING_CHAIN:
chain = true;
break;
case OPTION_OBJECT: {
QemuOpts *opts;
opts = qemu_opts_parse_noisily(&qemu_object_opts,
optarg, true);
if (!opts) {
return 1;
}
} break;
case OPTION_IMAGE_OPTS:
image_opts = true;
break;
}
}
if (optind != argc - 1) {
error_exit("Expecting one image file name");
}
filename = argv[optind++];
if (output && !strcmp(output, "json")) {
output_format = OFORMAT_JSON;
} else if (output && !strcmp(output, "human")) {
output_format = OFORMAT_HUMAN;
} else if (output) {
error_report("--output must be used with human or json as argument.");
return 1;
}
if (qemu_opts_foreach(&qemu_object_opts,
user_creatable_add_opts_foreach,
qom: -object error messages lost location, restore it qemu_opts_foreach() runs its callback with the error location set to the option's location. Any errors the callback reports use the option's location automatically. Commit 90998d5 moved the actual error reporting from "inside" qemu_opts_foreach() to after it. Here's a typical hunk: if (qemu_opts_foreach(qemu_find_opts("object"), - object_create, - object_create_initial, NULL)) { + user_creatable_add_opts_foreach, + object_create_initial, &err)) { + error_report_err(err); exit(1); } Before, object_create() reports from within qemu_opts_foreach(), using the option's location. Afterwards, we do it after qemu_opts_foreach(), using whatever location happens to be current there. Commonly a "none" location. This is because Error objects don't have location information. Problematic. Reproducer: $ qemu-system-x86_64 -nodefaults -display none -object secret,id=foo,foo=bar qemu-system-x86_64: Property '.foo' not found Note no location. This commit restores it: qemu-system-x86_64: -object secret,id=foo,foo=bar: Property '.foo' not found Note that the qemu_opts_foreach() bug just fixed could mask the bug here: if the location it leaves dangling hasn't been clobbered, yet, it's the correct one. Reported-by: Eric Blake <eblake@redhat.com> Cc: Daniel P. Berrange <berrange@redhat.com> Signed-off-by: Markus Armbruster <armbru@redhat.com> Message-Id: <1461767349-15329-4-git-send-email-armbru@redhat.com> Reviewed-by: Daniel P. Berrange <berrange@redhat.com> Reviewed-by: Eric Blake <eblake@redhat.com> [Paragraph on Error added to commit message]
2016-04-27 14:29:09 +00:00
NULL, NULL)) {
return 1;
}
list = collect_image_info_list(image_opts, filename, fmt, chain);
if (!list) {
return 1;
}
switch (output_format) {
case OFORMAT_HUMAN:
dump_human_image_info_list(list);
break;
case OFORMAT_JSON:
if (chain) {
dump_json_image_info_list(list);
} else {
dump_json_image_info(list->value);
}
break;
}
qapi_free_ImageInfoList(list);
return 0;
}
static void dump_map_entry(OutputFormat output_format, MapEntry *e,
MapEntry *next)
{
switch (output_format) {
case OFORMAT_HUMAN:
if (e->data && !e->has_offset) {
error_report("File contains external, encrypted or compressed clusters.");
exit(1);
}
if (e->data && !e->zero) {
printf("%#-16"PRIx64"%#-16"PRIx64"%#-16"PRIx64"%s\n",
e->start, e->length,
e->has_offset ? e->offset : 0,
e->has_filename ? e->filename : "");
}
/* This format ignores the distinction between 0, ZERO and ZERO|DATA.
* Modify the flags here to allow more coalescing.
*/
if (next && (!next->data || next->zero)) {
next->data = false;
next->zero = true;
}
break;
case OFORMAT_JSON:
printf("%s{ \"start\": %"PRId64", \"length\": %"PRId64","
" \"depth\": %"PRId64", \"zero\": %s, \"data\": %s",
(e->start == 0 ? "[" : ",\n"),
e->start, e->length, e->depth,
e->zero ? "true" : "false",
e->data ? "true" : "false");
if (e->has_offset) {
printf(", \"offset\": %"PRId64"", e->offset);
}
putchar('}');
if (!next) {
printf("]\n");
}
break;
}
}
static int get_block_status(BlockDriverState *bs, int64_t sector_num,
int nb_sectors, MapEntry *e)
{
int64_t ret;
int depth;
BlockDriverState *file;
bool has_offset;
/* As an optimization, we could cache the current range of unallocated
* clusters in each file of the chain, and avoid querying the same
* range repeatedly.
*/
depth = 0;
for (;;) {
ret = bdrv_get_block_status(bs, sector_num, nb_sectors, &nb_sectors,
&file);
if (ret < 0) {
return ret;
}
assert(nb_sectors);
if (ret & (BDRV_BLOCK_ZERO|BDRV_BLOCK_DATA)) {
break;
}
bs = backing_bs(bs);
if (bs == NULL) {
ret = 0;
break;
}
depth++;
}
has_offset = !!(ret & BDRV_BLOCK_OFFSET_VALID);
*e = (MapEntry) {
.start = sector_num * BDRV_SECTOR_SIZE,
.length = nb_sectors * BDRV_SECTOR_SIZE,
.data = !!(ret & BDRV_BLOCK_DATA),
.zero = !!(ret & BDRV_BLOCK_ZERO),
.offset = ret & BDRV_BLOCK_OFFSET_MASK,
.has_offset = has_offset,
.depth = depth,
.has_filename = file && has_offset,
.filename = file && has_offset ? file->filename : NULL,
};
return 0;
}
static inline bool entry_mergeable(const MapEntry *curr, const MapEntry *next)
{
if (curr->length == 0) {
return false;
}
if (curr->zero != next->zero ||
curr->data != next->data ||
curr->depth != next->depth ||
curr->has_filename != next->has_filename ||
curr->has_offset != next->has_offset) {
return false;
}
if (curr->has_filename && strcmp(curr->filename, next->filename)) {
return false;
}
if (curr->has_offset && curr->offset + curr->length != next->offset) {
return false;
}
return true;
}
static int img_map(int argc, char **argv)
{
int c;
OutputFormat output_format = OFORMAT_HUMAN;
block: New BlockBackend A block device consists of a frontend device model and a backend. A block backend has a tree of block drivers doing the actual work. The tree is managed by the block layer. We currently use a single abstraction BlockDriverState both for tree nodes and the backend as a whole. Drawbacks: * Its API includes both stuff that makes sense only at the block backend level (root of the tree) and stuff that's only for use within the block layer. This makes the API bigger and more complex than necessary. Moreover, it's not obvious which interfaces are meant for device models, and which really aren't. * Since device models keep a reference to their backend, the backend object can't just be destroyed. But for media change, we need to replace the tree. Our solution is to make the BlockDriverState generic, with actual driver state in a separate object, pointed to by member opaque. That lets us replace the tree by deinitializing and reinitializing its root. This special need of the root makes the data structure awkward everywhere in the tree. The general plan is to separate the APIs into "block backend", for use by device models, monitor and whatever other code dealing with block backends, and "block driver", for use by the block layer and whatever other code (if any) dealing with trees and tree nodes. Code dealing with block backends, device models in particular, should become completely oblivious of BlockDriverState. This should let us clean up both APIs, and the tree data structures. This commit is a first step. It creates a minimal "block backend" API: type BlockBackend and functions to create, destroy and find them. BlockBackend objects are created and destroyed exactly when root BlockDriverState objects are created and destroyed. "Root" in the sense of "in bdrv_states". They're not yet used for anything; that'll come shortly. A root BlockDriverState is created with bdrv_new_root(), so where to create a BlockBackend is obvious. Where these roots get destroyed isn't always as obvious. It is obvious in qemu-img.c, qemu-io.c and qemu-nbd.c, and in error paths of blockdev_init(), blk_connect(). That leaves destruction of objects successfully created by blockdev_init() and blk_connect(). blockdev_init() is used only by drive_new() and qmp_blockdev_add(). Objects created by the latter are currently indestructible (see commit 48f364d "blockdev: Refuse to drive_del something added with blockdev-add" and commit 2d246f0 "blockdev: Introduce DriveInfo.enable_auto_del"). Objects created by the former get destroyed by drive_del(). Objects created by blk_connect() get destroyed by blk_disconnect(). BlockBackend is reference-counted. Its reference count never exceeds one so far, but that's going to change. In drive_del(), the BB's reference count is surely one now. The BDS's reference count is greater than one when something else is holding a reference, such as a block job. In this case, the BB is destroyed right away, but the BDS lives on until all extra references get dropped. Signed-off-by: Markus Armbruster <armbru@redhat.com> Reviewed-by: Max Reitz <mreitz@redhat.com> Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2014-10-07 11:59:04 +00:00
BlockBackend *blk;
BlockDriverState *bs;
const char *filename, *fmt, *output;
int64_t length;
MapEntry curr = { .length = 0 }, next;
int ret = 0;
bool image_opts = false;
fmt = NULL;
output = NULL;
for (;;) {
int option_index = 0;
static const struct option long_options[] = {
{"help", no_argument, 0, 'h'},
{"format", required_argument, 0, 'f'},
{"output", required_argument, 0, OPTION_OUTPUT},
{"object", required_argument, 0, OPTION_OBJECT},
{"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
{0, 0, 0, 0}
};
c = getopt_long(argc, argv, "f:h",
long_options, &option_index);
if (c == -1) {
break;
}
switch (c) {
case '?':
case 'h':
help();
break;
case 'f':
fmt = optarg;
break;
case OPTION_OUTPUT:
output = optarg;
break;
case OPTION_OBJECT: {
QemuOpts *opts;
opts = qemu_opts_parse_noisily(&qemu_object_opts,
optarg, true);
if (!opts) {
return 1;
}
} break;
case OPTION_IMAGE_OPTS:
image_opts = true;
break;
}
}
if (optind != argc - 1) {
error_exit("Expecting one image file name");
}
filename = argv[optind];
if (output && !strcmp(output, "json")) {
output_format = OFORMAT_JSON;
} else if (output && !strcmp(output, "human")) {
output_format = OFORMAT_HUMAN;
} else if (output) {
error_report("--output must be used with human or json as argument.");
return 1;
}
if (qemu_opts_foreach(&qemu_object_opts,
user_creatable_add_opts_foreach,
qom: -object error messages lost location, restore it qemu_opts_foreach() runs its callback with the error location set to the option's location. Any errors the callback reports use the option's location automatically. Commit 90998d5 moved the actual error reporting from "inside" qemu_opts_foreach() to after it. Here's a typical hunk: if (qemu_opts_foreach(qemu_find_opts("object"), - object_create, - object_create_initial, NULL)) { + user_creatable_add_opts_foreach, + object_create_initial, &err)) { + error_report_err(err); exit(1); } Before, object_create() reports from within qemu_opts_foreach(), using the option's location. Afterwards, we do it after qemu_opts_foreach(), using whatever location happens to be current there. Commonly a "none" location. This is because Error objects don't have location information. Problematic. Reproducer: $ qemu-system-x86_64 -nodefaults -display none -object secret,id=foo,foo=bar qemu-system-x86_64: Property '.foo' not found Note no location. This commit restores it: qemu-system-x86_64: -object secret,id=foo,foo=bar: Property '.foo' not found Note that the qemu_opts_foreach() bug just fixed could mask the bug here: if the location it leaves dangling hasn't been clobbered, yet, it's the correct one. Reported-by: Eric Blake <eblake@redhat.com> Cc: Daniel P. Berrange <berrange@redhat.com> Signed-off-by: Markus Armbruster <armbru@redhat.com> Message-Id: <1461767349-15329-4-git-send-email-armbru@redhat.com> Reviewed-by: Daniel P. Berrange <berrange@redhat.com> Reviewed-by: Eric Blake <eblake@redhat.com> [Paragraph on Error added to commit message]
2016-04-27 14:29:09 +00:00
NULL, NULL)) {
return 1;
}
blk = img_open(image_opts, filename, fmt, 0, false, false);
if (!blk) {
return 1;
}
bs = blk_bs(blk);
if (output_format == OFORMAT_HUMAN) {
printf("%-16s%-16s%-16s%s\n", "Offset", "Length", "Mapped to", "File");
}
length = blk_getlength(blk);
while (curr.start + curr.length < length) {
int64_t nsectors_left;
int64_t sector_num;
int n;
sector_num = (curr.start + curr.length) >> BDRV_SECTOR_BITS;
/* Probe up to 1 GiB at a time. */
nsectors_left = DIV_ROUND_UP(length, BDRV_SECTOR_SIZE) - sector_num;
n = MIN(1 << (30 - BDRV_SECTOR_BITS), nsectors_left);
ret = get_block_status(bs, sector_num, n, &next);
if (ret < 0) {
error_report("Could not read file metadata: %s", strerror(-ret));
goto out;
}
if (entry_mergeable(&curr, &next)) {
curr.length += next.length;
continue;
}
if (curr.length > 0) {
dump_map_entry(output_format, &curr, &next);
}
curr = next;
}
dump_map_entry(output_format, &curr, NULL);
out:
block: New BlockBackend A block device consists of a frontend device model and a backend. A block backend has a tree of block drivers doing the actual work. The tree is managed by the block layer. We currently use a single abstraction BlockDriverState both for tree nodes and the backend as a whole. Drawbacks: * Its API includes both stuff that makes sense only at the block backend level (root of the tree) and stuff that's only for use within the block layer. This makes the API bigger and more complex than necessary. Moreover, it's not obvious which interfaces are meant for device models, and which really aren't. * Since device models keep a reference to their backend, the backend object can't just be destroyed. But for media change, we need to replace the tree. Our solution is to make the BlockDriverState generic, with actual driver state in a separate object, pointed to by member opaque. That lets us replace the tree by deinitializing and reinitializing its root. This special need of the root makes the data structure awkward everywhere in the tree. The general plan is to separate the APIs into "block backend", for use by device models, monitor and whatever other code dealing with block backends, and "block driver", for use by the block layer and whatever other code (if any) dealing with trees and tree nodes. Code dealing with block backends, device models in particular, should become completely oblivious of BlockDriverState. This should let us clean up both APIs, and the tree data structures. This commit is a first step. It creates a minimal "block backend" API: type BlockBackend and functions to create, destroy and find them. BlockBackend objects are created and destroyed exactly when root BlockDriverState objects are created and destroyed. "Root" in the sense of "in bdrv_states". They're not yet used for anything; that'll come shortly. A root BlockDriverState is created with bdrv_new_root(), so where to create a BlockBackend is obvious. Where these roots get destroyed isn't always as obvious. It is obvious in qemu-img.c, qemu-io.c and qemu-nbd.c, and in error paths of blockdev_init(), blk_connect(). That leaves destruction of objects successfully created by blockdev_init() and blk_connect(). blockdev_init() is used only by drive_new() and qmp_blockdev_add(). Objects created by the latter are currently indestructible (see commit 48f364d "blockdev: Refuse to drive_del something added with blockdev-add" and commit 2d246f0 "blockdev: Introduce DriveInfo.enable_auto_del"). Objects created by the former get destroyed by drive_del(). Objects created by blk_connect() get destroyed by blk_disconnect(). BlockBackend is reference-counted. Its reference count never exceeds one so far, but that's going to change. In drive_del(), the BB's reference count is surely one now. The BDS's reference count is greater than one when something else is holding a reference, such as a block job. In this case, the BB is destroyed right away, but the BDS lives on until all extra references get dropped. Signed-off-by: Markus Armbruster <armbru@redhat.com> Reviewed-by: Max Reitz <mreitz@redhat.com> Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2014-10-07 11:59:04 +00:00
blk_unref(blk);
return ret < 0;
}
#define SNAPSHOT_LIST 1
#define SNAPSHOT_CREATE 2
#define SNAPSHOT_APPLY 3
#define SNAPSHOT_DELETE 4
static int img_snapshot(int argc, char **argv)
{
block: New BlockBackend A block device consists of a frontend device model and a backend. A block backend has a tree of block drivers doing the actual work. The tree is managed by the block layer. We currently use a single abstraction BlockDriverState both for tree nodes and the backend as a whole. Drawbacks: * Its API includes both stuff that makes sense only at the block backend level (root of the tree) and stuff that's only for use within the block layer. This makes the API bigger and more complex than necessary. Moreover, it's not obvious which interfaces are meant for device models, and which really aren't. * Since device models keep a reference to their backend, the backend object can't just be destroyed. But for media change, we need to replace the tree. Our solution is to make the BlockDriverState generic, with actual driver state in a separate object, pointed to by member opaque. That lets us replace the tree by deinitializing and reinitializing its root. This special need of the root makes the data structure awkward everywhere in the tree. The general plan is to separate the APIs into "block backend", for use by device models, monitor and whatever other code dealing with block backends, and "block driver", for use by the block layer and whatever other code (if any) dealing with trees and tree nodes. Code dealing with block backends, device models in particular, should become completely oblivious of BlockDriverState. This should let us clean up both APIs, and the tree data structures. This commit is a first step. It creates a minimal "block backend" API: type BlockBackend and functions to create, destroy and find them. BlockBackend objects are created and destroyed exactly when root BlockDriverState objects are created and destroyed. "Root" in the sense of "in bdrv_states". They're not yet used for anything; that'll come shortly. A root BlockDriverState is created with bdrv_new_root(), so where to create a BlockBackend is obvious. Where these roots get destroyed isn't always as obvious. It is obvious in qemu-img.c, qemu-io.c and qemu-nbd.c, and in error paths of blockdev_init(), blk_connect(). That leaves destruction of objects successfully created by blockdev_init() and blk_connect(). blockdev_init() is used only by drive_new() and qmp_blockdev_add(). Objects created by the latter are currently indestructible (see commit 48f364d "blockdev: Refuse to drive_del something added with blockdev-add" and commit 2d246f0 "blockdev: Introduce DriveInfo.enable_auto_del"). Objects created by the former get destroyed by drive_del(). Objects created by blk_connect() get destroyed by blk_disconnect(). BlockBackend is reference-counted. Its reference count never exceeds one so far, but that's going to change. In drive_del(), the BB's reference count is surely one now. The BDS's reference count is greater than one when something else is holding a reference, such as a block job. In this case, the BB is destroyed right away, but the BDS lives on until all extra references get dropped. Signed-off-by: Markus Armbruster <armbru@redhat.com> Reviewed-by: Max Reitz <mreitz@redhat.com> Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2014-10-07 11:59:04 +00:00
BlockBackend *blk;
BlockDriverState *bs;
QEMUSnapshotInfo sn;
char *filename, *snapshot_name = NULL;
int c, ret = 0, bdrv_oflags;
int action = 0;
qemu_timeval tv;
bool quiet = false;
Error *err = NULL;
bool image_opts = false;
bdrv_oflags = BDRV_O_RDWR;
/* Parse commandline parameters */
for(;;) {
static const struct option long_options[] = {
{"help", no_argument, 0, 'h'},
{"object", required_argument, 0, OPTION_OBJECT},
{"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
{0, 0, 0, 0}
};
c = getopt_long(argc, argv, "la:c:d:hq",
long_options, NULL);
if (c == -1) {
break;
}
switch(c) {
case '?':
case 'h':
help();
return 0;
case 'l':
if (action) {
error_exit("Cannot mix '-l', '-a', '-c', '-d'");
return 0;
}
action = SNAPSHOT_LIST;
bdrv_oflags &= ~BDRV_O_RDWR; /* no need for RW */
break;
case 'a':
if (action) {
error_exit("Cannot mix '-l', '-a', '-c', '-d'");
return 0;
}
action = SNAPSHOT_APPLY;
snapshot_name = optarg;
break;
case 'c':
if (action) {
error_exit("Cannot mix '-l', '-a', '-c', '-d'");
return 0;
}
action = SNAPSHOT_CREATE;
snapshot_name = optarg;
break;
case 'd':
if (action) {
error_exit("Cannot mix '-l', '-a', '-c', '-d'");
return 0;
}
action = SNAPSHOT_DELETE;
snapshot_name = optarg;
break;
case 'q':
quiet = true;
break;
case OPTION_OBJECT: {
QemuOpts *opts;
opts = qemu_opts_parse_noisily(&qemu_object_opts,
optarg, true);
if (!opts) {
return 1;
}
} break;
case OPTION_IMAGE_OPTS:
image_opts = true;
break;
}
}
if (optind != argc - 1) {
error_exit("Expecting one image file name");
}
filename = argv[optind++];
if (qemu_opts_foreach(&qemu_object_opts,
user_creatable_add_opts_foreach,
qom: -object error messages lost location, restore it qemu_opts_foreach() runs its callback with the error location set to the option's location. Any errors the callback reports use the option's location automatically. Commit 90998d5 moved the actual error reporting from "inside" qemu_opts_foreach() to after it. Here's a typical hunk: if (qemu_opts_foreach(qemu_find_opts("object"), - object_create, - object_create_initial, NULL)) { + user_creatable_add_opts_foreach, + object_create_initial, &err)) { + error_report_err(err); exit(1); } Before, object_create() reports from within qemu_opts_foreach(), using the option's location. Afterwards, we do it after qemu_opts_foreach(), using whatever location happens to be current there. Commonly a "none" location. This is because Error objects don't have location information. Problematic. Reproducer: $ qemu-system-x86_64 -nodefaults -display none -object secret,id=foo,foo=bar qemu-system-x86_64: Property '.foo' not found Note no location. This commit restores it: qemu-system-x86_64: -object secret,id=foo,foo=bar: Property '.foo' not found Note that the qemu_opts_foreach() bug just fixed could mask the bug here: if the location it leaves dangling hasn't been clobbered, yet, it's the correct one. Reported-by: Eric Blake <eblake@redhat.com> Cc: Daniel P. Berrange <berrange@redhat.com> Signed-off-by: Markus Armbruster <armbru@redhat.com> Message-Id: <1461767349-15329-4-git-send-email-armbru@redhat.com> Reviewed-by: Daniel P. Berrange <berrange@redhat.com> Reviewed-by: Eric Blake <eblake@redhat.com> [Paragraph on Error added to commit message]
2016-04-27 14:29:09 +00:00
NULL, NULL)) {
return 1;
}
/* Open the image */
blk = img_open(image_opts, filename, NULL, bdrv_oflags, false, quiet);
if (!blk) {
return 1;
}
bs = blk_bs(blk);
/* Perform the requested action */
switch(action) {
case SNAPSHOT_LIST:
dump_snapshots(bs);
break;
case SNAPSHOT_CREATE:
memset(&sn, 0, sizeof(sn));
pstrcpy(sn.name, sizeof(sn.name), snapshot_name);
qemu_gettimeofday(&tv);
sn.date_sec = tv.tv_sec;
sn.date_nsec = tv.tv_usec * 1000;
ret = bdrv_snapshot_create(bs, &sn);
if (ret) {
error_report("Could not create snapshot '%s': %d (%s)",
snapshot_name, ret, strerror(-ret));
}
break;
case SNAPSHOT_APPLY:
ret = bdrv_snapshot_goto(bs, snapshot_name);
if (ret) {
error_report("Could not apply snapshot '%s': %d (%s)",
snapshot_name, ret, strerror(-ret));
}
break;
case SNAPSHOT_DELETE:
bdrv_snapshot_delete_by_id_or_name(bs, snapshot_name, &err);
if (err) {
error_reportf_err(err, "Could not delete snapshot '%s': ",
snapshot_name);
ret = 1;
}
break;
}
/* Cleanup */
block: New BlockBackend A block device consists of a frontend device model and a backend. A block backend has a tree of block drivers doing the actual work. The tree is managed by the block layer. We currently use a single abstraction BlockDriverState both for tree nodes and the backend as a whole. Drawbacks: * Its API includes both stuff that makes sense only at the block backend level (root of the tree) and stuff that's only for use within the block layer. This makes the API bigger and more complex than necessary. Moreover, it's not obvious which interfaces are meant for device models, and which really aren't. * Since device models keep a reference to their backend, the backend object can't just be destroyed. But for media change, we need to replace the tree. Our solution is to make the BlockDriverState generic, with actual driver state in a separate object, pointed to by member opaque. That lets us replace the tree by deinitializing and reinitializing its root. This special need of the root makes the data structure awkward everywhere in the tree. The general plan is to separate the APIs into "block backend", for use by device models, monitor and whatever other code dealing with block backends, and "block driver", for use by the block layer and whatever other code (if any) dealing with trees and tree nodes. Code dealing with block backends, device models in particular, should become completely oblivious of BlockDriverState. This should let us clean up both APIs, and the tree data structures. This commit is a first step. It creates a minimal "block backend" API: type BlockBackend and functions to create, destroy and find them. BlockBackend objects are created and destroyed exactly when root BlockDriverState objects are created and destroyed. "Root" in the sense of "in bdrv_states". They're not yet used for anything; that'll come shortly. A root BlockDriverState is created with bdrv_new_root(), so where to create a BlockBackend is obvious. Where these roots get destroyed isn't always as obvious. It is obvious in qemu-img.c, qemu-io.c and qemu-nbd.c, and in error paths of blockdev_init(), blk_connect(). That leaves destruction of objects successfully created by blockdev_init() and blk_connect(). blockdev_init() is used only by drive_new() and qmp_blockdev_add(). Objects created by the latter are currently indestructible (see commit 48f364d "blockdev: Refuse to drive_del something added with blockdev-add" and commit 2d246f0 "blockdev: Introduce DriveInfo.enable_auto_del"). Objects created by the former get destroyed by drive_del(). Objects created by blk_connect() get destroyed by blk_disconnect(). BlockBackend is reference-counted. Its reference count never exceeds one so far, but that's going to change. In drive_del(), the BB's reference count is surely one now. The BDS's reference count is greater than one when something else is holding a reference, such as a block job. In this case, the BB is destroyed right away, but the BDS lives on until all extra references get dropped. Signed-off-by: Markus Armbruster <armbru@redhat.com> Reviewed-by: Max Reitz <mreitz@redhat.com> Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2014-10-07 11:59:04 +00:00
blk_unref(blk);
if (ret) {
return 1;
}
return 0;
}
static int img_rebase(int argc, char **argv)
{
block: New BlockBackend A block device consists of a frontend device model and a backend. A block backend has a tree of block drivers doing the actual work. The tree is managed by the block layer. We currently use a single abstraction BlockDriverState both for tree nodes and the backend as a whole. Drawbacks: * Its API includes both stuff that makes sense only at the block backend level (root of the tree) and stuff that's only for use within the block layer. This makes the API bigger and more complex than necessary. Moreover, it's not obvious which interfaces are meant for device models, and which really aren't. * Since device models keep a reference to their backend, the backend object can't just be destroyed. But for media change, we need to replace the tree. Our solution is to make the BlockDriverState generic, with actual driver state in a separate object, pointed to by member opaque. That lets us replace the tree by deinitializing and reinitializing its root. This special need of the root makes the data structure awkward everywhere in the tree. The general plan is to separate the APIs into "block backend", for use by device models, monitor and whatever other code dealing with block backends, and "block driver", for use by the block layer and whatever other code (if any) dealing with trees and tree nodes. Code dealing with block backends, device models in particular, should become completely oblivious of BlockDriverState. This should let us clean up both APIs, and the tree data structures. This commit is a first step. It creates a minimal "block backend" API: type BlockBackend and functions to create, destroy and find them. BlockBackend objects are created and destroyed exactly when root BlockDriverState objects are created and destroyed. "Root" in the sense of "in bdrv_states". They're not yet used for anything; that'll come shortly. A root BlockDriverState is created with bdrv_new_root(), so where to create a BlockBackend is obvious. Where these roots get destroyed isn't always as obvious. It is obvious in qemu-img.c, qemu-io.c and qemu-nbd.c, and in error paths of blockdev_init(), blk_connect(). That leaves destruction of objects successfully created by blockdev_init() and blk_connect(). blockdev_init() is used only by drive_new() and qmp_blockdev_add(). Objects created by the latter are currently indestructible (see commit 48f364d "blockdev: Refuse to drive_del something added with blockdev-add" and commit 2d246f0 "blockdev: Introduce DriveInfo.enable_auto_del"). Objects created by the former get destroyed by drive_del(). Objects created by blk_connect() get destroyed by blk_disconnect(). BlockBackend is reference-counted. Its reference count never exceeds one so far, but that's going to change. In drive_del(), the BB's reference count is surely one now. The BDS's reference count is greater than one when something else is holding a reference, such as a block job. In this case, the BB is destroyed right away, but the BDS lives on until all extra references get dropped. Signed-off-by: Markus Armbruster <armbru@redhat.com> Reviewed-by: Max Reitz <mreitz@redhat.com> Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2014-10-07 11:59:04 +00:00
BlockBackend *blk = NULL, *blk_old_backing = NULL, *blk_new_backing = NULL;
uint8_t *buf_old = NULL;
uint8_t *buf_new = NULL;
BlockDriverState *bs = NULL;
char *filename;
const char *fmt, *cache, *src_cache, *out_basefmt, *out_baseimg;
int c, flags, src_flags, ret;
bool writethrough, src_writethrough;
int unsafe = 0;
int progress = 0;
bool quiet = false;
Error *local_err = NULL;
bool image_opts = false;
/* Parse commandline parameters */
fmt = NULL;
cache = BDRV_DEFAULT_CACHE;
src_cache = BDRV_DEFAULT_CACHE;
out_baseimg = NULL;
out_basefmt = NULL;
for(;;) {
static const struct option long_options[] = {
{"help", no_argument, 0, 'h'},
{"object", required_argument, 0, OPTION_OBJECT},
{"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
{0, 0, 0, 0}
};
c = getopt_long(argc, argv, "hf:F:b:upt:T:q",
long_options, NULL);
if (c == -1) {
break;
}
switch(c) {
case '?':
case 'h':
help();
return 0;
case 'f':
fmt = optarg;
break;
case 'F':
out_basefmt = optarg;
break;
case 'b':
out_baseimg = optarg;
break;
case 'u':
unsafe = 1;
break;
case 'p':
progress = 1;
break;
case 't':
cache = optarg;
break;
case 'T':
src_cache = optarg;
break;
case 'q':
quiet = true;
break;
case OPTION_OBJECT: {
QemuOpts *opts;
opts = qemu_opts_parse_noisily(&qemu_object_opts,
optarg, true);
if (!opts) {
return 1;
}
} break;
case OPTION_IMAGE_OPTS:
image_opts = true;
break;
}
}
if (quiet) {
progress = 0;
}
if (optind != argc - 1) {
error_exit("Expecting one image file name");
}
if (!unsafe && !out_baseimg) {
error_exit("Must specify backing file (-b) or use unsafe mode (-u)");
}
filename = argv[optind++];
if (qemu_opts_foreach(&qemu_object_opts,
user_creatable_add_opts_foreach,
qom: -object error messages lost location, restore it qemu_opts_foreach() runs its callback with the error location set to the option's location. Any errors the callback reports use the option's location automatically. Commit 90998d5 moved the actual error reporting from "inside" qemu_opts_foreach() to after it. Here's a typical hunk: if (qemu_opts_foreach(qemu_find_opts("object"), - object_create, - object_create_initial, NULL)) { + user_creatable_add_opts_foreach, + object_create_initial, &err)) { + error_report_err(err); exit(1); } Before, object_create() reports from within qemu_opts_foreach(), using the option's location. Afterwards, we do it after qemu_opts_foreach(), using whatever location happens to be current there. Commonly a "none" location. This is because Error objects don't have location information. Problematic. Reproducer: $ qemu-system-x86_64 -nodefaults -display none -object secret,id=foo,foo=bar qemu-system-x86_64: Property '.foo' not found Note no location. This commit restores it: qemu-system-x86_64: -object secret,id=foo,foo=bar: Property '.foo' not found Note that the qemu_opts_foreach() bug just fixed could mask the bug here: if the location it leaves dangling hasn't been clobbered, yet, it's the correct one. Reported-by: Eric Blake <eblake@redhat.com> Cc: Daniel P. Berrange <berrange@redhat.com> Signed-off-by: Markus Armbruster <armbru@redhat.com> Message-Id: <1461767349-15329-4-git-send-email-armbru@redhat.com> Reviewed-by: Daniel P. Berrange <berrange@redhat.com> Reviewed-by: Eric Blake <eblake@redhat.com> [Paragraph on Error added to commit message]
2016-04-27 14:29:09 +00:00
NULL, NULL)) {
return 1;
}
qemu_progress_init(progress, 2.0);
qemu_progress_print(0, 100);
flags = BDRV_O_RDWR | (unsafe ? BDRV_O_NO_BACKING : 0);
ret = bdrv_parse_cache_mode(cache, &flags, &writethrough);
if (ret < 0) {
error_report("Invalid cache option: %s", cache);
goto out;
}
src_flags = 0;
ret = bdrv_parse_cache_mode(src_cache, &src_flags, &src_writethrough);
if (ret < 0) {
error_report("Invalid source cache option: %s", src_cache);
goto out;
}
/* The source files are opened read-only, don't care about WCE */
assert((src_flags & BDRV_O_RDWR) == 0);
(void) src_writethrough;
/*
* Open the images.
*
* Ignore the old backing file for unsafe rebase in case we want to correct
* the reference to a renamed or moved backing file.
*/
blk = img_open(image_opts, filename, fmt, flags, writethrough, quiet);
if (!blk) {
ret = -1;
goto out;
}
bs = blk_bs(blk);
if (out_basefmt != NULL) {
if (bdrv_find_format(out_basefmt) == NULL) {
error_report("Invalid format name: '%s'", out_basefmt);
ret = -1;
goto out;
}
}
/* For safe rebasing we need to compare old and new backing file */
if (!unsafe) {
char backing_name[PATH_MAX];
QDict *options = NULL;
if (bs->backing_format[0] != '\0') {
options = qdict_new();
qdict_put(options, "driver", qstring_from_str(bs->backing_format));
}
bdrv_get_backing_filename(bs, backing_name, sizeof(backing_name));
blk_old_backing = blk_new_open(backing_name, NULL,
options, src_flags, &local_err);
if (!blk_old_backing) {
error_reportf_err(local_err,
"Could not open old backing file '%s': ",
backing_name);
goto out;
}
if (out_baseimg[0]) {
if (out_basefmt) {
options = qdict_new();
qdict_put(options, "driver", qstring_from_str(out_basefmt));
} else {
options = NULL;
}
blk_new_backing = blk_new_open(out_baseimg, NULL,
options, src_flags, &local_err);
if (!blk_new_backing) {
error_reportf_err(local_err,
"Could not open new backing file '%s': ",
out_baseimg);
goto out;
}
}
}
/*
* Check each unallocated cluster in the COW file. If it is unallocated,
* accesses go to the backing file. We must therefore compare this cluster
* in the old and new backing file, and if they differ we need to copy it
* from the old backing file into the COW file.
*
* If qemu-img crashes during this step, no harm is done. The content of
* the image is the same as the original one at any time.
*/
if (!unsafe) {
int64_t num_sectors;
int64_t old_backing_num_sectors;
int64_t new_backing_num_sectors = 0;
uint64_t sector;
int n;
float local_progress = 0;
buf_old = blk_blockalign(blk, IO_BUF_SIZE);
buf_new = blk_blockalign(blk, IO_BUF_SIZE);
num_sectors = blk_nb_sectors(blk);
if (num_sectors < 0) {
error_report("Could not get size of '%s': %s",
filename, strerror(-num_sectors));
ret = -1;
goto out;
}
old_backing_num_sectors = blk_nb_sectors(blk_old_backing);
if (old_backing_num_sectors < 0) {
char backing_name[PATH_MAX];
bdrv_get_backing_filename(bs, backing_name, sizeof(backing_name));
error_report("Could not get size of '%s': %s",
backing_name, strerror(-old_backing_num_sectors));
ret = -1;
goto out;
}
if (blk_new_backing) {
new_backing_num_sectors = blk_nb_sectors(blk_new_backing);
if (new_backing_num_sectors < 0) {
error_report("Could not get size of '%s': %s",
out_baseimg, strerror(-new_backing_num_sectors));
ret = -1;
goto out;
}
}
if (num_sectors != 0) {
local_progress = (float)100 /
(num_sectors / MIN(num_sectors, IO_BUF_SIZE / 512));
}
for (sector = 0; sector < num_sectors; sector += n) {
/* How many sectors can we handle with the next read? */
if (sector + (IO_BUF_SIZE / 512) <= num_sectors) {
n = (IO_BUF_SIZE / 512);
} else {
n = num_sectors - sector;
}
/* If the cluster is allocated, we don't need to take action */
ret = bdrv_is_allocated(bs, sector, n, &n);
if (ret < 0) {
error_report("error while reading image metadata: %s",
strerror(-ret));
goto out;
}
if (ret) {
continue;
}
/*
* Read old and new backing file and take into consideration that
* backing files may be smaller than the COW image.
*/
if (sector >= old_backing_num_sectors) {
memset(buf_old, 0, n * BDRV_SECTOR_SIZE);
} else {
if (sector + n > old_backing_num_sectors) {
n = old_backing_num_sectors - sector;
}
ret = blk_pread(blk_old_backing, sector << BDRV_SECTOR_BITS,
buf_old, n << BDRV_SECTOR_BITS);
if (ret < 0) {
error_report("error while reading from old backing file");
goto out;
}
}
if (sector >= new_backing_num_sectors || !blk_new_backing) {
memset(buf_new, 0, n * BDRV_SECTOR_SIZE);
} else {
if (sector + n > new_backing_num_sectors) {
n = new_backing_num_sectors - sector;
}
ret = blk_pread(blk_new_backing, sector << BDRV_SECTOR_BITS,
buf_new, n << BDRV_SECTOR_BITS);
if (ret < 0) {
error_report("error while reading from new backing file");
goto out;
}
}
/* If they differ, we need to write to the COW file */
uint64_t written = 0;
while (written < n) {
int pnum;
if (compare_sectors(buf_old + written * 512,
buf_new + written * 512, n - written, &pnum))
{
ret = blk_pwrite(blk,
(sector + written) << BDRV_SECTOR_BITS,
buf_old + written * 512,
pnum << BDRV_SECTOR_BITS, 0);
if (ret < 0) {
error_report("Error while writing to COW image: %s",
strerror(-ret));
goto out;
}
}
written += pnum;
}
qemu_progress_print(local_progress, 100);
}
}
/*
* Change the backing file. All clusters that are different from the old
* backing file are overwritten in the COW file now, so the visible content
* doesn't change when we switch the backing file.
*/
if (out_baseimg && *out_baseimg) {
ret = bdrv_change_backing_file(bs, out_baseimg, out_basefmt);
} else {
ret = bdrv_change_backing_file(bs, NULL, NULL);
}
if (ret == -ENOSPC) {
error_report("Could not change the backing file to '%s': No "
"space left in the file header", out_baseimg);
} else if (ret < 0) {
error_report("Could not change the backing file to '%s': %s",
out_baseimg, strerror(-ret));
}
qemu_progress_print(100, 0);
/*
* TODO At this point it is possible to check if any clusters that are
* allocated in the COW file are the same in the backing file. If so, they
* could be dropped from the COW file. Don't do this before switching the
* backing file, in case of a crash this would lead to corruption.
*/
out:
qemu_progress_end();
/* Cleanup */
if (!unsafe) {
block: New BlockBackend A block device consists of a frontend device model and a backend. A block backend has a tree of block drivers doing the actual work. The tree is managed by the block layer. We currently use a single abstraction BlockDriverState both for tree nodes and the backend as a whole. Drawbacks: * Its API includes both stuff that makes sense only at the block backend level (root of the tree) and stuff that's only for use within the block layer. This makes the API bigger and more complex than necessary. Moreover, it's not obvious which interfaces are meant for device models, and which really aren't. * Since device models keep a reference to their backend, the backend object can't just be destroyed. But for media change, we need to replace the tree. Our solution is to make the BlockDriverState generic, with actual driver state in a separate object, pointed to by member opaque. That lets us replace the tree by deinitializing and reinitializing its root. This special need of the root makes the data structure awkward everywhere in the tree. The general plan is to separate the APIs into "block backend", for use by device models, monitor and whatever other code dealing with block backends, and "block driver", for use by the block layer and whatever other code (if any) dealing with trees and tree nodes. Code dealing with block backends, device models in particular, should become completely oblivious of BlockDriverState. This should let us clean up both APIs, and the tree data structures. This commit is a first step. It creates a minimal "block backend" API: type BlockBackend and functions to create, destroy and find them. BlockBackend objects are created and destroyed exactly when root BlockDriverState objects are created and destroyed. "Root" in the sense of "in bdrv_states". They're not yet used for anything; that'll come shortly. A root BlockDriverState is created with bdrv_new_root(), so where to create a BlockBackend is obvious. Where these roots get destroyed isn't always as obvious. It is obvious in qemu-img.c, qemu-io.c and qemu-nbd.c, and in error paths of blockdev_init(), blk_connect(). That leaves destruction of objects successfully created by blockdev_init() and blk_connect(). blockdev_init() is used only by drive_new() and qmp_blockdev_add(). Objects created by the latter are currently indestructible (see commit 48f364d "blockdev: Refuse to drive_del something added with blockdev-add" and commit 2d246f0 "blockdev: Introduce DriveInfo.enable_auto_del"). Objects created by the former get destroyed by drive_del(). Objects created by blk_connect() get destroyed by blk_disconnect(). BlockBackend is reference-counted. Its reference count never exceeds one so far, but that's going to change. In drive_del(), the BB's reference count is surely one now. The BDS's reference count is greater than one when something else is holding a reference, such as a block job. In this case, the BB is destroyed right away, but the BDS lives on until all extra references get dropped. Signed-off-by: Markus Armbruster <armbru@redhat.com> Reviewed-by: Max Reitz <mreitz@redhat.com> Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2014-10-07 11:59:04 +00:00
blk_unref(blk_old_backing);
blk_unref(blk_new_backing);
}
qemu_vfree(buf_old);
qemu_vfree(buf_new);
block: New BlockBackend A block device consists of a frontend device model and a backend. A block backend has a tree of block drivers doing the actual work. The tree is managed by the block layer. We currently use a single abstraction BlockDriverState both for tree nodes and the backend as a whole. Drawbacks: * Its API includes both stuff that makes sense only at the block backend level (root of the tree) and stuff that's only for use within the block layer. This makes the API bigger and more complex than necessary. Moreover, it's not obvious which interfaces are meant for device models, and which really aren't. * Since device models keep a reference to their backend, the backend object can't just be destroyed. But for media change, we need to replace the tree. Our solution is to make the BlockDriverState generic, with actual driver state in a separate object, pointed to by member opaque. That lets us replace the tree by deinitializing and reinitializing its root. This special need of the root makes the data structure awkward everywhere in the tree. The general plan is to separate the APIs into "block backend", for use by device models, monitor and whatever other code dealing with block backends, and "block driver", for use by the block layer and whatever other code (if any) dealing with trees and tree nodes. Code dealing with block backends, device models in particular, should become completely oblivious of BlockDriverState. This should let us clean up both APIs, and the tree data structures. This commit is a first step. It creates a minimal "block backend" API: type BlockBackend and functions to create, destroy and find them. BlockBackend objects are created and destroyed exactly when root BlockDriverState objects are created and destroyed. "Root" in the sense of "in bdrv_states". They're not yet used for anything; that'll come shortly. A root BlockDriverState is created with bdrv_new_root(), so where to create a BlockBackend is obvious. Where these roots get destroyed isn't always as obvious. It is obvious in qemu-img.c, qemu-io.c and qemu-nbd.c, and in error paths of blockdev_init(), blk_connect(). That leaves destruction of objects successfully created by blockdev_init() and blk_connect(). blockdev_init() is used only by drive_new() and qmp_blockdev_add(). Objects created by the latter are currently indestructible (see commit 48f364d "blockdev: Refuse to drive_del something added with blockdev-add" and commit 2d246f0 "blockdev: Introduce DriveInfo.enable_auto_del"). Objects created by the former get destroyed by drive_del(). Objects created by blk_connect() get destroyed by blk_disconnect(). BlockBackend is reference-counted. Its reference count never exceeds one so far, but that's going to change. In drive_del(), the BB's reference count is surely one now. The BDS's reference count is greater than one when something else is holding a reference, such as a block job. In this case, the BB is destroyed right away, but the BDS lives on until all extra references get dropped. Signed-off-by: Markus Armbruster <armbru@redhat.com> Reviewed-by: Max Reitz <mreitz@redhat.com> Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2014-10-07 11:59:04 +00:00
blk_unref(blk);
if (ret) {
return 1;
}
return 0;
}
static int img_resize(int argc, char **argv)
{
Error *err = NULL;
int c, ret, relative;
const char *filename, *fmt, *size;
int64_t n, total_size;
bool quiet = false;
block: New BlockBackend A block device consists of a frontend device model and a backend. A block backend has a tree of block drivers doing the actual work. The tree is managed by the block layer. We currently use a single abstraction BlockDriverState both for tree nodes and the backend as a whole. Drawbacks: * Its API includes both stuff that makes sense only at the block backend level (root of the tree) and stuff that's only for use within the block layer. This makes the API bigger and more complex than necessary. Moreover, it's not obvious which interfaces are meant for device models, and which really aren't. * Since device models keep a reference to their backend, the backend object can't just be destroyed. But for media change, we need to replace the tree. Our solution is to make the BlockDriverState generic, with actual driver state in a separate object, pointed to by member opaque. That lets us replace the tree by deinitializing and reinitializing its root. This special need of the root makes the data structure awkward everywhere in the tree. The general plan is to separate the APIs into "block backend", for use by device models, monitor and whatever other code dealing with block backends, and "block driver", for use by the block layer and whatever other code (if any) dealing with trees and tree nodes. Code dealing with block backends, device models in particular, should become completely oblivious of BlockDriverState. This should let us clean up both APIs, and the tree data structures. This commit is a first step. It creates a minimal "block backend" API: type BlockBackend and functions to create, destroy and find them. BlockBackend objects are created and destroyed exactly when root BlockDriverState objects are created and destroyed. "Root" in the sense of "in bdrv_states". They're not yet used for anything; that'll come shortly. A root BlockDriverState is created with bdrv_new_root(), so where to create a BlockBackend is obvious. Where these roots get destroyed isn't always as obvious. It is obvious in qemu-img.c, qemu-io.c and qemu-nbd.c, and in error paths of blockdev_init(), blk_connect(). That leaves destruction of objects successfully created by blockdev_init() and blk_connect(). blockdev_init() is used only by drive_new() and qmp_blockdev_add(). Objects created by the latter are currently indestructible (see commit 48f364d "blockdev: Refuse to drive_del something added with blockdev-add" and commit 2d246f0 "blockdev: Introduce DriveInfo.enable_auto_del"). Objects created by the former get destroyed by drive_del(). Objects created by blk_connect() get destroyed by blk_disconnect(). BlockBackend is reference-counted. Its reference count never exceeds one so far, but that's going to change. In drive_del(), the BB's reference count is surely one now. The BDS's reference count is greater than one when something else is holding a reference, such as a block job. In this case, the BB is destroyed right away, but the BDS lives on until all extra references get dropped. Signed-off-by: Markus Armbruster <armbru@redhat.com> Reviewed-by: Max Reitz <mreitz@redhat.com> Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2014-10-07 11:59:04 +00:00
BlockBackend *blk = NULL;
QemuOpts *param;
static QemuOptsList resize_options = {
.name = "resize_options",
.head = QTAILQ_HEAD_INITIALIZER(resize_options.head),
.desc = {
{
.name = BLOCK_OPT_SIZE,
.type = QEMU_OPT_SIZE,
.help = "Virtual disk size"
}, {
/* end of list */
}
},
};
bool image_opts = false;
/* Remove size from argv manually so that negative numbers are not treated
* as options by getopt. */
if (argc < 3) {
error_exit("Not enough arguments");
return 1;
}
size = argv[--argc];
/* Parse getopt arguments */
fmt = NULL;
for(;;) {
static const struct option long_options[] = {
{"help", no_argument, 0, 'h'},
{"object", required_argument, 0, OPTION_OBJECT},
{"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
{0, 0, 0, 0}
};
c = getopt_long(argc, argv, "f:hq",
long_options, NULL);
if (c == -1) {
break;
}
switch(c) {
case '?':
case 'h':
help();
break;
case 'f':
fmt = optarg;
break;
case 'q':
quiet = true;
break;
case OPTION_OBJECT: {
QemuOpts *opts;
opts = qemu_opts_parse_noisily(&qemu_object_opts,
optarg, true);
if (!opts) {
return 1;
}
} break;
case OPTION_IMAGE_OPTS:
image_opts = true;
break;
}
}
if (optind != argc - 1) {
error_exit("Expecting one image file name");
}
filename = argv[optind++];
if (qemu_opts_foreach(&qemu_object_opts,
user_creatable_add_opts_foreach,
qom: -object error messages lost location, restore it qemu_opts_foreach() runs its callback with the error location set to the option's location. Any errors the callback reports use the option's location automatically. Commit 90998d5 moved the actual error reporting from "inside" qemu_opts_foreach() to after it. Here's a typical hunk: if (qemu_opts_foreach(qemu_find_opts("object"), - object_create, - object_create_initial, NULL)) { + user_creatable_add_opts_foreach, + object_create_initial, &err)) { + error_report_err(err); exit(1); } Before, object_create() reports from within qemu_opts_foreach(), using the option's location. Afterwards, we do it after qemu_opts_foreach(), using whatever location happens to be current there. Commonly a "none" location. This is because Error objects don't have location information. Problematic. Reproducer: $ qemu-system-x86_64 -nodefaults -display none -object secret,id=foo,foo=bar qemu-system-x86_64: Property '.foo' not found Note no location. This commit restores it: qemu-system-x86_64: -object secret,id=foo,foo=bar: Property '.foo' not found Note that the qemu_opts_foreach() bug just fixed could mask the bug here: if the location it leaves dangling hasn't been clobbered, yet, it's the correct one. Reported-by: Eric Blake <eblake@redhat.com> Cc: Daniel P. Berrange <berrange@redhat.com> Signed-off-by: Markus Armbruster <armbru@redhat.com> Message-Id: <1461767349-15329-4-git-send-email-armbru@redhat.com> Reviewed-by: Daniel P. Berrange <berrange@redhat.com> Reviewed-by: Eric Blake <eblake@redhat.com> [Paragraph on Error added to commit message]
2016-04-27 14:29:09 +00:00
NULL, NULL)) {
return 1;
}
/* Choose grow, shrink, or absolute resize mode */
switch (size[0]) {
case '+':
relative = 1;
size++;
break;
case '-':
relative = -1;
size++;
break;
default:
relative = 0;
break;
}
/* Parse size */
param = qemu_opts_create(&resize_options, NULL, 0, &error_abort);
qemu_opt_set(param, BLOCK_OPT_SIZE, size, &err);
if (err) {
error_report_err(err);
ret = -1;
qemu_opts_del(param);
goto out;
}
n = qemu_opt_get_size(param, BLOCK_OPT_SIZE, 0);
qemu_opts_del(param);
blk = img_open(image_opts, filename, fmt,
BDRV_O_RDWR, false, quiet);
if (!blk) {
ret = -1;
goto out;
}
if (relative) {
total_size = blk_getlength(blk) + n * relative;
} else {
total_size = n;
}
if (total_size <= 0) {
error_report("New image size must be positive");
ret = -1;
goto out;
}
ret = blk_truncate(blk, total_size);
switch (ret) {
case 0:
qprintf(quiet, "Image resized.\n");
break;
case -ENOTSUP:
error_report("This image does not support resize");
break;
case -EACCES:
error_report("Image is read-only");
break;
default:
error_report("Error resizing image: %s", strerror(-ret));
break;
}
out:
block: New BlockBackend A block device consists of a frontend device model and a backend. A block backend has a tree of block drivers doing the actual work. The tree is managed by the block layer. We currently use a single abstraction BlockDriverState both for tree nodes and the backend as a whole. Drawbacks: * Its API includes both stuff that makes sense only at the block backend level (root of the tree) and stuff that's only for use within the block layer. This makes the API bigger and more complex than necessary. Moreover, it's not obvious which interfaces are meant for device models, and which really aren't. * Since device models keep a reference to their backend, the backend object can't just be destroyed. But for media change, we need to replace the tree. Our solution is to make the BlockDriverState generic, with actual driver state in a separate object, pointed to by member opaque. That lets us replace the tree by deinitializing and reinitializing its root. This special need of the root makes the data structure awkward everywhere in the tree. The general plan is to separate the APIs into "block backend", for use by device models, monitor and whatever other code dealing with block backends, and "block driver", for use by the block layer and whatever other code (if any) dealing with trees and tree nodes. Code dealing with block backends, device models in particular, should become completely oblivious of BlockDriverState. This should let us clean up both APIs, and the tree data structures. This commit is a first step. It creates a minimal "block backend" API: type BlockBackend and functions to create, destroy and find them. BlockBackend objects are created and destroyed exactly when root BlockDriverState objects are created and destroyed. "Root" in the sense of "in bdrv_states". They're not yet used for anything; that'll come shortly. A root BlockDriverState is created with bdrv_new_root(), so where to create a BlockBackend is obvious. Where these roots get destroyed isn't always as obvious. It is obvious in qemu-img.c, qemu-io.c and qemu-nbd.c, and in error paths of blockdev_init(), blk_connect(). That leaves destruction of objects successfully created by blockdev_init() and blk_connect(). blockdev_init() is used only by drive_new() and qmp_blockdev_add(). Objects created by the latter are currently indestructible (see commit 48f364d "blockdev: Refuse to drive_del something added with blockdev-add" and commit 2d246f0 "blockdev: Introduce DriveInfo.enable_auto_del"). Objects created by the former get destroyed by drive_del(). Objects created by blk_connect() get destroyed by blk_disconnect(). BlockBackend is reference-counted. Its reference count never exceeds one so far, but that's going to change. In drive_del(), the BB's reference count is surely one now. The BDS's reference count is greater than one when something else is holding a reference, such as a block job. In this case, the BB is destroyed right away, but the BDS lives on until all extra references get dropped. Signed-off-by: Markus Armbruster <armbru@redhat.com> Reviewed-by: Max Reitz <mreitz@redhat.com> Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2014-10-07 11:59:04 +00:00
blk_unref(blk);
if (ret) {
return 1;
}
return 0;
}
static void amend_status_cb(BlockDriverState *bs,
int64_t offset, int64_t total_work_size,
void *opaque)
{
qemu_progress_print(100.f * offset / total_work_size, 0);
}
static int img_amend(int argc, char **argv)
{
Error *err = NULL;
int c, ret = 0;
char *options = NULL;
QemuOptsList *create_opts = NULL;
QemuOpts *opts = NULL;
const char *fmt = NULL, *filename, *cache;
int flags;
bool writethrough;
bool quiet = false, progress = false;
block: New BlockBackend A block device consists of a frontend device model and a backend. A block backend has a tree of block drivers doing the actual work. The tree is managed by the block layer. We currently use a single abstraction BlockDriverState both for tree nodes and the backend as a whole. Drawbacks: * Its API includes both stuff that makes sense only at the block backend level (root of the tree) and stuff that's only for use within the block layer. This makes the API bigger and more complex than necessary. Moreover, it's not obvious which interfaces are meant for device models, and which really aren't. * Since device models keep a reference to their backend, the backend object can't just be destroyed. But for media change, we need to replace the tree. Our solution is to make the BlockDriverState generic, with actual driver state in a separate object, pointed to by member opaque. That lets us replace the tree by deinitializing and reinitializing its root. This special need of the root makes the data structure awkward everywhere in the tree. The general plan is to separate the APIs into "block backend", for use by device models, monitor and whatever other code dealing with block backends, and "block driver", for use by the block layer and whatever other code (if any) dealing with trees and tree nodes. Code dealing with block backends, device models in particular, should become completely oblivious of BlockDriverState. This should let us clean up both APIs, and the tree data structures. This commit is a first step. It creates a minimal "block backend" API: type BlockBackend and functions to create, destroy and find them. BlockBackend objects are created and destroyed exactly when root BlockDriverState objects are created and destroyed. "Root" in the sense of "in bdrv_states". They're not yet used for anything; that'll come shortly. A root BlockDriverState is created with bdrv_new_root(), so where to create a BlockBackend is obvious. Where these roots get destroyed isn't always as obvious. It is obvious in qemu-img.c, qemu-io.c and qemu-nbd.c, and in error paths of blockdev_init(), blk_connect(). That leaves destruction of objects successfully created by blockdev_init() and blk_connect(). blockdev_init() is used only by drive_new() and qmp_blockdev_add(). Objects created by the latter are currently indestructible (see commit 48f364d "blockdev: Refuse to drive_del something added with blockdev-add" and commit 2d246f0 "blockdev: Introduce DriveInfo.enable_auto_del"). Objects created by the former get destroyed by drive_del(). Objects created by blk_connect() get destroyed by blk_disconnect(). BlockBackend is reference-counted. Its reference count never exceeds one so far, but that's going to change. In drive_del(), the BB's reference count is surely one now. The BDS's reference count is greater than one when something else is holding a reference, such as a block job. In this case, the BB is destroyed right away, but the BDS lives on until all extra references get dropped. Signed-off-by: Markus Armbruster <armbru@redhat.com> Reviewed-by: Max Reitz <mreitz@redhat.com> Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2014-10-07 11:59:04 +00:00
BlockBackend *blk = NULL;
BlockDriverState *bs = NULL;
bool image_opts = false;
cache = BDRV_DEFAULT_CACHE;
for (;;) {
static const struct option long_options[] = {
{"help", no_argument, 0, 'h'},
{"object", required_argument, 0, OPTION_OBJECT},
{"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
{0, 0, 0, 0}
};
c = getopt_long(argc, argv, "ho:f:t:pq",
long_options, NULL);
if (c == -1) {
break;
}
switch (c) {
case 'h':
case '?':
help();
break;
case 'o':
if (!is_valid_option_list(optarg)) {
error_report("Invalid option list: %s", optarg);
ret = -1;
goto out_no_progress;
}
if (!options) {
options = g_strdup(optarg);
} else {
char *old_options = options;
options = g_strdup_printf("%s,%s", options, optarg);
g_free(old_options);
}
break;
case 'f':
fmt = optarg;
break;
case 't':
cache = optarg;
break;
case 'p':
progress = true;
break;
case 'q':
quiet = true;
break;
case OPTION_OBJECT:
opts = qemu_opts_parse_noisily(&qemu_object_opts,
optarg, true);
if (!opts) {
ret = -1;
goto out_no_progress;
}
break;
case OPTION_IMAGE_OPTS:
image_opts = true;
break;
}
}
if (!options) {
error_exit("Must specify options (-o)");
}
if (qemu_opts_foreach(&qemu_object_opts,
user_creatable_add_opts_foreach,
qom: -object error messages lost location, restore it qemu_opts_foreach() runs its callback with the error location set to the option's location. Any errors the callback reports use the option's location automatically. Commit 90998d5 moved the actual error reporting from "inside" qemu_opts_foreach() to after it. Here's a typical hunk: if (qemu_opts_foreach(qemu_find_opts("object"), - object_create, - object_create_initial, NULL)) { + user_creatable_add_opts_foreach, + object_create_initial, &err)) { + error_report_err(err); exit(1); } Before, object_create() reports from within qemu_opts_foreach(), using the option's location. Afterwards, we do it after qemu_opts_foreach(), using whatever location happens to be current there. Commonly a "none" location. This is because Error objects don't have location information. Problematic. Reproducer: $ qemu-system-x86_64 -nodefaults -display none -object secret,id=foo,foo=bar qemu-system-x86_64: Property '.foo' not found Note no location. This commit restores it: qemu-system-x86_64: -object secret,id=foo,foo=bar: Property '.foo' not found Note that the qemu_opts_foreach() bug just fixed could mask the bug here: if the location it leaves dangling hasn't been clobbered, yet, it's the correct one. Reported-by: Eric Blake <eblake@redhat.com> Cc: Daniel P. Berrange <berrange@redhat.com> Signed-off-by: Markus Armbruster <armbru@redhat.com> Message-Id: <1461767349-15329-4-git-send-email-armbru@redhat.com> Reviewed-by: Daniel P. Berrange <berrange@redhat.com> Reviewed-by: Eric Blake <eblake@redhat.com> [Paragraph on Error added to commit message]
2016-04-27 14:29:09 +00:00
NULL, NULL)) {
ret = -1;
goto out_no_progress;
}
if (quiet) {
progress = false;
}
qemu_progress_init(progress, 1.0);
filename = (optind == argc - 1) ? argv[argc - 1] : NULL;
if (fmt && has_help_option(options)) {
/* If a format is explicitly specified (and possibly no filename is
* given), print option help here */
ret = print_block_option_help(filename, fmt);
goto out;
}
if (optind != argc - 1) {
error_report("Expecting one image file name");
ret = -1;
goto out;
}
flags = BDRV_O_RDWR;
ret = bdrv_parse_cache_mode(cache, &flags, &writethrough);
if (ret < 0) {
error_report("Invalid cache option: %s", cache);
goto out;
}
blk = img_open(image_opts, filename, fmt, flags, writethrough, quiet);
if (!blk) {
ret = -1;
goto out;
}
bs = blk_bs(blk);
fmt = bs->drv->format_name;
if (has_help_option(options)) {
/* If the format was auto-detected, print option help here */
ret = print_block_option_help(filename, fmt);
goto out;
}
if (!bs->drv->create_opts) {
error_report("Format driver '%s' does not support any options to amend",
fmt);
ret = -1;
goto out;
}
create_opts = qemu_opts_append(create_opts, bs->drv->create_opts);
opts = qemu_opts_create(create_opts, NULL, 0, &error_abort);
if (options) {
qemu_opts_do_parse(opts, options, NULL, &err);
if (err) {
error_report_err(err);
ret = -1;
goto out;
}
}
/* In case the driver does not call amend_status_cb() */
qemu_progress_print(0.f, 0);
ret = bdrv_amend_options(bs, opts, &amend_status_cb, NULL);
qemu_progress_print(100.f, 0);
if (ret < 0) {
error_report("Error while amending options: %s", strerror(-ret));
goto out;
}
out:
qemu_progress_end();
out_no_progress:
block: New BlockBackend A block device consists of a frontend device model and a backend. A block backend has a tree of block drivers doing the actual work. The tree is managed by the block layer. We currently use a single abstraction BlockDriverState both for tree nodes and the backend as a whole. Drawbacks: * Its API includes both stuff that makes sense only at the block backend level (root of the tree) and stuff that's only for use within the block layer. This makes the API bigger and more complex than necessary. Moreover, it's not obvious which interfaces are meant for device models, and which really aren't. * Since device models keep a reference to their backend, the backend object can't just be destroyed. But for media change, we need to replace the tree. Our solution is to make the BlockDriverState generic, with actual driver state in a separate object, pointed to by member opaque. That lets us replace the tree by deinitializing and reinitializing its root. This special need of the root makes the data structure awkward everywhere in the tree. The general plan is to separate the APIs into "block backend", for use by device models, monitor and whatever other code dealing with block backends, and "block driver", for use by the block layer and whatever other code (if any) dealing with trees and tree nodes. Code dealing with block backends, device models in particular, should become completely oblivious of BlockDriverState. This should let us clean up both APIs, and the tree data structures. This commit is a first step. It creates a minimal "block backend" API: type BlockBackend and functions to create, destroy and find them. BlockBackend objects are created and destroyed exactly when root BlockDriverState objects are created and destroyed. "Root" in the sense of "in bdrv_states". They're not yet used for anything; that'll come shortly. A root BlockDriverState is created with bdrv_new_root(), so where to create a BlockBackend is obvious. Where these roots get destroyed isn't always as obvious. It is obvious in qemu-img.c, qemu-io.c and qemu-nbd.c, and in error paths of blockdev_init(), blk_connect(). That leaves destruction of objects successfully created by blockdev_init() and blk_connect(). blockdev_init() is used only by drive_new() and qmp_blockdev_add(). Objects created by the latter are currently indestructible (see commit 48f364d "blockdev: Refuse to drive_del something added with blockdev-add" and commit 2d246f0 "blockdev: Introduce DriveInfo.enable_auto_del"). Objects created by the former get destroyed by drive_del(). Objects created by blk_connect() get destroyed by blk_disconnect(). BlockBackend is reference-counted. Its reference count never exceeds one so far, but that's going to change. In drive_del(), the BB's reference count is surely one now. The BDS's reference count is greater than one when something else is holding a reference, such as a block job. In this case, the BB is destroyed right away, but the BDS lives on until all extra references get dropped. Signed-off-by: Markus Armbruster <armbru@redhat.com> Reviewed-by: Max Reitz <mreitz@redhat.com> Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2014-10-07 11:59:04 +00:00
blk_unref(blk);
qemu_opts_del(opts);
qemu_opts_free(create_opts);
g_free(options);
if (ret) {
return 1;
}
return 0;
}
typedef struct BenchData {
BlockBackend *blk;
uint64_t image_size;
bool write;
int bufsize;
int step;
int nrreq;
int n;
int flush_interval;
bool drain_on_flush;
uint8_t *buf;
QEMUIOVector *qiov;
int in_flight;
bool in_flush;
uint64_t offset;
} BenchData;
static void bench_undrained_flush_cb(void *opaque, int ret)
{
if (ret < 0) {
error_report("Failed flush request: %s", strerror(-ret));
exit(EXIT_FAILURE);
}
}
static void bench_cb(void *opaque, int ret)
{
BenchData *b = opaque;
BlockAIOCB *acb;
if (ret < 0) {
error_report("Failed request: %s", strerror(-ret));
exit(EXIT_FAILURE);
}
if (b->in_flush) {
/* Just finished a flush with drained queue: Start next requests */
assert(b->in_flight == 0);
b->in_flush = false;
} else if (b->in_flight > 0) {
int remaining = b->n - b->in_flight;
b->n--;
b->in_flight--;
/* Time for flush? Drain queue if requested, then flush */
if (b->flush_interval && remaining % b->flush_interval == 0) {
if (!b->in_flight || !b->drain_on_flush) {
BlockCompletionFunc *cb;
if (b->drain_on_flush) {
b->in_flush = true;
cb = bench_cb;
} else {
cb = bench_undrained_flush_cb;
}
acb = blk_aio_flush(b->blk, cb, b);
if (!acb) {
error_report("Failed to issue flush request");
exit(EXIT_FAILURE);
}
}
if (b->drain_on_flush) {
return;
}
}
}
while (b->n > b->in_flight && b->in_flight < b->nrreq) {
if (b->write) {
acb = blk_aio_pwritev(b->blk, b->offset, b->qiov, 0,
bench_cb, b);
} else {
acb = blk_aio_preadv(b->blk, b->offset, b->qiov, 0,
bench_cb, b);
}
if (!acb) {
error_report("Failed to issue request");
exit(EXIT_FAILURE);
}
b->in_flight++;
b->offset += b->step;
b->offset %= b->image_size;
}
}
static int img_bench(int argc, char **argv)
{
int c, ret = 0;
const char *fmt = NULL, *filename;
bool quiet = false;
bool image_opts = false;
bool is_write = false;
int count = 75000;
int depth = 64;
int64_t offset = 0;
size_t bufsize = 4096;
int pattern = 0;
size_t step = 0;
int flush_interval = 0;
bool drain_on_flush = true;
int64_t image_size;
BlockBackend *blk = NULL;
BenchData data = {};
int flags = 0;
bool writethrough = false;
struct timeval t1, t2;
int i;
for (;;) {
static const struct option long_options[] = {
{"help", no_argument, 0, 'h'},
{"flush-interval", required_argument, 0, OPTION_FLUSH_INTERVAL},
{"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
{"pattern", required_argument, 0, OPTION_PATTERN},
{"no-drain", no_argument, 0, OPTION_NO_DRAIN},
{0, 0, 0, 0}
};
c = getopt_long(argc, argv, "hc:d:f:no:qs:S:t:w", long_options, NULL);
if (c == -1) {
break;
}
switch (c) {
case 'h':
case '?':
help();
break;
case 'c':
{
char *end;
errno = 0;
count = strtoul(optarg, &end, 0);
if (errno || *end || count > INT_MAX) {
error_report("Invalid request count specified");
return 1;
}
break;
}
case 'd':
{
char *end;
errno = 0;
depth = strtoul(optarg, &end, 0);
if (errno || *end || depth > INT_MAX) {
error_report("Invalid queue depth specified");
return 1;
}
break;
}
case 'f':
fmt = optarg;
break;
case 'n':
flags |= BDRV_O_NATIVE_AIO;
break;
case 'o':
{
char *end;
errno = 0;
offset = qemu_strtosz_suffix(optarg, &end,
QEMU_STRTOSZ_DEFSUFFIX_B);
if (offset < 0|| *end) {
error_report("Invalid offset specified");
return 1;
}
break;
}
break;
case 'q':
quiet = true;
break;
case 's':
{
int64_t sval;
char *end;
sval = qemu_strtosz_suffix(optarg, &end, QEMU_STRTOSZ_DEFSUFFIX_B);
if (sval < 0 || sval > INT_MAX || *end) {
error_report("Invalid buffer size specified");
return 1;
}
bufsize = sval;
break;
}
case 'S':
{
int64_t sval;
char *end;
sval = qemu_strtosz_suffix(optarg, &end, QEMU_STRTOSZ_DEFSUFFIX_B);
if (sval < 0 || sval > INT_MAX || *end) {
error_report("Invalid step size specified");
return 1;
}
step = sval;
break;
}
case 't':
ret = bdrv_parse_cache_mode(optarg, &flags, &writethrough);
if (ret < 0) {
error_report("Invalid cache mode");
ret = -1;
goto out;
}
break;
case 'w':
flags |= BDRV_O_RDWR;
is_write = true;
break;
case OPTION_PATTERN:
{
char *end;
errno = 0;
pattern = strtoul(optarg, &end, 0);
if (errno || *end || pattern > 0xff) {
error_report("Invalid pattern byte specified");
return 1;
}
break;
}
case OPTION_FLUSH_INTERVAL:
{
char *end;
errno = 0;
flush_interval = strtoul(optarg, &end, 0);
if (errno || *end || flush_interval > INT_MAX) {
error_report("Invalid flush interval specified");
return 1;
}
break;
}
case OPTION_NO_DRAIN:
drain_on_flush = false;
break;
case OPTION_IMAGE_OPTS:
image_opts = true;
break;
}
}
if (optind != argc - 1) {
error_exit("Expecting one image file name");
}
filename = argv[argc - 1];
if (!is_write && flush_interval) {
error_report("--flush-interval is only available in write tests");
ret = -1;
goto out;
}
if (flush_interval && flush_interval < depth) {
error_report("Flush interval can't be smaller than depth");
ret = -1;
goto out;
}
blk = img_open(image_opts, filename, fmt, flags, writethrough, quiet);
if (!blk) {
ret = -1;
goto out;
}
image_size = blk_getlength(blk);
if (image_size < 0) {
ret = image_size;
goto out;
}
data = (BenchData) {
.blk = blk,
.image_size = image_size,
.bufsize = bufsize,
.step = step ?: bufsize,
.nrreq = depth,
.n = count,
.offset = offset,
.write = is_write,
.flush_interval = flush_interval,
.drain_on_flush = drain_on_flush,
};
printf("Sending %d %s requests, %d bytes each, %d in parallel "
"(starting at offset %" PRId64 ", step size %d)\n",
data.n, data.write ? "write" : "read", data.bufsize, data.nrreq,
data.offset, data.step);
if (flush_interval) {
printf("Sending flush every %d requests\n", flush_interval);
}
data.buf = blk_blockalign(blk, data.nrreq * data.bufsize);
memset(data.buf, pattern, data.nrreq * data.bufsize);
data.qiov = g_new(QEMUIOVector, data.nrreq);
for (i = 0; i < data.nrreq; i++) {
qemu_iovec_init(&data.qiov[i], 1);
qemu_iovec_add(&data.qiov[i],
data.buf + i * data.bufsize, data.bufsize);
}
gettimeofday(&t1, NULL);
bench_cb(&data, 0);
while (data.n > 0) {
main_loop_wait(false);
}
gettimeofday(&t2, NULL);
printf("Run completed in %3.3f seconds.\n",
(t2.tv_sec - t1.tv_sec)
+ ((double)(t2.tv_usec - t1.tv_usec) / 1000000));
out:
qemu_vfree(data.buf);
blk_unref(blk);
if (ret) {
return 1;
}
return 0;
}
static const img_cmd_t img_cmds[] = {
#define DEF(option, callback, arg_string) \
{ option, callback },
#include "qemu-img-cmds.h"
#undef DEF
#undef GEN_DOCS
{ NULL, NULL, },
};
int main(int argc, char **argv)
{
const img_cmd_t *cmd;
const char *cmdname;
Error *local_error = NULL;
char *trace_file = NULL;
int c;
static const struct option long_options[] = {
{"help", no_argument, 0, 'h'},
{"version", no_argument, 0, 'V'},
{"trace", required_argument, NULL, 'T'},
{0, 0, 0, 0}
};
#ifdef CONFIG_POSIX
signal(SIGPIPE, SIG_IGN);
#endif
error_set_progname(argv[0]);
qemu_init_exec_dir(argv[0]);
if (qemu_init_main_loop(&local_error)) {
error_report_err(local_error);
exit(EXIT_FAILURE);
}
qcrypto_init(&error_fatal);
module_call_init(MODULE_INIT_QOM);
bdrv_init();
if (argc < 2) {
error_exit("Not enough arguments");
}
qemu_add_opts(&qemu_object_opts);
qemu_add_opts(&qemu_source_opts);
qemu_add_opts(&qemu_trace_opts);
while ((c = getopt_long(argc, argv, "+hVT:", long_options, NULL)) != -1) {
switch (c) {
case 'h':
help();
return 0;
case 'V':
printf(QEMU_IMG_VERSION);
return 0;
case 'T':
g_free(trace_file);
trace_file = trace_opt_parse(optarg);
break;
}
}
cmdname = argv[optind];
/* reset getopt_long scanning */
argc -= optind;
if (argc < 1) {
return 0;
}
argv += optind;
optind = 0;
if (!trace_init_backends()) {
exit(1);
}
trace_init_file(trace_file);
qemu_set_log(LOG_TRACE);
/* find the command */
for (cmd = img_cmds; cmd->name != NULL; cmd++) {
if (!strcmp(cmdname, cmd->name)) {
return cmd->handler(argc, argv);
}
}
/* not found */
error_exit("Command not found: %s", cmdname);
}