mirror: allow customizing the granularity

The desired granularity may be very different depending on the kind of
operation (e.g. continuous replication vs. collapse-to-raw) and whether
the VM is expected to perform lots of I/O while mirroring is in progress.

Allow the user to customize it, while providing a sane default so that
in general there will be no extra allocated space in the target compared
to the source.

Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
This commit is contained in:
Paolo Bonzini 2013-01-21 17:09:46 +01:00 committed by Kevin Wolf
parent 50717e941b
commit eee13dfe30
6 changed files with 64 additions and 24 deletions

View file

@ -17,9 +17,6 @@
#include "qemu/ratelimit.h"
#include "qemu/bitmap.h"
#define BLOCK_SIZE (1 << 20)
#define BDRV_SECTORS_PER_DIRTY_CHUNK (BLOCK_SIZE >> BDRV_SECTOR_BITS)
#define SLICE_TIME 100000000ULL /* ns */
typedef struct MirrorBlockJob {
@ -31,6 +28,7 @@ typedef struct MirrorBlockJob {
bool synced;
bool should_complete;
int64_t sector_num;
int64_t granularity;
size_t buf_size;
unsigned long *cow_bitmap;
HBitmapIter hbi;
@ -56,7 +54,7 @@ static int coroutine_fn mirror_iteration(MirrorBlockJob *s,
BlockDriverState *source = s->common.bs;
BlockDriverState *target = s->target;
QEMUIOVector qiov;
int ret, nb_sectors;
int ret, nb_sectors, sectors_per_chunk;
int64_t end, sector_num, chunk_num;
struct iovec iov;
@ -72,16 +70,16 @@ static int coroutine_fn mirror_iteration(MirrorBlockJob *s,
* is very large, we need to do COW ourselves. The first time a cluster is
* copied, copy it entirely.
*
* Because both BDRV_SECTORS_PER_DIRTY_CHUNK and the cluster size are
* powers of two, the number of sectors to copy cannot exceed one cluster.
* Because both the granularity and the cluster size are powers of two, the
* number of sectors to copy cannot exceed one cluster.
*/
sector_num = s->sector_num;
nb_sectors = BDRV_SECTORS_PER_DIRTY_CHUNK;
chunk_num = sector_num / BDRV_SECTORS_PER_DIRTY_CHUNK;
sectors_per_chunk = nb_sectors = s->granularity >> BDRV_SECTOR_BITS;
chunk_num = sector_num / sectors_per_chunk;
if (s->cow_bitmap && !test_bit(chunk_num, s->cow_bitmap)) {
trace_mirror_cow(s, sector_num);
bdrv_round_to_clusters(s->target,
sector_num, BDRV_SECTORS_PER_DIRTY_CHUNK,
sector_num, sectors_per_chunk,
&sector_num, &nb_sectors);
}
@ -107,8 +105,8 @@ static int coroutine_fn mirror_iteration(MirrorBlockJob *s,
goto fail;
}
if (s->cow_bitmap) {
bitmap_set(s->cow_bitmap, sector_num / BDRV_SECTORS_PER_DIRTY_CHUNK,
nb_sectors / BDRV_SECTORS_PER_DIRTY_CHUNK);
bitmap_set(s->cow_bitmap, sector_num / sectors_per_chunk,
nb_sectors / sectors_per_chunk);
}
return 0;
@ -122,7 +120,7 @@ static void coroutine_fn mirror_run(void *opaque)
{
MirrorBlockJob *s = opaque;
BlockDriverState *bs = s->common.bs;
int64_t sector_num, end, length;
int64_t sector_num, end, sectors_per_chunk, length;
BlockDriverInfo bdi;
char backing_filename[1024];
int ret = 0;
@ -146,22 +144,23 @@ static void coroutine_fn mirror_run(void *opaque)
sizeof(backing_filename));
if (backing_filename[0] && !s->target->backing_hd) {
bdrv_get_info(s->target, &bdi);
if (s->buf_size < bdi.cluster_size) {
if (s->granularity < bdi.cluster_size) {
s->buf_size = bdi.cluster_size;
length = (bdrv_getlength(bs) + BLOCK_SIZE - 1) / BLOCK_SIZE;
length = (bdrv_getlength(bs) + s->granularity - 1) / s->granularity;
s->cow_bitmap = bitmap_new(length);
}
}
end = s->common.len >> BDRV_SECTOR_BITS;
s->buf = qemu_blockalign(bs, s->buf_size);
sectors_per_chunk = s->granularity >> BDRV_SECTOR_BITS;
if (s->mode != MIRROR_SYNC_MODE_NONE) {
/* First part, loop on the sectors and initialize the dirty bitmap. */
BlockDriverState *base;
base = s->mode == MIRROR_SYNC_MODE_FULL ? NULL : bs->backing_hd;
for (sector_num = 0; sector_num < end; ) {
int64_t next = (sector_num | (BDRV_SECTORS_PER_DIRTY_CHUNK - 1)) + 1;
int64_t next = (sector_num | (sectors_per_chunk - 1)) + 1;
ret = bdrv_co_is_allocated_above(bs, base,
sector_num, next - sector_num, &n);
@ -242,7 +241,7 @@ static void coroutine_fn mirror_run(void *opaque)
s->common.offset = (end - cnt) * BDRV_SECTOR_SIZE;
if (s->common.speed) {
delay_ns = ratelimit_calculate_delay(&s->limit, BDRV_SECTORS_PER_DIRTY_CHUNK);
delay_ns = ratelimit_calculate_delay(&s->limit, sectors_per_chunk);
} else {
delay_ns = 0;
}
@ -332,7 +331,7 @@ static BlockJobType mirror_job_type = {
};
void mirror_start(BlockDriverState *bs, BlockDriverState *target,
int64_t speed, MirrorSyncMode mode,
int64_t speed, int64_t granularity, MirrorSyncMode mode,
BlockdevOnError on_source_error,
BlockdevOnError on_target_error,
BlockDriverCompletionFunc *cb,
@ -340,6 +339,20 @@ void mirror_start(BlockDriverState *bs, BlockDriverState *target,
{
MirrorBlockJob *s;
if (granularity == 0) {
/* Choose the default granularity based on the target file's cluster
* size, clamped between 4k and 64k. */
BlockDriverInfo bdi;
if (bdrv_get_info(target, &bdi) >= 0 && bdi.cluster_size != 0) {
granularity = MAX(4096, bdi.cluster_size);
granularity = MIN(65536, granularity);
} else {
granularity = 65536;
}
}
assert ((granularity & (granularity - 1)) == 0);
if ((on_source_error == BLOCKDEV_ON_ERROR_STOP ||
on_source_error == BLOCKDEV_ON_ERROR_ENOSPC) &&
!bdrv_iostatus_is_enabled(bs)) {
@ -356,9 +369,10 @@ void mirror_start(BlockDriverState *bs, BlockDriverState *target,
s->on_target_error = on_target_error;
s->target = target;
s->mode = mode;
s->buf_size = BLOCK_SIZE;
s->granularity = granularity;
s->buf_size = granularity;
bdrv_set_dirty_tracking(bs, BLOCK_SIZE);
bdrv_set_dirty_tracking(bs, granularity);
bdrv_set_enable_write_cache(s->target, true);
bdrv_set_on_error(s->target, on_target_error, on_target_error);
bdrv_iostatus_enable(s->target);

View file

@ -1193,6 +1193,7 @@ void qmp_drive_mirror(const char *device, const char *target,
enum MirrorSyncMode sync,
bool has_mode, enum NewImageMode mode,
bool has_speed, int64_t speed,
bool has_granularity, uint32_t granularity,
bool has_on_source_error, BlockdevOnError on_source_error,
bool has_on_target_error, BlockdevOnError on_target_error,
Error **errp)
@ -1218,6 +1219,17 @@ void qmp_drive_mirror(const char *device, const char *target,
if (!has_mode) {
mode = NEW_IMAGE_MODE_ABSOLUTE_PATHS;
}
if (!has_granularity) {
granularity = 0;
}
if (granularity != 0 && (granularity < 512 || granularity > 1048576 * 64)) {
error_set(errp, QERR_INVALID_PARAMETER, device);
return;
}
if (granularity & (granularity - 1)) {
error_set(errp, QERR_INVALID_PARAMETER, device);
return;
}
bs = bdrv_find(device);
if (!bs) {
@ -1299,7 +1311,8 @@ void qmp_drive_mirror(const char *device, const char *target,
return;
}
mirror_start(bs, target_bs, speed, sync, on_source_error, on_target_error,
mirror_start(bs, target_bs, speed, granularity, sync,
on_source_error, on_target_error,
block_job_cb, bs, &local_err);
if (local_err != NULL) {
bdrv_delete(target_bs);

2
hmp.c
View file

@ -796,7 +796,7 @@ void hmp_drive_mirror(Monitor *mon, const QDict *qdict)
qmp_drive_mirror(device, filename, !!format, format,
full ? MIRROR_SYNC_MODE_FULL : MIRROR_SYNC_MODE_TOP,
true, mode, false, 0,
true, mode, false, 0, false, 0,
false, 0, false, 0, &errp);
hmp_handle_error(mon, &errp);
}

View file

@ -344,6 +344,7 @@ void commit_start(BlockDriverState *bs, BlockDriverState *base,
* @bs: Block device to operate on.
* @target: Block device to write to.
* @speed: The maximum speed, in bytes per second, or 0 for unlimited.
* @granularity: The granularity of the dirty bitmap, in bytes; must be a
*               power of two.  Pass 0 to pick a default based on the
*               cluster size of @target.
* @mode: Whether to collapse all images in the chain to the target.
* @on_source_error: The action to take upon error reading from the source.
* @on_target_error: The action to take upon error writing to the target.
@ -357,7 +358,7 @@ void commit_start(BlockDriverState *bs, BlockDriverState *base,
* @bs will be switched to read from @target.
*/
void mirror_start(BlockDriverState *bs, BlockDriverState *target,
int64_t speed, MirrorSyncMode mode,
int64_t speed, int64_t granularity, MirrorSyncMode mode,
BlockdevOnError on_source_error,
BlockdevOnError on_target_error,
BlockDriverCompletionFunc *cb,

View file

@ -1636,6 +1636,11 @@
# (all the disk, only the sectors allocated in the topmost image, or
# only new I/O).
#
# @granularity: #optional granularity of the dirty bitmap, in bytes.  The
# default is 64K if the image format doesn't have clusters; otherwise
# it is the cluster size, clamped between 4K and 64K.  Must be a
# power of 2 between 512 and 64M (since 1.4).
#
# @on-source-error: #optional the action to take on an error on the source,
# default 'report'. 'stop' and 'enospc' can only be used
# if the block device supports io-status (see BlockInfo).
@ -1652,7 +1657,8 @@
{ 'command': 'drive-mirror',
'data': { 'device': 'str', 'target': 'str', '*format': 'str',
'sync': 'MirrorSyncMode', '*mode': 'NewImageMode',
'*speed': 'int', '*on-source-error': 'BlockdevOnError',
'*speed': 'int', '*granularity': 'uint32',
'*on-source-error': 'BlockdevOnError',
'*on-target-error': 'BlockdevOnError' } }
##

View file

@ -938,7 +938,8 @@ EQMP
{
.name = "drive-mirror",
.args_type = "sync:s,device:B,target:s,speed:i?,mode:s?,format:s?,"
"on-source-error:s?,on-target-error:s?",
"on-source-error:s?,on-target-error:s?,"
"granularity:i?",
.mhandler.cmd_new = qmp_marshal_input_drive_mirror,
},
@ -962,6 +963,7 @@ Arguments:
file/device (NewImageMode, optional, default 'absolute-paths')
- "speed": maximum speed of the streaming job, in bytes per second
(json-int)
- "granularity": granularity of the dirty bitmap, in bytes (json-int, optional)
- "sync": what parts of the disk image should be copied to the destination;
possibilities include "full" for all the disk, "top" for only the sectors
allocated in the topmost image, or "none" to only replicate new I/O
@ -971,6 +973,10 @@ Arguments:
- "on-target-error": the action to take on an error on the target
(BlockdevOnError, default 'report')
The default value of the granularity is the cluster size of the target
image, clamped between 4096 and 65536 bytes, if the target's image format
defines one. If the format does not define a cluster size, the default
value of the granularity is 65536 bytes.
Example: