Block patches

-----BEGIN PGP SIGNATURE-----
 Version: GnuPG v2.0.22 (GNU/Linux)
 
 iQIcBAABAgAGBQJT7gYeAAoJEH8JsnLIjy/WWhoQAIcD8jfk/pl5tFXpayMLzDis
 ihaqYc9IJV29L+cIF0YFAclLkCKrmfJg49vhxomIq2WxxAZ9qX6lfi2V602qg51e
 wzxLcTA1eq15w1yTGDORPMStP8lAsxbMhd+37s33teMxukvLsEfiT0Tbd9GbfzYk
 jmQ4QYjw8nd/C1NEzR4TTcHmwXvbP0TjGTDhbcBzChB71zxe5q827y2J+I/ki+0A
 4MBvf6OCMSAbY5sQo01csXxNABkM/fw9BmNgml8G2a24eFRWg9VU4WuBYVlVys0S
 ZZAcc2KiUNs1Zp7SxrTpI8hR0+SsuabVMvAdW8oct/6BRaR4t6toIsoQs1qQuHRD
 l18ErqulJpCTP+eMbGNgXKAAYvmb9ylQGOX+mnz9fnNXF3dyLyu77fFtL3FxXedE
 KkcQyfFf1l5ENMw/DArzjqwdrHJgm9kcOE2lyAmbq2+Ad6kJqVTTWNR6hojIRx7G
 lo2Rn+VLpjnmX8XNTiQokNeMsqKbTAF2M8KwEyYNPEz6WkpRTVWJcy3Cp4fDF65L
 TaYsB7M1b30D7CFAZTBx76MLQirhzNq1XxJZlMafXSymfDryZVMrMWSjmlb2bmlX
 Fer7iFWpfA8BHI46sZ0NqoGcljKr811dTxAqvsoek5lP6Kn+xnSqjeHSRzqNUl4l
 EfQW7ZR1vlRoKtrGqNBQ
 =5w3j
 -----END PGP SIGNATURE-----

Merge remote-tracking branch 'remotes/kevin/tags/for-upstream' into staging

Block patches

# gpg: Signature made Fri 15 Aug 2014 14:07:42 BST using RSA key ID C88F2FD6
# gpg: Good signature from "Kevin Wolf <kwolf@redhat.com>"

* remotes/kevin/tags/for-upstream: (59 commits)
  block: Catch !bs->drv in bdrv_check()
  iotests: Add test for image header overlap
  qcow2: Catch !*host_offset for data allocation
  qcow2: Return useful error code in refcount_init()
  mirror: Handle failure for potentially large allocations
  vpc: Handle failure for potentially large allocations
  vmdk: Handle failure for potentially large allocations
  vhdx: Handle failure for potentially large allocations
  vdi: Handle failure for potentially large allocations
  rbd: Handle failure for potentially large allocations
  raw-win32: Handle failure for potentially large allocations
  raw-posix: Handle failure for potentially large allocations
  qed: Handle failure for potentially large allocations
  qcow2: Handle failure for potentially large allocations
  qcow1: Handle failure for potentially large allocations
  parallels: Handle failure for potentially large allocations
  nfs: Handle failure for potentially large allocations
  iscsi: Handle failure for potentially large allocations
  dmg: Handle failure for potentially large allocations
  curl: Handle failure for potentially large allocations
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
This commit is contained in:
Peter Maydell 2014-08-15 14:49:50 +01:00
commit f2fb1da941
57 changed files with 2509 additions and 480 deletions

View file

@ -1000,3 +1000,9 @@ SSH
M: Richard W.M. Jones <rjones@redhat.com>
S: Supported
F: block/ssh.c
ARCHIPELAGO
M: Chrysostomos Nanakos <cnanakos@grnet.gr>
M: Chrysostomos Nanakos <chris@include.gr>
S: Maintained
F: block/archipelago.c

View file

@ -186,7 +186,7 @@ static int bmds_aio_inflight(BlkMigDevState *bmds, int64_t sector)
{
int64_t chunk = sector / (int64_t)BDRV_SECTORS_PER_DIRTY_CHUNK;
if ((sector << BDRV_SECTOR_BITS) < bdrv_getlength(bmds->bs)) {
if (sector < bdrv_nb_sectors(bmds->bs)) {
return !!(bmds->aio_bitmap[chunk / (sizeof(unsigned long) * 8)] &
(1UL << (chunk % (sizeof(unsigned long) * 8))));
} else {
@ -223,8 +223,7 @@ static void alloc_aio_bitmap(BlkMigDevState *bmds)
BlockDriverState *bs = bmds->bs;
int64_t bitmap_size;
bitmap_size = (bdrv_getlength(bs) >> BDRV_SECTOR_BITS) +
BDRV_SECTORS_PER_DIRTY_CHUNK * 8 - 1;
bitmap_size = bdrv_nb_sectors(bs) + BDRV_SECTORS_PER_DIRTY_CHUNK * 8 - 1;
bitmap_size /= BDRV_SECTORS_PER_DIRTY_CHUNK * 8;
bmds->aio_bitmap = g_malloc0(bitmap_size);
@ -350,7 +349,7 @@ static void init_blk_migration_it(void *opaque, BlockDriverState *bs)
int64_t sectors;
if (!bdrv_is_read_only(bs)) {
sectors = bdrv_getlength(bs) >> BDRV_SECTOR_BITS;
sectors = bdrv_nb_sectors(bs);
if (sectors <= 0) {
return;
}
@ -799,7 +798,7 @@ static int block_load(QEMUFile *f, void *opaque, int version_id)
if (bs != bs_prev) {
bs_prev = bs;
total_sectors = bdrv_getlength(bs) >> BDRV_SECTOR_BITS;
total_sectors = bdrv_nb_sectors(bs);
if (total_sectors <= 0) {
error_report("Error getting length of block device %s",
device_name);

140
block.c
View file

@ -57,6 +57,8 @@ struct BdrvDirtyBitmap {
#define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */
#define COROUTINE_POOL_RESERVATION 64 /* number of coroutines to reserve */
static void bdrv_dev_change_media_cb(BlockDriverState *bs, bool load);
static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
@ -701,6 +703,7 @@ static int find_image_format(BlockDriverState *bs, const char *filename,
/**
* Set the current 'total_sectors' value
* Return 0 on success, -errno on error.
*/
static int refresh_total_sectors(BlockDriverState *bs, int64_t hint)
{
@ -1313,7 +1316,6 @@ int bdrv_append_temp_snapshot(BlockDriverState *bs, int flags, Error **errp)
error_setg_errno(errp, -total_size, "Could not get image size");
goto out;
}
total_size &= BDRV_SECTOR_MASK;
/* Create the temporary image */
ret = get_tmp_filename(tmp_filename, PATH_MAX + 1);
@ -2107,6 +2109,9 @@ int bdrv_attach_dev(BlockDriverState *bs, void *dev)
}
bs->dev = dev;
bdrv_iostatus_reset(bs);
/* We're expecting I/O from the device so bump up coroutine pool size */
qemu_coroutine_adjust_pool_size(COROUTINE_POOL_RESERVATION);
return 0;
}
@ -2126,6 +2131,7 @@ void bdrv_detach_dev(BlockDriverState *bs, void *dev)
bs->dev_ops = NULL;
bs->dev_opaque = NULL;
bs->guest_block_size = 512;
qemu_coroutine_adjust_pool_size(-COROUTINE_POOL_RESERVATION);
}
/* TODO change to return DeviceState * when all users are qdevified */
@ -2203,6 +2209,9 @@ bool bdrv_dev_is_medium_locked(BlockDriverState *bs)
*/
int bdrv_check(BlockDriverState *bs, BdrvCheckResult *res, BdrvCheckMode fix)
{
if (bs->drv == NULL) {
return -ENOMEDIUM;
}
if (bs->drv->bdrv_check == NULL) {
return -ENOTSUP;
}
@ -2269,7 +2278,14 @@ int bdrv_commit(BlockDriverState *bs)
}
total_sectors = length >> BDRV_SECTOR_BITS;
buf = g_malloc(COMMIT_BUF_SECTORS * BDRV_SECTOR_SIZE);
/* qemu_try_blockalign() for bs will choose an alignment that works for
* bs->backing_hd as well, so no need to compare the alignment manually. */
buf = qemu_try_blockalign(bs, COMMIT_BUF_SECTORS * BDRV_SECTOR_SIZE);
if (buf == NULL) {
ret = -ENOMEM;
goto ro_cleanup;
}
for (sector = 0; sector < total_sectors; sector += n) {
ret = bdrv_is_allocated(bs, sector, COMMIT_BUF_SECTORS, &n);
@ -2307,7 +2323,7 @@ int bdrv_commit(BlockDriverState *bs)
ret = 0;
ro_cleanup:
g_free(buf);
qemu_vfree(buf);
if (ro) {
/* ignoring error return here */
@ -2827,18 +2843,16 @@ int bdrv_write_zeroes(BlockDriverState *bs, int64_t sector_num,
*/
int bdrv_make_zero(BlockDriverState *bs, BdrvRequestFlags flags)
{
int64_t target_size;
int64_t ret, nb_sectors, sector_num = 0;
int64_t target_sectors, ret, nb_sectors, sector_num = 0;
int n;
target_size = bdrv_getlength(bs);
if (target_size < 0) {
return target_size;
target_sectors = bdrv_nb_sectors(bs);
if (target_sectors < 0) {
return target_sectors;
}
target_size /= BDRV_SECTOR_SIZE;
for (;;) {
nb_sectors = target_size - sector_num;
nb_sectors = target_sectors - sector_num;
if (nb_sectors <= 0) {
return 0;
}
@ -2968,7 +2982,12 @@ static int coroutine_fn bdrv_co_do_copy_on_readv(BlockDriverState *bs,
cluster_sector_num, cluster_nb_sectors);
iov.iov_len = cluster_nb_sectors * BDRV_SECTOR_SIZE;
iov.iov_base = bounce_buffer = qemu_blockalign(bs, iov.iov_len);
iov.iov_base = bounce_buffer = qemu_try_blockalign(bs, iov.iov_len);
if (bounce_buffer == NULL) {
ret = -ENOMEM;
goto err;
}
qemu_iovec_init_external(&bounce_qiov, &iov, 1);
ret = drv->bdrv_co_readv(bs, cluster_sector_num, cluster_nb_sectors,
@ -3056,15 +3075,14 @@ static int coroutine_fn bdrv_aligned_preadv(BlockDriverState *bs,
ret = drv->bdrv_co_readv(bs, sector_num, nb_sectors, qiov);
} else {
/* Read zeros after EOF of growable BDSes */
int64_t len, total_sectors, max_nb_sectors;
int64_t total_sectors, max_nb_sectors;
len = bdrv_getlength(bs);
if (len < 0) {
ret = len;
total_sectors = bdrv_nb_sectors(bs);
if (total_sectors < 0) {
ret = total_sectors;
goto out;
}
total_sectors = DIV_ROUND_UP(len, BDRV_SECTOR_SIZE);
max_nb_sectors = ROUND_UP(MAX(0, total_sectors - sector_num),
align >> BDRV_SECTOR_BITS);
if (max_nb_sectors > 0) {
@ -3253,7 +3271,11 @@ static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs,
/* Fall back to bounce buffer if write zeroes is unsupported */
iov.iov_len = num * BDRV_SECTOR_SIZE;
if (iov.iov_base == NULL) {
iov.iov_base = qemu_blockalign(bs, num * BDRV_SECTOR_SIZE);
iov.iov_base = qemu_try_blockalign(bs, num * BDRV_SECTOR_SIZE);
if (iov.iov_base == NULL) {
ret = -ENOMEM;
goto fail;
}
memset(iov.iov_base, 0, num * BDRV_SECTOR_SIZE);
}
qemu_iovec_init_external(&qiov, &iov, 1);
@ -3273,6 +3295,7 @@ static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs,
nb_sectors -= num;
}
fail:
qemu_vfree(iov.iov_base);
return ret;
}
@ -3536,11 +3559,12 @@ int64_t bdrv_get_allocated_file_size(BlockDriverState *bs)
}
/**
* Length of a file in bytes. Return < 0 if error or unknown.
* Return number of sectors on success, -errno on error.
*/
int64_t bdrv_getlength(BlockDriverState *bs)
int64_t bdrv_nb_sectors(BlockDriverState *bs)
{
BlockDriver *drv = bs->drv;
if (!drv)
return -ENOMEDIUM;
@ -3550,19 +3574,26 @@ int64_t bdrv_getlength(BlockDriverState *bs)
return ret;
}
}
return bs->total_sectors * BDRV_SECTOR_SIZE;
return bs->total_sectors;
}
/**
 * Report the size of @bs in bytes.
 *
 * The sector count obtained from bdrv_nb_sectors() is scaled by
 * BDRV_SECTOR_SIZE, so a successful result is always a multiple of
 * BDRV_SECTOR_SIZE.
 *
 * Returns the length in bytes on success.  A negative result from
 * bdrv_nb_sectors() (-errno) is handed back to the caller unchanged.
 *
 * NOTE(review): the multiplication is not guarded against int64_t
 * overflow; presumably sector counts stay far below that limit -- confirm.
 */
int64_t bdrv_getlength(BlockDriverState *bs)
{
    int64_t nb_sectors = bdrv_nb_sectors(bs);

    if (nb_sectors < 0) {
        /* Error code: must not be scaled to bytes */
        return nb_sectors;
    }
    return nb_sectors * BDRV_SECTOR_SIZE;
}
/* return 0 as number of sectors if no device present or error */
void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr)
{
int64_t length;
length = bdrv_getlength(bs);
if (length < 0)
length = 0;
else
length = length >> BDRV_SECTOR_BITS;
*nb_sectors_ptr = length;
int64_t nb_sectors = bdrv_nb_sectors(bs);
*nb_sectors_ptr = nb_sectors < 0 ? 0 : nb_sectors;
}
void bdrv_set_on_error(BlockDriverState *bs, BlockdevOnError on_read_error,
@ -3945,21 +3976,21 @@ static int64_t coroutine_fn bdrv_co_get_block_status(BlockDriverState *bs,
int64_t sector_num,
int nb_sectors, int *pnum)
{
int64_t length;
int64_t total_sectors;
int64_t n;
int64_t ret, ret2;
length = bdrv_getlength(bs);
if (length < 0) {
return length;
total_sectors = bdrv_nb_sectors(bs);
if (total_sectors < 0) {
return total_sectors;
}
if (sector_num >= (length >> BDRV_SECTOR_BITS)) {
if (sector_num >= total_sectors) {
*pnum = 0;
return 0;
}
n = bs->total_sectors - sector_num;
n = total_sectors - sector_num;
if (n < nb_sectors) {
nb_sectors = n;
}
@ -3994,8 +4025,8 @@ static int64_t coroutine_fn bdrv_co_get_block_status(BlockDriverState *bs,
ret |= BDRV_BLOCK_ZERO;
} else if (bs->backing_hd) {
BlockDriverState *bs2 = bs->backing_hd;
int64_t length2 = bdrv_getlength(bs2);
if (length2 >= 0 && sector_num >= (length2 >> BDRV_SECTOR_BITS)) {
int64_t nb_sectors2 = bdrv_nb_sectors(bs2);
if (nb_sectors2 >= 0 && sector_num >= nb_sectors2) {
ret |= BDRV_BLOCK_ZERO;
}
}
@ -4607,8 +4638,9 @@ static void bdrv_aio_bh_cb(void *opaque)
{
BlockDriverAIOCBSync *acb = opaque;
if (!acb->is_write)
if (!acb->is_write && acb->ret >= 0) {
qemu_iovec_from_buf(acb->qiov, 0, acb->bounce, acb->qiov->size);
}
qemu_vfree(acb->bounce);
acb->common.cb(acb->common.opaque, acb->ret);
qemu_bh_delete(acb->bh);
@ -4630,10 +4662,12 @@ static BlockDriverAIOCB *bdrv_aio_rw_vector(BlockDriverState *bs,
acb = qemu_aio_get(&bdrv_em_aiocb_info, bs, cb, opaque);
acb->is_write = is_write;
acb->qiov = qiov;
acb->bounce = qemu_blockalign(bs, qiov->size);
acb->bounce = qemu_try_blockalign(bs, qiov->size);
acb->bh = aio_bh_new(bdrv_get_aio_context(bs), bdrv_aio_bh_cb, acb);
if (is_write) {
if (acb->bounce == NULL) {
acb->ret = -ENOMEM;
} else if (is_write) {
qemu_iovec_to_buf(acb->qiov, 0, acb->bounce, qiov->size);
acb->ret = bs->drv->bdrv_write(bs, sector_num, acb->bounce, nb_sectors);
} else {
@ -5247,6 +5281,19 @@ void *qemu_blockalign(BlockDriverState *bs, size_t size)
return qemu_memalign(bdrv_opt_mem_align(bs), size);
}
/*
 * Allocate @size bytes aligned to bdrv_opt_mem_align(@bs) through
 * qemu_try_memalign() and return that call's result.
 *
 * A request for zero bytes is turned into a request for one alignment unit
 * (which the assertion guarantees is non-zero), so that NULL is never
 * returned on success.
 */
void *qemu_try_blockalign(BlockDriverState *bs, size_t size)
{
    const size_t alignment = bdrv_opt_mem_align(bs);

    /* The zero-size substitution below relies on a non-zero alignment */
    assert(alignment > 0);

    return qemu_try_memalign(alignment, size != 0 ? size : alignment);
}
/*
* Check if all memory in this vector is sector aligned.
*/
@ -5277,13 +5324,12 @@ BdrvDirtyBitmap *bdrv_create_dirty_bitmap(BlockDriverState *bs, int granularity,
granularity >>= BDRV_SECTOR_BITS;
assert(granularity);
bitmap_size = bdrv_getlength(bs);
bitmap_size = bdrv_nb_sectors(bs);
if (bitmap_size < 0) {
error_setg_errno(errp, -bitmap_size, "could not get length of device");
errno = -bitmap_size;
return NULL;
}
bitmap_size >>= BDRV_SECTOR_BITS;
bitmap = g_malloc0(sizeof(BdrvDirtyBitmap));
bitmap->bitmap = hbitmap_alloc(bitmap_size, ffs(granularity) - 1);
QLIST_INSERT_HEAD(&bs->dirty_bitmaps, bitmap, list);
@ -5371,6 +5417,9 @@ void bdrv_ref(BlockDriverState *bs)
* deleted. */
void bdrv_unref(BlockDriverState *bs)
{
if (!bs) {
return;
}
assert(bs->refcnt > 0);
if (--bs->refcnt == 0) {
bdrv_delete(bs);
@ -5591,7 +5640,7 @@ void bdrv_img_create(const char *filename, const char *fmt,
if (size == -1) {
if (backing_file) {
BlockDriverState *bs;
uint64_t size;
int64_t size;
int back_flags;
/* backing files always opened read-only */
@ -5609,8 +5658,13 @@ void bdrv_img_create(const char *filename, const char *fmt,
local_err = NULL;
goto out;
}
bdrv_get_geometry(bs, &size);
size *= 512;
size = bdrv_getlength(bs);
if (size < 0) {
error_setg_errno(errp, -size, "Could not get size of '%s'",
backing_file);
bdrv_unref(bs);
goto out;
}
qemu_opt_set_number(opts, BLOCK_OPT_SIZE, size);

View file

@ -17,6 +17,7 @@ block-obj-$(CONFIG_LIBNFS) += nfs.o
block-obj-$(CONFIG_CURL) += curl.o
block-obj-$(CONFIG_RBD) += rbd.o
block-obj-$(CONFIG_GLUSTERFS) += gluster.o
block-obj-$(CONFIG_ARCHIPELAGO) += archipelago.o
block-obj-$(CONFIG_LIBSSH2) += ssh.o
endif
@ -35,5 +36,6 @@ gluster.o-cflags := $(GLUSTERFS_CFLAGS)
gluster.o-libs := $(GLUSTERFS_LIBS)
ssh.o-cflags := $(LIBSSH2_CFLAGS)
ssh.o-libs := $(LIBSSH2_LIBS)
archipelago.o-libs := $(ARCHIPELAGO_LIBS)
qcow.o-libs := -lz
linux-aio.o-libs := -laio

1069
block/archipelago.c Normal file

File diff suppressed because it is too large Load diff

View file

@ -131,7 +131,11 @@ static int bochs_open(BlockDriverState *bs, QDict *options, int flags,
return -EFBIG;
}
s->catalog_bitmap = g_malloc(s->catalog_size * 4);
s->catalog_bitmap = g_try_malloc(s->catalog_size * 4);
if (s->catalog_size && s->catalog_bitmap == NULL) {
error_setg(errp, "Could not allocate memory for catalog");
return -ENOMEM;
}
ret = bdrv_pread(bs->file, le32_to_cpu(bochs.header), s->catalog_bitmap,
s->catalog_size * 4);

View file

@ -116,7 +116,12 @@ static int cloop_open(BlockDriverState *bs, QDict *options, int flags,
"try increasing block size");
return -EINVAL;
}
s->offsets = g_malloc(offsets_size);
s->offsets = g_try_malloc(offsets_size);
if (s->offsets == NULL) {
error_setg(errp, "Could not allocate offsets table");
return -ENOMEM;
}
ret = bdrv_pread(bs->file, 128 + 4 + 4, s->offsets, offsets_size);
if (ret < 0) {
@ -158,8 +163,20 @@ static int cloop_open(BlockDriverState *bs, QDict *options, int flags,
}
/* initialize zlib engine */
s->compressed_block = g_malloc(max_compressed_block_size + 1);
s->uncompressed_block = g_malloc(s->block_size);
s->compressed_block = g_try_malloc(max_compressed_block_size + 1);
if (s->compressed_block == NULL) {
error_setg(errp, "Could not allocate compressed_block");
ret = -ENOMEM;
goto fail;
}
s->uncompressed_block = g_try_malloc(s->block_size);
if (s->uncompressed_block == NULL) {
error_setg(errp, "Could not allocate uncompressed_block");
ret = -ENOMEM;
goto fail;
}
if (inflateInit(&s->zstream) != Z_OK) {
ret = -EINVAL;
goto fail;

View file

@ -640,7 +640,13 @@ static void curl_readv_bh_cb(void *p)
state->buf_start = start;
state->buf_len = acb->end + s->readahead_size;
end = MIN(start + state->buf_len, s->len) - 1;
state->orig_buf = g_malloc(state->buf_len);
state->orig_buf = g_try_malloc(state->buf_len);
if (state->buf_len && state->orig_buf == NULL) {
curl_clean_state(state);
acb->common.cb(acb->common.opaque, -ENOMEM);
qemu_aio_release(acb);
return;
}
state->acb[0] = acb;
snprintf(state->range, 127, "%zd-%zd", start, end);

View file

@ -284,8 +284,15 @@ static int dmg_open(BlockDriverState *bs, QDict *options, int flags,
}
/* initialize zlib engine */
s->compressed_chunk = g_malloc(max_compressed_size + 1);
s->uncompressed_chunk = g_malloc(512 * max_sectors_per_chunk);
s->compressed_chunk = qemu_try_blockalign(bs->file,
max_compressed_size + 1);
s->uncompressed_chunk = qemu_try_blockalign(bs->file,
512 * max_sectors_per_chunk);
if (s->compressed_chunk == NULL || s->uncompressed_chunk == NULL) {
ret = -ENOMEM;
goto fail;
}
if (inflateInit(&s->zstream) != Z_OK) {
ret = -EINVAL;
goto fail;
@ -302,8 +309,8 @@ fail:
g_free(s->lengths);
g_free(s->sectors);
g_free(s->sectorcounts);
g_free(s->compressed_chunk);
g_free(s->uncompressed_chunk);
qemu_vfree(s->compressed_chunk);
qemu_vfree(s->uncompressed_chunk);
return ret;
}
@ -426,8 +433,8 @@ static void dmg_close(BlockDriverState *bs)
g_free(s->lengths);
g_free(s->sectors);
g_free(s->sectorcounts);
g_free(s->compressed_chunk);
g_free(s->uncompressed_chunk);
qemu_vfree(s->compressed_chunk);
qemu_vfree(s->uncompressed_chunk);
inflateEnd(&s->zstream);
}

View file

@ -893,7 +893,10 @@ coroutine_fn iscsi_co_write_zeroes(BlockDriverState *bs, int64_t sector_num,
nb_blocks = sector_qemu2lun(nb_sectors, iscsilun);
if (iscsilun->zeroblock == NULL) {
iscsilun->zeroblock = g_malloc0(iscsilun->block_size);
iscsilun->zeroblock = g_try_malloc0(iscsilun->block_size);
if (iscsilun->zeroblock == NULL) {
return -ENOMEM;
}
}
iscsi_co_init_iscsitask(iscsilun, &iTask);

View file

@ -367,7 +367,12 @@ static void coroutine_fn mirror_run(void *opaque)
}
end = s->common.len >> BDRV_SECTOR_BITS;
s->buf = qemu_blockalign(bs, s->buf_size);
s->buf = qemu_try_blockalign(bs, s->buf_size);
if (s->buf == NULL) {
ret = -ENOMEM;
goto immediate_exit;
}
sectors_per_chunk = s->granularity >> BDRV_SECTOR_BITS;
mirror_free_init(s);

View file

@ -172,7 +172,11 @@ static int coroutine_fn nfs_co_writev(BlockDriverState *bs,
nfs_co_init_task(client, &task);
buf = g_malloc(nb_sectors * BDRV_SECTOR_SIZE);
buf = g_try_malloc(nb_sectors * BDRV_SECTOR_SIZE);
if (nb_sectors && buf == NULL) {
return -ENOMEM;
}
qemu_iovec_to_buf(iov, 0, buf, nb_sectors * BDRV_SECTOR_SIZE);
if (nfs_pwrite_async(client->context, client->fh,

View file

@ -105,7 +105,11 @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags,
ret = -EFBIG;
goto fail;
}
s->catalog_bitmap = g_malloc(s->catalog_size * 4);
s->catalog_bitmap = g_try_malloc(s->catalog_size * 4);
if (s->catalog_size && s->catalog_bitmap == NULL) {
ret = -ENOMEM;
goto fail;
}
ret = bdrv_pread(bs->file, 64, s->catalog_bitmap, s->catalog_size * 4);
if (ret < 0) {

View file

@ -28,6 +28,13 @@
#include "qapi-visit.h"
#include "qapi/qmp-output-visitor.h"
#include "qapi/qmp/types.h"
#ifdef __linux__
#include <linux/fs.h>
#include <sys/ioctl.h>
#ifndef FS_NOCOW_FL
#define FS_NOCOW_FL 0x00800000 /* Do not cow file */
#endif
#endif
BlockDeviceInfo *bdrv_block_device_info(BlockDriverState *bs)
{
@ -165,19 +172,28 @@ void bdrv_query_image_info(BlockDriverState *bs,
ImageInfo **p_info,
Error **errp)
{
uint64_t total_sectors;
int64_t size;
const char *backing_filename;
char backing_filename2[1024];
BlockDriverInfo bdi;
int ret;
Error *err = NULL;
ImageInfo *info = g_new0(ImageInfo, 1);
ImageInfo *info;
#ifdef __linux__
int fd, attr;
#endif
bdrv_get_geometry(bs, &total_sectors);
size = bdrv_getlength(bs);
if (size < 0) {
error_setg_errno(errp, -size, "Can't get size of device '%s'",
bdrv_get_device_name(bs));
return;
}
info = g_new0(ImageInfo, 1);
info->filename = g_strdup(bs->filename);
info->format = g_strdup(bdrv_get_format_name(bs));
info->virtual_size = total_sectors * 512;
info->virtual_size = size;
info->actual_size = bdrv_get_allocated_file_size(bs);
info->has_actual_size = info->actual_size >= 0;
if (bdrv_is_encrypted(bs)) {
@ -195,6 +211,18 @@ void bdrv_query_image_info(BlockDriverState *bs,
info->format_specific = bdrv_get_specific_info(bs);
info->has_format_specific = info->format_specific != NULL;
#ifdef __linux__
/* get NOCOW info */
fd = qemu_open(bs->filename, O_RDONLY | O_NONBLOCK);
if (fd >= 0) {
if (ioctl(fd, FS_IOC_GETFLAGS, &attr) == 0 && (attr & FS_NOCOW_FL)) {
info->has_nocow = true;
info->nocow = true;
}
qemu_close(fd);
}
#endif
backing_filename = bs->backing_file;
if (backing_filename[0] != '\0') {
info->backing_filename = g_strdup(backing_filename);
@ -625,4 +653,8 @@ void bdrv_image_info_dump(fprintf_function func_fprintf, void *f,
func_fprintf(f, "Format specific information:\n");
bdrv_image_info_specific_dump(func_fprintf, f, info->format_specific);
}
if (info->has_nocow && info->nocow) {
func_fprintf(f, "NOCOW flag: set\n");
}
}

View file

@ -182,7 +182,12 @@ static int qcow_open(BlockDriverState *bs, QDict *options, int flags,
}
s->l1_table_offset = header.l1_table_offset;
s->l1_table = g_malloc(s->l1_size * sizeof(uint64_t));
s->l1_table = g_try_malloc(s->l1_size * sizeof(uint64_t));
if (s->l1_table == NULL) {
error_setg(errp, "Could not allocate memory for L1 table");
ret = -ENOMEM;
goto fail;
}
ret = bdrv_pread(bs->file, s->l1_table_offset, s->l1_table,
s->l1_size * sizeof(uint64_t));
@ -193,8 +198,16 @@ static int qcow_open(BlockDriverState *bs, QDict *options, int flags,
for(i = 0;i < s->l1_size; i++) {
be64_to_cpus(&s->l1_table[i]);
}
/* alloc L2 cache */
s->l2_cache = g_malloc(s->l2_size * L2_CACHE_SIZE * sizeof(uint64_t));
/* alloc L2 cache (max. 64k * 16 * 8 = 8 MB) */
s->l2_cache =
qemu_try_blockalign(bs->file,
s->l2_size * L2_CACHE_SIZE * sizeof(uint64_t));
if (s->l2_cache == NULL) {
error_setg(errp, "Could not allocate L2 table cache");
ret = -ENOMEM;
goto fail;
}
s->cluster_cache = g_malloc(s->cluster_size);
s->cluster_data = g_malloc(s->cluster_size);
s->cluster_cache_offset = -1;
@ -226,7 +239,7 @@ static int qcow_open(BlockDriverState *bs, QDict *options, int flags,
fail:
g_free(s->l1_table);
g_free(s->l2_cache);
qemu_vfree(s->l2_cache);
g_free(s->cluster_cache);
g_free(s->cluster_data);
return ret;
@ -517,7 +530,10 @@ static coroutine_fn int qcow_co_readv(BlockDriverState *bs, int64_t sector_num,
void *orig_buf;
if (qiov->niov > 1) {
buf = orig_buf = qemu_blockalign(bs, qiov->size);
buf = orig_buf = qemu_try_blockalign(bs, qiov->size);
if (buf == NULL) {
return -ENOMEM;
}
} else {
orig_buf = NULL;
buf = (uint8_t *)qiov->iov->iov_base;
@ -619,7 +635,10 @@ static coroutine_fn int qcow_co_writev(BlockDriverState *bs, int64_t sector_num,
s->cluster_cache_offset = -1; /* disable compressed cache */
if (qiov->niov > 1) {
buf = orig_buf = qemu_blockalign(bs, qiov->size);
buf = orig_buf = qemu_try_blockalign(bs, qiov->size);
if (buf == NULL) {
return -ENOMEM;
}
qemu_iovec_to_buf(qiov, 0, buf, qiov->size);
} else {
orig_buf = NULL;
@ -685,7 +704,7 @@ static void qcow_close(BlockDriverState *bs)
BDRVQcowState *s = bs->opaque;
g_free(s->l1_table);
g_free(s->l2_cache);
qemu_vfree(s->l2_cache);
g_free(s->cluster_cache);
g_free(s->cluster_data);

View file

@ -53,10 +53,21 @@ Qcow2Cache *qcow2_cache_create(BlockDriverState *bs, int num_tables)
c->entries = g_malloc0(sizeof(*c->entries) * num_tables);
for (i = 0; i < c->size; i++) {
c->entries[i].table = qemu_blockalign(bs, s->cluster_size);
c->entries[i].table = qemu_try_blockalign(bs->file, s->cluster_size);
if (c->entries[i].table == NULL) {
goto fail;
}
}
return c;
fail:
for (i = 0; i < c->size; i++) {
qemu_vfree(c->entries[i].table);
}
g_free(c->entries);
g_free(c);
return NULL;
}
int qcow2_cache_destroy(BlockDriverState* bs, Qcow2Cache *c)

View file

@ -72,14 +72,20 @@ int qcow2_grow_l1_table(BlockDriverState *bs, uint64_t min_size,
#endif
new_l1_size2 = sizeof(uint64_t) * new_l1_size;
new_l1_table = g_malloc0(align_offset(new_l1_size2, 512));
new_l1_table = qemu_try_blockalign(bs->file,
align_offset(new_l1_size2, 512));
if (new_l1_table == NULL) {
return -ENOMEM;
}
memset(new_l1_table, 0, align_offset(new_l1_size2, 512));
memcpy(new_l1_table, s->l1_table, s->l1_size * sizeof(uint64_t));
/* write new table (align to cluster) */
BLKDBG_EVENT(bs->file, BLKDBG_L1_GROW_ALLOC_TABLE);
new_l1_table_offset = qcow2_alloc_clusters(bs, new_l1_size2);
if (new_l1_table_offset < 0) {
g_free(new_l1_table);
qemu_vfree(new_l1_table);
return new_l1_table_offset;
}
@ -113,7 +119,7 @@ int qcow2_grow_l1_table(BlockDriverState *bs, uint64_t min_size,
if (ret < 0) {
goto fail;
}
g_free(s->l1_table);
qemu_vfree(s->l1_table);
old_l1_table_offset = s->l1_table_offset;
s->l1_table_offset = new_l1_table_offset;
s->l1_table = new_l1_table;
@ -123,7 +129,7 @@ int qcow2_grow_l1_table(BlockDriverState *bs, uint64_t min_size,
QCOW2_DISCARD_OTHER);
return 0;
fail:
g_free(new_l1_table);
qemu_vfree(new_l1_table);
qcow2_free_clusters(bs, new_l1_table_offset, new_l1_size2,
QCOW2_DISCARD_OTHER);
return ret;
@ -372,7 +378,10 @@ static int coroutine_fn copy_sectors(BlockDriverState *bs,
}
iov.iov_len = n * BDRV_SECTOR_SIZE;
iov.iov_base = qemu_blockalign(bs, iov.iov_len);
iov.iov_base = qemu_try_blockalign(bs, iov.iov_len);
if (iov.iov_base == NULL) {
return -ENOMEM;
}
qemu_iovec_init_external(&qiov, &iov, 1);
@ -702,7 +711,11 @@ int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m)
trace_qcow2_cluster_link_l2(qemu_coroutine_self(), m->nb_clusters);
assert(m->nb_clusters > 0);
old_cluster = g_malloc(m->nb_clusters * sizeof(uint64_t));
old_cluster = g_try_malloc(m->nb_clusters * sizeof(uint64_t));
if (old_cluster == NULL) {
ret = -ENOMEM;
goto err;
}
/* copy content of unmodified sectors */
ret = perform_cow(bs, m, &m->cow_start);
@ -1106,6 +1119,17 @@ static int handle_alloc(BlockDriverState *bs, uint64_t guest_offset,
return 0;
}
/* !*host_offset would overwrite the image header and is reserved for "no
* host offset preferred". If 0 was a valid host offset, it'd trigger the
* following overlap check; do that now to avoid having an invalid value in
* *host_offset. */
if (!alloc_cluster_offset) {
ret = qcow2_pre_write_overlap_check(bs, 0, alloc_cluster_offset,
nb_clusters * s->cluster_size);
assert(ret < 0);
goto fail;
}
/*
* Save info needed for meta data update.
*
@ -1562,7 +1586,10 @@ static int expand_zero_clusters_in_l1(BlockDriverState *bs, uint64_t *l1_table,
if (!is_active_l1) {
/* inactive L2 tables require a buffer to be stored in when loading
* them from disk */
l2_table = qemu_blockalign(bs, s->cluster_size);
l2_table = qemu_try_blockalign(bs->file, s->cluster_size);
if (l2_table == NULL) {
return -ENOMEM;
}
}
for (i = 0; i < l1_size; i++) {
@ -1740,7 +1767,11 @@ int qcow2_expand_zero_clusters(BlockDriverState *bs)
nb_clusters = size_to_clusters(s, bs->file->total_sectors *
BDRV_SECTOR_SIZE);
expanded_clusters = g_malloc0((nb_clusters + 7) / 8);
expanded_clusters = g_try_malloc0((nb_clusters + 7) / 8);
if (expanded_clusters == NULL) {
ret = -ENOMEM;
goto fail;
}
ret = expand_zero_clusters_in_l1(bs, s->l1_table, s->l1_size,
&expanded_clusters, &nb_clusters);

View file

@ -46,19 +46,25 @@ int qcow2_refcount_init(BlockDriverState *bs)
assert(s->refcount_table_size <= INT_MAX / sizeof(uint64_t));
refcount_table_size2 = s->refcount_table_size * sizeof(uint64_t);
s->refcount_table = g_malloc(refcount_table_size2);
s->refcount_table = g_try_malloc(refcount_table_size2);
if (s->refcount_table_size > 0) {
if (s->refcount_table == NULL) {
ret = -ENOMEM;
goto fail;
}
BLKDBG_EVENT(bs->file, BLKDBG_REFTABLE_LOAD);
ret = bdrv_pread(bs->file, s->refcount_table_offset,
s->refcount_table, refcount_table_size2);
if (ret != refcount_table_size2)
if (ret < 0) {
goto fail;
}
for(i = 0; i < s->refcount_table_size; i++)
be64_to_cpus(&s->refcount_table[i]);
}
return 0;
fail:
return -ENOMEM;
return ret;
}
void qcow2_refcount_close(BlockDriverState *bs)
@ -344,8 +350,14 @@ static int alloc_refcount_block(BlockDriverState *bs,
uint64_t meta_offset = (blocks_used * refcount_block_clusters) *
s->cluster_size;
uint64_t table_offset = meta_offset + blocks_clusters * s->cluster_size;
uint16_t *new_blocks = g_malloc0(blocks_clusters * s->cluster_size);
uint64_t *new_table = g_malloc0(table_size * sizeof(uint64_t));
uint64_t *new_table = g_try_malloc0(table_size * sizeof(uint64_t));
uint16_t *new_blocks = g_try_malloc0(blocks_clusters * s->cluster_size);
assert(table_size > 0 && blocks_clusters > 0);
if (new_table == NULL || new_blocks == NULL) {
ret = -ENOMEM;
goto fail_table;
}
/* Fill the new refcount table */
memcpy(new_table, s->refcount_table,
@ -424,6 +436,7 @@ static int alloc_refcount_block(BlockDriverState *bs,
return -EAGAIN;
fail_table:
g_free(new_blocks);
g_free(new_table);
fail_block:
if (*refcount_block != NULL) {
@ -847,7 +860,8 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs,
int64_t l1_table_offset, int l1_size, int addend)
{
BDRVQcowState *s = bs->opaque;
uint64_t *l1_table, *l2_table, l2_offset, offset, l1_size2, l1_allocated;
uint64_t *l1_table, *l2_table, l2_offset, offset, l1_size2;
bool l1_allocated = false;
int64_t old_offset, old_l2_offset;
int i, j, l1_modified = 0, nb_csectors, refcount;
int ret;
@ -862,8 +876,12 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs,
* l1_table_offset when it is the current s->l1_table_offset! Be careful
* when changing this! */
if (l1_table_offset != s->l1_table_offset) {
l1_table = g_malloc0(align_offset(l1_size2, 512));
l1_allocated = 1;
l1_table = g_try_malloc0(align_offset(l1_size2, 512));
if (l1_size2 && l1_table == NULL) {
ret = -ENOMEM;
goto fail;
}
l1_allocated = true;
ret = bdrv_pread(bs->file, l1_table_offset, l1_table, l1_size2);
if (ret < 0) {
@ -875,7 +893,7 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs,
} else {
assert(l1_size == s->l1_size);
l1_table = s->l1_table;
l1_allocated = 0;
l1_allocated = false;
}
for(i = 0; i < l1_size; i++) {
@ -1197,7 +1215,11 @@ static int check_refcounts_l1(BlockDriverState *bs,
if (l1_size2 == 0) {
l1_table = NULL;
} else {
l1_table = g_malloc(l1_size2);
l1_table = g_try_malloc(l1_size2);
if (l1_table == NULL) {
ret = -ENOMEM;
goto fail;
}
if (bdrv_pread(bs->file, l1_table_offset,
l1_table, l1_size2) != l1_size2)
goto fail;
@ -1501,7 +1523,11 @@ int qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
return -EFBIG;
}
refcount_table = g_malloc0(nb_clusters * sizeof(uint16_t));
refcount_table = g_try_malloc0(nb_clusters * sizeof(uint16_t));
if (nb_clusters && refcount_table == NULL) {
res->check_errors++;
return -ENOMEM;
}
res->bfi.total_clusters =
size_to_clusters(s, bs->total_sectors * BDRV_SECTOR_SIZE);
@ -1753,9 +1779,13 @@ int qcow2_check_metadata_overlap(BlockDriverState *bs, int ign, int64_t offset,
uint64_t l1_ofs = s->snapshots[i].l1_table_offset;
uint32_t l1_sz = s->snapshots[i].l1_size;
uint64_t l1_sz2 = l1_sz * sizeof(uint64_t);
uint64_t *l1 = g_malloc(l1_sz2);
uint64_t *l1 = g_try_malloc(l1_sz2);
int ret;
if (l1_sz2 && l1 == NULL) {
return -ENOMEM;
}
ret = bdrv_pread(bs->file, l1_ofs, l1, l1_sz2);
if (ret < 0) {
g_free(l1);

View file

@ -381,7 +381,12 @@ int qcow2_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info)
sn->l1_table_offset = l1_table_offset;
sn->l1_size = s->l1_size;
l1_table = g_malloc(s->l1_size * sizeof(uint64_t));
l1_table = g_try_malloc(s->l1_size * sizeof(uint64_t));
if (s->l1_size && l1_table == NULL) {
ret = -ENOMEM;
goto fail;
}
for(i = 0; i < s->l1_size; i++) {
l1_table[i] = cpu_to_be64(s->l1_table[i]);
}
@ -499,7 +504,11 @@ int qcow2_snapshot_goto(BlockDriverState *bs, const char *snapshot_id)
* Decrease the refcount referenced by the old one only when the L1
* table is overwritten.
*/
sn_l1_table = g_malloc0(cur_l1_bytes);
sn_l1_table = g_try_malloc0(cur_l1_bytes);
if (cur_l1_bytes && sn_l1_table == NULL) {
ret = -ENOMEM;
goto fail;
}
ret = bdrv_pread(bs->file, sn->l1_table_offset, sn_l1_table, sn_l1_bytes);
if (ret < 0) {
@ -698,17 +707,21 @@ int qcow2_snapshot_load_tmp(BlockDriverState *bs,
return -EFBIG;
}
new_l1_bytes = sn->l1_size * sizeof(uint64_t);
new_l1_table = g_malloc0(align_offset(new_l1_bytes, 512));
new_l1_table = qemu_try_blockalign(bs->file,
align_offset(new_l1_bytes, 512));
if (new_l1_table == NULL) {
return -ENOMEM;
}
ret = bdrv_pread(bs->file, sn->l1_table_offset, new_l1_table, new_l1_bytes);
if (ret < 0) {
error_setg(errp, "Failed to read l1 table for snapshot");
g_free(new_l1_table);
qemu_vfree(new_l1_table);
return ret;
}
/* Switch the L1 table */
g_free(s->l1_table);
qemu_vfree(s->l1_table);
s->l1_size = sn->l1_size;
s->l1_table_offset = sn->l1_table_offset;

View file

@ -688,8 +688,13 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags,
if (s->l1_size > 0) {
s->l1_table = g_malloc0(
s->l1_table = qemu_try_blockalign(bs->file,
align_offset(s->l1_size * sizeof(uint64_t), 512));
if (s->l1_table == NULL) {
error_setg(errp, "Could not allocate L1 table");
ret = -ENOMEM;
goto fail;
}
ret = bdrv_pread(bs->file, s->l1_table_offset, s->l1_table,
s->l1_size * sizeof(uint64_t));
if (ret < 0) {
@ -704,11 +709,22 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags,
/* alloc L2 table/refcount block cache */
s->l2_table_cache = qcow2_cache_create(bs, L2_CACHE_SIZE);
s->refcount_block_cache = qcow2_cache_create(bs, REFCOUNT_CACHE_SIZE);
if (s->l2_table_cache == NULL || s->refcount_block_cache == NULL) {
error_setg(errp, "Could not allocate metadata caches");
ret = -ENOMEM;
goto fail;
}
s->cluster_cache = g_malloc(s->cluster_size);
/* one more sector for decompressed data alignment */
s->cluster_data = qemu_blockalign(bs, QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size
+ 512);
s->cluster_data = qemu_try_blockalign(bs->file, QCOW_MAX_CRYPT_CLUSTERS
* s->cluster_size + 512);
if (s->cluster_data == NULL) {
error_setg(errp, "Could not allocate temporary cluster buffer");
ret = -ENOMEM;
goto fail;
}
s->cluster_cache_offset = -1;
s->flags = flags;
@ -852,7 +868,7 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags,
cleanup_unknown_header_ext(bs);
qcow2_free_snapshots(bs);
qcow2_refcount_close(bs);
g_free(s->l1_table);
qemu_vfree(s->l1_table);
/* else pre-write overlap checks in cache_destroy may crash */
s->l1_table = NULL;
if (s->l2_table_cache) {
@ -1082,7 +1098,12 @@ static coroutine_fn int qcow2_co_readv(BlockDriverState *bs, int64_t sector_num,
*/
if (!cluster_data) {
cluster_data =
qemu_blockalign(bs, QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size);
qemu_try_blockalign(bs->file, QCOW_MAX_CRYPT_CLUSTERS
* s->cluster_size);
if (cluster_data == NULL) {
ret = -ENOMEM;
goto fail;
}
}
assert(cur_nr_sectors <=
@ -1182,8 +1203,13 @@ static coroutine_fn int qcow2_co_writev(BlockDriverState *bs,
if (s->crypt_method) {
if (!cluster_data) {
cluster_data = qemu_blockalign(bs, QCOW_MAX_CRYPT_CLUSTERS *
s->cluster_size);
cluster_data = qemu_try_blockalign(bs->file,
QCOW_MAX_CRYPT_CLUSTERS
* s->cluster_size);
if (cluster_data == NULL) {
ret = -ENOMEM;
goto fail;
}
}
assert(hd_qiov.size <=
@ -1270,7 +1296,7 @@ fail:
static void qcow2_close(BlockDriverState *bs)
{
BDRVQcowState *s = bs->opaque;
g_free(s->l1_table);
qemu_vfree(s->l1_table);
/* else pre-write overlap checks in cache_destroy may crash */
s->l1_table = NULL;
@ -1557,7 +1583,7 @@ static int preallocate(BlockDriverState *bs)
int ret;
QCowL2Meta *meta;
nb_sectors = bdrv_getlength(bs) >> BDRV_SECTOR_BITS;
nb_sectors = bdrv_nb_sectors(bs);
offset = 0;
while (nb_sectors) {
@ -1947,7 +1973,6 @@ static int qcow2_write_compressed(BlockDriverState *bs, int64_t sector_num,
/* align end of file to a sector boundary to ease reading with
sector based I/Os */
cluster_offset = bdrv_getlength(bs->file);
cluster_offset = (cluster_offset + 511) & ~511;
bdrv_truncate(bs->file, cluster_offset);
return 0;
}

View file

@ -227,8 +227,11 @@ int qed_check(BDRVQEDState *s, BdrvCheckResult *result, bool fix)
};
int ret;
check.used_clusters = g_malloc0(((check.nclusters + 31) / 32) *
sizeof(check.used_clusters[0]));
check.used_clusters = g_try_malloc0(((check.nclusters + 31) / 32) *
sizeof(check.used_clusters[0]));
if (check.nclusters && check.used_clusters == NULL) {
return -ENOMEM;
}
check.result->bfi.total_clusters =
(s->header.image_size + s->header.cluster_size - 1) /

View file

@ -1240,7 +1240,11 @@ static void qed_aio_write_inplace(QEDAIOCB *acb, uint64_t offset, size_t len)
struct iovec *iov = acb->qiov->iov;
if (!iov->iov_base) {
iov->iov_base = qemu_blockalign(acb->common.bs, iov->iov_len);
iov->iov_base = qemu_try_blockalign(acb->common.bs, iov->iov_len);
if (iov->iov_base == NULL) {
qed_aio_complete(acb, -ENOMEM);
return;
}
memset(iov->iov_base, 0, iov->iov_len);
}
}

View file

@ -798,7 +798,11 @@ static ssize_t handle_aiocb_rw(RawPosixAIOData *aiocb)
* Ok, we have to do it the hard way, copy all segments into
* a single aligned buffer.
*/
buf = qemu_blockalign(aiocb->bs, aiocb->aio_nbytes);
buf = qemu_try_blockalign(aiocb->bs, aiocb->aio_nbytes);
if (buf == NULL) {
return -ENOMEM;
}
if (aiocb->aio_type & QEMU_AIO_WRITE) {
char *p = buf;
int i;

View file

@ -617,7 +617,7 @@ static BlockDriverAIOCB *rbd_start_aio(BlockDriverState *bs,
RBDAIOCmd cmd)
{
RBDAIOCB *acb;
RADOSCB *rcb;
RADOSCB *rcb = NULL;
rbd_completion_t c;
int64_t off, size;
char *buf;
@ -631,7 +631,10 @@ static BlockDriverAIOCB *rbd_start_aio(BlockDriverState *bs,
if (cmd == RBD_AIO_DISCARD || cmd == RBD_AIO_FLUSH) {
acb->bounce = NULL;
} else {
acb->bounce = qemu_blockalign(bs, qiov->size);
acb->bounce = qemu_try_blockalign(bs, qiov->size);
if (acb->bounce == NULL) {
goto failed;
}
}
acb->ret = 0;
acb->error = 0;

View file

@ -53,13 +53,6 @@
#include "block/block_int.h"
#include "qemu/module.h"
#include "migration/migration.h"
#ifdef __linux__
#include <linux/fs.h>
#include <sys/ioctl.h>
#ifndef FS_NOCOW_FL
#define FS_NOCOW_FL 0x00800000 /* Do not cow file */
#endif
#endif
#if defined(CONFIG_UUID)
#include <uuid/uuid.h>
@ -299,7 +292,12 @@ static int vdi_check(BlockDriverState *bs, BdrvCheckResult *res,
return -ENOTSUP;
}
bmap = g_malloc(s->header.blocks_in_image * sizeof(uint32_t));
bmap = g_try_malloc(s->header.blocks_in_image * sizeof(uint32_t));
if (s->header.blocks_in_image && bmap == NULL) {
res->check_errors++;
return -ENOMEM;
}
memset(bmap, 0xff, s->header.blocks_in_image * sizeof(uint32_t));
/* Check block map and value of blocks_allocated. */
@ -357,23 +355,23 @@ static int vdi_make_empty(BlockDriverState *bs)
static int vdi_probe(const uint8_t *buf, int buf_size, const char *filename)
{
const VdiHeader *header = (const VdiHeader *)buf;
int result = 0;
int ret = 0;
logout("\n");
if (buf_size < sizeof(*header)) {
/* Header too small, no VDI. */
} else if (le32_to_cpu(header->signature) == VDI_SIGNATURE) {
result = 100;
ret = 100;
}
if (result == 0) {
if (ret == 0) {
logout("no vdi image\n");
} else {
logout("%s", header->text);
}
return result;
return ret;
}
static int vdi_open(BlockDriverState *bs, QDict *options, int flags,
@ -478,7 +476,12 @@ static int vdi_open(BlockDriverState *bs, QDict *options, int flags,
bmap_size = header.blocks_in_image * sizeof(uint32_t);
bmap_size = (bmap_size + SECTOR_SIZE - 1) / SECTOR_SIZE;
s->bmap = g_malloc(bmap_size * SECTOR_SIZE);
s->bmap = qemu_try_blockalign(bs->file, bmap_size * SECTOR_SIZE);
if (s->bmap == NULL) {
ret = -ENOMEM;
goto fail;
}
ret = bdrv_read(bs->file, s->bmap_sector, (uint8_t *)s->bmap, bmap_size);
if (ret < 0) {
goto fail_free_bmap;
@ -493,7 +496,7 @@ static int vdi_open(BlockDriverState *bs, QDict *options, int flags,
return 0;
fail_free_bmap:
g_free(s->bmap);
qemu_vfree(s->bmap);
fail:
return ret;
@ -681,8 +684,7 @@ static int vdi_co_write(BlockDriverState *bs,
static int vdi_create(const char *filename, QemuOpts *opts, Error **errp)
{
int fd;
int result = 0;
int ret = 0;
uint64_t bytes = 0;
uint32_t blocks;
size_t block_size = DEFAULT_CLUSTER_SIZE;
@ -690,7 +692,10 @@ static int vdi_create(const char *filename, QemuOpts *opts, Error **errp)
VdiHeader header;
size_t i;
size_t bmap_size;
bool nocow = false;
int64_t offset = 0;
Error *local_err = NULL;
BlockDriverState *bs = NULL;
uint32_t *bmap = NULL;
logout("\n");
@ -707,37 +712,25 @@ static int vdi_create(const char *filename, QemuOpts *opts, Error **errp)
image_type = VDI_TYPE_STATIC;
}
#endif
nocow = qemu_opt_get_bool_del(opts, BLOCK_OPT_NOCOW, false);
if (bytes > VDI_DISK_SIZE_MAX) {
result = -ENOTSUP;
ret = -ENOTSUP;
error_setg(errp, "Unsupported VDI image size (size is 0x%" PRIx64
", max supported is 0x%" PRIx64 ")",
bytes, VDI_DISK_SIZE_MAX);
goto exit;
}
fd = qemu_open(filename,
O_WRONLY | O_CREAT | O_TRUNC | O_BINARY | O_LARGEFILE,
0644);
if (fd < 0) {
result = -errno;
ret = bdrv_create_file(filename, opts, &local_err);
if (ret < 0) {
error_propagate(errp, local_err);
goto exit;
}
if (nocow) {
#ifdef __linux__
/* Set NOCOW flag to solve performance issue on fs like btrfs.
* This is an optimisation. The FS_IOC_SETFLAGS ioctl return value will
* be ignored since any failure of this operation should not block the
* left work.
*/
int attr;
if (ioctl(fd, FS_IOC_GETFLAGS, &attr) == 0) {
attr |= FS_NOCOW_FL;
ioctl(fd, FS_IOC_SETFLAGS, &attr);
}
#endif
ret = bdrv_open(&bs, filename, NULL, NULL, BDRV_O_RDWR | BDRV_O_PROTOCOL,
NULL, &local_err);
if (ret < 0) {
error_propagate(errp, local_err);
goto exit;
}
/* We need enough blocks to store the given disk size,
@ -769,13 +762,20 @@ static int vdi_create(const char *filename, QemuOpts *opts, Error **errp)
vdi_header_print(&header);
#endif
vdi_header_to_le(&header);
if (write(fd, &header, sizeof(header)) < 0) {
result = -errno;
goto close_and_exit;
ret = bdrv_pwrite_sync(bs, offset, &header, sizeof(header));
if (ret < 0) {
error_setg(errp, "Error writing header to %s", filename);
goto exit;
}
offset += sizeof(header);
if (bmap_size > 0) {
uint32_t *bmap = g_malloc0(bmap_size);
bmap = g_try_malloc0(bmap_size);
if (bmap == NULL) {
ret = -ENOMEM;
error_setg(errp, "Could not allocate bmap");
goto exit;
}
for (i = 0; i < blocks; i++) {
if (image_type == VDI_TYPE_STATIC) {
bmap[i] = i;
@ -783,35 +783,33 @@ static int vdi_create(const char *filename, QemuOpts *opts, Error **errp)
bmap[i] = VDI_UNALLOCATED;
}
}
if (write(fd, bmap, bmap_size) < 0) {
result = -errno;
g_free(bmap);
goto close_and_exit;
ret = bdrv_pwrite_sync(bs, offset, bmap, bmap_size);
if (ret < 0) {
error_setg(errp, "Error writing bmap to %s", filename);
goto exit;
}
g_free(bmap);
offset += bmap_size;
}
if (image_type == VDI_TYPE_STATIC) {
if (ftruncate(fd, sizeof(header) + bmap_size + blocks * block_size)) {
result = -errno;
goto close_and_exit;
ret = bdrv_truncate(bs, offset + blocks * block_size);
if (ret < 0) {
error_setg(errp, "Failed to statically allocate %s", filename);
goto exit;
}
}
close_and_exit:
if ((close(fd) < 0) && !result) {
result = -errno;
}
exit:
return result;
bdrv_unref(bs);
g_free(bmap);
return ret;
}
static void vdi_close(BlockDriverState *bs)
{
BDRVVdiState *s = bs->opaque;
g_free(s->bmap);
qemu_vfree(s->bmap);
migrate_del_blocker(s->migration_blocker);
error_free(s->migration_blocker);

View file

@ -82,8 +82,6 @@ void vhdx_log_desc_le_import(VHDXLogDescriptor *d)
assert(d != NULL);
le32_to_cpus(&d->signature);
le32_to_cpus(&d->trailing_bytes);
le64_to_cpus(&d->leading_bytes);
le64_to_cpus(&d->file_offset);
le64_to_cpus(&d->sequence_number);
}
@ -99,6 +97,15 @@ void vhdx_log_desc_le_export(VHDXLogDescriptor *d)
cpu_to_le64s(&d->sequence_number);
}
/* Byte-swap the 32-bit fields of a VHDX log data sector (data signature and
 * the high/low halves of the sequence number) from little-endian to CPU byte
 * order.  The conversion is done in place on *d, which must not be NULL. */
void vhdx_log_data_le_import(VHDXLogDataSector *d)
{
    assert(d != NULL);

    /* the three fields are independent; each one is swapped in place */
    le32_to_cpus(&d->data_signature);
    le32_to_cpus(&d->sequence_high);
    le32_to_cpus(&d->sequence_low);
}
void vhdx_log_data_le_export(VHDXLogDataSector *d)
{
assert(d != NULL);

View file

@ -84,6 +84,7 @@ static int vhdx_log_peek_hdr(BlockDriverState *bs, VHDXLogEntries *log,
if (ret < 0) {
goto exit;
}
vhdx_log_entry_hdr_le_import(hdr);
exit:
return ret;
@ -211,7 +212,7 @@ static bool vhdx_log_hdr_is_valid(VHDXLogEntries *log, VHDXLogEntryHeader *hdr,
{
int valid = false;
if (memcmp(&hdr->signature, "loge", 4)) {
if (hdr->signature != VHDX_LOG_SIGNATURE) {
goto exit;
}
@ -275,12 +276,12 @@ static bool vhdx_log_desc_is_valid(VHDXLogDescriptor *desc,
goto exit;
}
if (!memcmp(&desc->signature, "zero", 4)) {
if (desc->signature == VHDX_LOG_ZERO_SIGNATURE) {
if (desc->zero_length % VHDX_LOG_SECTOR_SIZE == 0) {
/* valid */
ret = true;
}
} else if (!memcmp(&desc->signature, "desc", 4)) {
} else if (desc->signature == VHDX_LOG_DESC_SIGNATURE) {
/* valid */
ret = true;
}
@ -327,13 +328,15 @@ static int vhdx_compute_desc_sectors(uint32_t desc_cnt)
* passed into this function. Each descriptor will also be validated,
* and error returned if any are invalid. */
static int vhdx_log_read_desc(BlockDriverState *bs, BDRVVHDXState *s,
VHDXLogEntries *log, VHDXLogDescEntries **buffer)
VHDXLogEntries *log, VHDXLogDescEntries **buffer,
bool convert_endian)
{
int ret = 0;
uint32_t desc_sectors;
uint32_t sectors_read;
VHDXLogEntryHeader hdr;
VHDXLogDescEntries *desc_entries = NULL;
VHDXLogDescriptor desc;
int i;
assert(*buffer == NULL);
@ -342,14 +345,19 @@ static int vhdx_log_read_desc(BlockDriverState *bs, BDRVVHDXState *s,
if (ret < 0) {
goto exit;
}
vhdx_log_entry_hdr_le_import(&hdr);
if (vhdx_log_hdr_is_valid(log, &hdr, s) == false) {
ret = -EINVAL;
goto exit;
}
desc_sectors = vhdx_compute_desc_sectors(hdr.descriptor_count);
desc_entries = qemu_blockalign(bs, desc_sectors * VHDX_LOG_SECTOR_SIZE);
desc_entries = qemu_try_blockalign(bs->file,
desc_sectors * VHDX_LOG_SECTOR_SIZE);
if (desc_entries == NULL) {
ret = -ENOMEM;
goto exit;
}
ret = vhdx_log_read_sectors(bs, log, &sectors_read, desc_entries,
desc_sectors, false);
@ -363,12 +371,19 @@ static int vhdx_log_read_desc(BlockDriverState *bs, BDRVVHDXState *s,
/* put in proper endianness, and validate each desc */
for (i = 0; i < hdr.descriptor_count; i++) {
vhdx_log_desc_le_import(&desc_entries->desc[i]);
if (vhdx_log_desc_is_valid(&desc_entries->desc[i], &hdr) == false) {
desc = desc_entries->desc[i];
vhdx_log_desc_le_import(&desc);
if (convert_endian) {
desc_entries->desc[i] = desc;
}
if (vhdx_log_desc_is_valid(&desc, &hdr) == false) {
ret = -EINVAL;
goto free_and_exit;
}
}
if (convert_endian) {
desc_entries->hdr = hdr;
}
*buffer = desc_entries;
goto exit;
@ -403,7 +418,7 @@ static int vhdx_log_flush_desc(BlockDriverState *bs, VHDXLogDescriptor *desc,
buffer = qemu_blockalign(bs, VHDX_LOG_SECTOR_SIZE);
if (!memcmp(&desc->signature, "desc", 4)) {
if (desc->signature == VHDX_LOG_DESC_SIGNATURE) {
/* data sector */
if (data == NULL) {
ret = -EFAULT;
@ -431,10 +446,15 @@ static int vhdx_log_flush_desc(BlockDriverState *bs, VHDXLogDescriptor *desc,
memcpy(buffer+offset, &desc->trailing_bytes, 4);
} else if (!memcmp(&desc->signature, "zero", 4)) {
} else if (desc->signature == VHDX_LOG_ZERO_SIGNATURE) {
/* write 'count' sectors of sector */
memset(buffer, 0, VHDX_LOG_SECTOR_SIZE);
count = desc->zero_length / VHDX_LOG_SECTOR_SIZE;
} else {
error_report("Invalid VHDX log descriptor entry signature 0x%" PRIx32,
desc->signature);
ret = -EINVAL;
goto exit;
}
file_offset = desc->file_offset;
@ -493,13 +513,13 @@ static int vhdx_log_flush(BlockDriverState *bs, BDRVVHDXState *s,
goto exit;
}
ret = vhdx_log_read_desc(bs, s, &logs->log, &desc_entries);
ret = vhdx_log_read_desc(bs, s, &logs->log, &desc_entries, true);
if (ret < 0) {
goto exit;
}
for (i = 0; i < desc_entries->hdr.descriptor_count; i++) {
if (!memcmp(&desc_entries->desc[i].signature, "desc", 4)) {
if (desc_entries->desc[i].signature == VHDX_LOG_DESC_SIGNATURE) {
/* data sector, so read a sector to flush */
ret = vhdx_log_read_sectors(bs, &logs->log, &sectors_read,
data, 1, false);
@ -510,6 +530,7 @@ static int vhdx_log_flush(BlockDriverState *bs, BDRVVHDXState *s,
ret = -EINVAL;
goto exit;
}
vhdx_log_data_le_import(data);
}
ret = vhdx_log_flush_desc(bs, &desc_entries->desc[i], data);
@ -558,9 +579,6 @@ static int vhdx_validate_log_entry(BlockDriverState *bs, BDRVVHDXState *s,
goto inc_and_exit;
}
vhdx_log_entry_hdr_le_import(&hdr);
if (vhdx_log_hdr_is_valid(log, &hdr, s) == false) {
goto inc_and_exit;
}
@ -573,13 +591,13 @@ static int vhdx_validate_log_entry(BlockDriverState *bs, BDRVVHDXState *s,
desc_sectors = vhdx_compute_desc_sectors(hdr.descriptor_count);
/* Read desc sectors, and calculate log checksum */
/* Read all log sectors, and calculate log checksum */
total_sectors = hdr.entry_length / VHDX_LOG_SECTOR_SIZE;
/* read_desc() will increment the read idx */
ret = vhdx_log_read_desc(bs, s, log, &desc_buffer);
ret = vhdx_log_read_desc(bs, s, log, &desc_buffer, false);
if (ret < 0) {
goto free_and_exit;
}
@ -602,7 +620,7 @@ static int vhdx_validate_log_entry(BlockDriverState *bs, BDRVVHDXState *s,
}
}
crc ^= 0xffffffff;
if (crc != desc_buffer->hdr.checksum) {
if (crc != hdr.checksum) {
goto free_and_exit;
}
@ -962,7 +980,6 @@ static int vhdx_log_write(BlockDriverState *bs, BDRVVHDXState *s,
* last data sector */
vhdx_update_checksum(buffer, total_length,
offsetof(VHDXLogEntryHeader, checksum));
cpu_to_le32s((uint32_t *)(buffer + 4));
/* now write to the log */
ret = vhdx_log_write_sectors(bs, &s->log, &sectors_written, buffer,

View file

@ -135,10 +135,8 @@ typedef struct VHDXSectorInfo {
* buf: buffer pointer
* size: size of buffer (must be > crc_offset+4)
*
* Note: The resulting checksum is in the CPU endianness, not necessarily
* in the file format endianness (LE). Any header export to disk should
* make sure that vhdx_header_le_export() is used to convert to the
* correct endianness
* Note: The buffer should have all multi-byte data in little-endian format,
* and the resulting checksum is in little endian format.
*/
uint32_t vhdx_update_checksum(uint8_t *buf, size_t size, int crc_offset)
{
@ -149,6 +147,7 @@ uint32_t vhdx_update_checksum(uint8_t *buf, size_t size, int crc_offset)
memset(buf + crc_offset, 0, sizeof(crc));
crc = crc32c(0xffffffff, buf, size);
cpu_to_le32s(&crc);
memcpy(buf + crc_offset, &crc, sizeof(crc));
return crc;
@ -300,7 +299,7 @@ static int vhdx_write_header(BlockDriverState *bs_file, VHDXHeader *hdr,
{
uint8_t *buffer = NULL;
int ret;
VHDXHeader header_le;
VHDXHeader *header_le;
assert(bs_file != NULL);
assert(hdr != NULL);
@ -321,11 +320,12 @@ static int vhdx_write_header(BlockDriverState *bs_file, VHDXHeader *hdr,
}
/* overwrite the actual VHDXHeader portion */
memcpy(buffer, hdr, sizeof(VHDXHeader));
hdr->checksum = vhdx_update_checksum(buffer, VHDX_HEADER_SIZE,
offsetof(VHDXHeader, checksum));
vhdx_header_le_export(hdr, &header_le);
ret = bdrv_pwrite_sync(bs_file, offset, &header_le, sizeof(VHDXHeader));
header_le = (VHDXHeader *)buffer;
memcpy(header_le, hdr, sizeof(VHDXHeader));
vhdx_header_le_export(hdr, header_le);
vhdx_update_checksum(buffer, VHDX_HEADER_SIZE,
offsetof(VHDXHeader, checksum));
ret = bdrv_pwrite_sync(bs_file, offset, header_le, sizeof(VHDXHeader));
exit:
qemu_vfree(buffer);
@ -432,13 +432,14 @@ static void vhdx_parse_header(BlockDriverState *bs, BDRVVHDXState *s,
}
/* copy over just the relevant portion that we need */
memcpy(header1, buffer, sizeof(VHDXHeader));
vhdx_header_le_import(header1);
if (vhdx_checksum_is_valid(buffer, VHDX_HEADER_SIZE, 4) &&
!memcmp(&header1->signature, "head", 4) &&
header1->version == 1) {
h1_seq = header1->sequence_number;
h1_valid = true;
if (vhdx_checksum_is_valid(buffer, VHDX_HEADER_SIZE, 4)) {
vhdx_header_le_import(header1);
if (header1->signature == VHDX_HEADER_SIGNATURE &&
header1->version == 1) {
h1_seq = header1->sequence_number;
h1_valid = true;
}
}
ret = bdrv_pread(bs->file, VHDX_HEADER2_OFFSET, buffer, VHDX_HEADER_SIZE);
@ -447,13 +448,14 @@ static void vhdx_parse_header(BlockDriverState *bs, BDRVVHDXState *s,
}
/* copy over just the relevant portion that we need */
memcpy(header2, buffer, sizeof(VHDXHeader));
vhdx_header_le_import(header2);
if (vhdx_checksum_is_valid(buffer, VHDX_HEADER_SIZE, 4) &&
!memcmp(&header2->signature, "head", 4) &&
header2->version == 1) {
h2_seq = header2->sequence_number;
h2_valid = true;
if (vhdx_checksum_is_valid(buffer, VHDX_HEADER_SIZE, 4)) {
vhdx_header_le_import(header2);
if (header2->signature == VHDX_HEADER_SIGNATURE &&
header2->version == 1) {
h2_seq = header2->sequence_number;
h2_valid = true;
}
}
/* If there is only 1 valid header (or no valid headers), we
@ -519,15 +521,21 @@ static int vhdx_open_region_tables(BlockDriverState *bs, BDRVVHDXState *s)
goto fail;
}
memcpy(&s->rt, buffer, sizeof(s->rt));
vhdx_region_header_le_import(&s->rt);
offset += sizeof(s->rt);
if (!vhdx_checksum_is_valid(buffer, VHDX_HEADER_BLOCK_SIZE, 4) ||
memcmp(&s->rt.signature, "regi", 4)) {
if (!vhdx_checksum_is_valid(buffer, VHDX_HEADER_BLOCK_SIZE, 4)) {
ret = -EINVAL;
goto fail;
}
vhdx_region_header_le_import(&s->rt);
if (s->rt.signature != VHDX_REGION_SIGNATURE) {
ret = -EINVAL;
goto fail;
}
/* Per spec, maximum region table entry count is 2047 */
if (s->rt.entry_count > 2047) {
ret = -EINVAL;
@ -630,7 +638,7 @@ static int vhdx_parse_metadata(BlockDriverState *bs, BDRVVHDXState *s)
vhdx_metadata_header_le_import(&s->metadata_hdr);
if (memcmp(&s->metadata_hdr.signature, "metadata", 8)) {
if (s->metadata_hdr.signature != VHDX_METADATA_SIGNATURE) {
ret = -EINVAL;
goto exit;
}
@ -950,7 +958,11 @@ static int vhdx_open(BlockDriverState *bs, QDict *options, int flags,
}
/* s->bat is freed in vhdx_close() */
s->bat = qemu_blockalign(bs, s->bat_rt.length);
s->bat = qemu_try_blockalign(bs->file, s->bat_rt.length);
if (s->bat == NULL) {
ret = -ENOMEM;
goto fail;
}
ret = bdrv_pread(bs->file, s->bat_offset, s->bat, s->bat_rt.length);
if (ret < 0) {
@ -1540,7 +1552,8 @@ exit:
*/
static int vhdx_create_bat(BlockDriverState *bs, BDRVVHDXState *s,
uint64_t image_size, VHDXImageType type,
bool use_zero_blocks, VHDXRegionTableEntry *rt_bat)
bool use_zero_blocks, uint64_t file_offset,
uint32_t length)
{
int ret = 0;
uint64_t data_file_offset;
@ -1555,7 +1568,7 @@ static int vhdx_create_bat(BlockDriverState *bs, BDRVVHDXState *s,
/* this gives a data start after BAT/bitmap entries, and well
* past any metadata entries (with a 4 MB buffer for future
* expansion */
data_file_offset = rt_bat->file_offset + rt_bat->length + 5 * MiB;
data_file_offset = file_offset + length + 5 * MiB;
total_sectors = image_size >> s->logical_sector_size_bits;
if (type == VHDX_TYPE_DYNAMIC) {
@ -1579,7 +1592,11 @@ static int vhdx_create_bat(BlockDriverState *bs, BDRVVHDXState *s,
use_zero_blocks ||
bdrv_has_zero_init(bs) == 0) {
/* for a fixed file, the default BAT entry is not zero */
s->bat = g_malloc0(rt_bat->length);
s->bat = g_try_malloc0(length);
/* g_try_malloc0() returns NULL when the allocation fails; a NULL result
 * for a zero-length request is not an error, hence the length test.
 * The check must be "== NULL": testing "!= NULL" would report -ENOMEM on
 * every successful allocation and let a failed one through to the BAT
 * fill loop below, dereferencing a NULL pointer. */
if (length && s->bat == NULL) {
ret = -ENOMEM;
goto exit;
}
block_state = type == VHDX_TYPE_FIXED ? PAYLOAD_BLOCK_FULLY_PRESENT :
PAYLOAD_BLOCK_NOT_PRESENT;
block_state = use_zero_blocks ? PAYLOAD_BLOCK_ZERO : block_state;
@ -1594,7 +1611,7 @@ static int vhdx_create_bat(BlockDriverState *bs, BDRVVHDXState *s,
cpu_to_le64s(&s->bat[sinfo.bat_idx]);
sector_num += s->sectors_per_block;
}
ret = bdrv_pwrite(bs, rt_bat->file_offset, s->bat, rt_bat->length);
ret = bdrv_pwrite(bs, file_offset, s->bat, length);
if (ret < 0) {
goto exit;
}
@ -1626,6 +1643,8 @@ static int vhdx_create_new_region_table(BlockDriverState *bs,
int ret = 0;
uint32_t offset = 0;
void *buffer = NULL;
uint64_t bat_file_offset;
uint32_t bat_length;
BDRVVHDXState *s = NULL;
VHDXRegionTableHeader *region_table;
VHDXRegionTableEntry *rt_bat;
@ -1674,19 +1693,26 @@ static int vhdx_create_new_region_table(BlockDriverState *bs,
rt_metadata->length = 1 * MiB; /* min size, and more than enough */
*metadata_offset = rt_metadata->file_offset;
bat_file_offset = rt_bat->file_offset;
bat_length = rt_bat->length;
vhdx_region_header_le_export(region_table);
vhdx_region_entry_le_export(rt_bat);
vhdx_region_entry_le_export(rt_metadata);
vhdx_update_checksum(buffer, VHDX_HEADER_BLOCK_SIZE,
offsetof(VHDXRegionTableHeader, checksum));
/* The region table gives us the data we need to create the BAT,
* so do that now */
ret = vhdx_create_bat(bs, s, image_size, type, use_zero_blocks, rt_bat);
ret = vhdx_create_bat(bs, s, image_size, type, use_zero_blocks,
bat_file_offset, bat_length);
if (ret < 0) {
goto exit;
}
/* Now write out the region headers to disk */
vhdx_region_header_le_export(region_table);
vhdx_region_entry_le_export(rt_bat);
vhdx_region_entry_le_export(rt_metadata);
ret = bdrv_pwrite(bs, VHDX_REGION_TABLE_OFFSET, buffer,
VHDX_HEADER_BLOCK_SIZE);
if (ret < 0) {

View file

@ -435,6 +435,7 @@ void vhdx_header_le_import(VHDXHeader *h);
void vhdx_header_le_export(VHDXHeader *orig_h, VHDXHeader *new_h);
void vhdx_log_desc_le_import(VHDXLogDescriptor *d);
void vhdx_log_desc_le_export(VHDXLogDescriptor *d);
void vhdx_log_data_le_import(VHDXLogDataSector *d);
void vhdx_log_data_le_export(VHDXLogDataSector *d);
void vhdx_log_entry_hdr_le_import(VHDXLogEntryHeader *hdr);
void vhdx_log_entry_hdr_le_export(VHDXLogEntryHeader *hdr);

View file

@ -106,6 +106,7 @@ typedef struct VmdkExtent {
uint32_t l2_cache_counts[L2_CACHE_SIZE];
int64_t cluster_sectors;
int64_t next_cluster_sector;
char *type;
} VmdkExtent;
@ -124,7 +125,6 @@ typedef struct BDRVVmdkState {
} BDRVVmdkState;
typedef struct VmdkMetaData {
uint32_t offset;
unsigned int l1_index;
unsigned int l2_index;
unsigned int l2_offset;
@ -397,6 +397,7 @@ static int vmdk_add_extent(BlockDriverState *bs,
{
VmdkExtent *extent;
BDRVVmdkState *s = bs->opaque;
int64_t length;
if (cluster_sectors > 0x200000) {
/* 0x200000 * 512Bytes = 1GB for one cluster is unrealistic */
@ -412,6 +413,11 @@ static int vmdk_add_extent(BlockDriverState *bs,
return -EFBIG;
}
length = bdrv_getlength(file);
if (length < 0) {
return length;
}
s->extents = g_realloc(s->extents,
(s->num_extents + 1) * sizeof(VmdkExtent));
extent = &s->extents[s->num_extents];
@ -427,6 +433,8 @@ static int vmdk_add_extent(BlockDriverState *bs,
extent->l1_entry_sectors = l2_size * cluster_sectors;
extent->l2_size = l2_size;
extent->cluster_sectors = flat ? sectors : cluster_sectors;
extent->next_cluster_sector =
ROUND_UP(DIV_ROUND_UP(length, BDRV_SECTOR_SIZE), cluster_sectors);
if (s->num_extents > 1) {
extent->end_sector = (*(extent - 1)).end_sector + extent->sectors;
@ -448,7 +456,11 @@ static int vmdk_init_tables(BlockDriverState *bs, VmdkExtent *extent,
/* read the L1 table */
l1_size = extent->l1_size * sizeof(uint32_t);
extent->l1_table = g_malloc(l1_size);
extent->l1_table = g_try_malloc(l1_size);
if (l1_size && extent->l1_table == NULL) {
return -ENOMEM;
}
ret = bdrv_pread(extent->file,
extent->l1_table_offset,
extent->l1_table,
@ -464,7 +476,11 @@ static int vmdk_init_tables(BlockDriverState *bs, VmdkExtent *extent,
}
if (extent->l1_backup_table_offset) {
extent->l1_backup_table = g_malloc(l1_size);
extent->l1_backup_table = g_try_malloc(l1_size);
if (l1_size && extent->l1_backup_table == NULL) {
ret = -ENOMEM;
goto fail_l1;
}
ret = bdrv_pread(extent->file,
extent->l1_backup_table_offset,
extent->l1_backup_table,
@ -669,8 +685,7 @@ static int vmdk_open_vmdk4(BlockDriverState *bs,
if (le32_to_cpu(header.flags) & VMDK4_FLAG_RGD) {
l1_backup_offset = le64_to_cpu(header.rgd_offset) << 9;
}
if (bdrv_getlength(file) <
le64_to_cpu(header.grain_offset) * BDRV_SECTOR_SIZE) {
if (bdrv_nb_sectors(file) < le64_to_cpu(header.grain_offset)) {
error_setg(errp, "File truncated, expecting at least %" PRId64 " bytes",
(int64_t)(le64_to_cpu(header.grain_offset)
* BDRV_SECTOR_SIZE));
@ -952,57 +967,97 @@ static void vmdk_refresh_limits(BlockDriverState *bs, Error **errp)
}
}
/**
* get_whole_cluster
*
* Copy the backing file's cluster that covers @sector_num (or write zeroes if
* there is no backing file) to the cluster at @cluster_sector_num.
*
* If @skip_start_sector < @skip_end_sector, the relative range
* [@skip_start_sector, @skip_end_sector) is neither copied nor zeroed; it is
* left for the caller to fill with the user data of the request.
*/
static int get_whole_cluster(BlockDriverState *bs,
VmdkExtent *extent,
uint64_t cluster_offset,
uint64_t offset,
bool allocate)
VmdkExtent *extent,
uint64_t cluster_sector_num,
uint64_t sector_num,
uint64_t skip_start_sector,
uint64_t skip_end_sector)
{
int ret = VMDK_OK;
uint8_t *whole_grain = NULL;
int64_t cluster_bytes;
uint8_t *whole_grain;
/* For COW, align request sector_num to cluster start */
sector_num = QEMU_ALIGN_DOWN(sector_num, extent->cluster_sectors);
cluster_bytes = extent->cluster_sectors << BDRV_SECTOR_BITS;
whole_grain = qemu_blockalign(bs, cluster_bytes);
if (!bs->backing_hd) {
memset(whole_grain, 0, skip_start_sector << BDRV_SECTOR_BITS);
memset(whole_grain + (skip_end_sector << BDRV_SECTOR_BITS), 0,
cluster_bytes - (skip_end_sector << BDRV_SECTOR_BITS));
}
assert(skip_end_sector <= extent->cluster_sectors);
/* we will be here if it's first write on non-exist grain(cluster).
* try to read from parent image, if exist */
if (bs->backing_hd) {
whole_grain =
qemu_blockalign(bs, extent->cluster_sectors << BDRV_SECTOR_BITS);
if (!vmdk_is_cid_valid(bs)) {
ret = VMDK_ERROR;
goto exit;
}
if (bs->backing_hd && !vmdk_is_cid_valid(bs)) {
ret = VMDK_ERROR;
goto exit;
}
/* floor offset to cluster */
offset -= offset % (extent->cluster_sectors * 512);
ret = bdrv_read(bs->backing_hd, offset >> 9, whole_grain,
extent->cluster_sectors);
if (ret < 0) {
ret = VMDK_ERROR;
goto exit;
/* Read backing data before skip range */
if (skip_start_sector > 0) {
if (bs->backing_hd) {
ret = bdrv_read(bs->backing_hd, sector_num,
whole_grain, skip_start_sector);
if (ret < 0) {
ret = VMDK_ERROR;
goto exit;
}
}
/* Write grain only into the active image */
ret = bdrv_write(extent->file, cluster_offset, whole_grain,
extent->cluster_sectors);
ret = bdrv_write(extent->file, cluster_sector_num, whole_grain,
skip_start_sector);
if (ret < 0) {
ret = VMDK_ERROR;
goto exit;
}
}
/* Read backing data after skip range */
if (skip_end_sector < extent->cluster_sectors) {
if (bs->backing_hd) {
ret = bdrv_read(bs->backing_hd, sector_num + skip_end_sector,
whole_grain + (skip_end_sector << BDRV_SECTOR_BITS),
extent->cluster_sectors - skip_end_sector);
if (ret < 0) {
ret = VMDK_ERROR;
goto exit;
}
}
ret = bdrv_write(extent->file, cluster_sector_num + skip_end_sector,
whole_grain + (skip_end_sector << BDRV_SECTOR_BITS),
extent->cluster_sectors - skip_end_sector);
if (ret < 0) {
ret = VMDK_ERROR;
goto exit;
}
}
exit:
qemu_vfree(whole_grain);
return ret;
}
static int vmdk_L2update(VmdkExtent *extent, VmdkMetaData *m_data)
static int vmdk_L2update(VmdkExtent *extent, VmdkMetaData *m_data,
uint32_t offset)
{
uint32_t offset;
QEMU_BUILD_BUG_ON(sizeof(offset) != sizeof(m_data->offset));
offset = cpu_to_le32(m_data->offset);
offset = cpu_to_le32(offset);
/* update L2 table */
if (bdrv_pwrite_sync(
extent->file,
((int64_t)m_data->l2_offset * 512)
+ (m_data->l2_index * sizeof(m_data->offset)),
+ (m_data->l2_index * sizeof(offset)),
&offset, sizeof(offset)) < 0) {
return VMDK_ERROR;
}
@ -1012,7 +1067,7 @@ static int vmdk_L2update(VmdkExtent *extent, VmdkMetaData *m_data)
if (bdrv_pwrite_sync(
extent->file,
((int64_t)m_data->l2_offset * 512)
+ (m_data->l2_index * sizeof(m_data->offset)),
+ (m_data->l2_index * sizeof(offset)),
&offset, sizeof(offset)) < 0) {
return VMDK_ERROR;
}
@ -1024,17 +1079,41 @@ static int vmdk_L2update(VmdkExtent *extent, VmdkMetaData *m_data)
return VMDK_OK;
}
/**
* get_cluster_offset
*
* Look up cluster offset in extent file by sector number, and store in
* @cluster_offset.
*
* For flat extents, the start offset as parsed from the description file is
* returned.
*
* For sparse extents, look up in L1, L2 table. If allocate is true, return an
* offset for a new cluster and update L2 cache. If there is a backing file,
* COW is done before returning; otherwise, zeroes are written to the allocated
* cluster. Both COW and zero writing skip the sector range
* [@skip_start_sector, @skip_end_sector) passed in by the caller, because the
* caller has new data to write there.
*
* Returns: VMDK_OK if cluster exists and mapped in the image.
* VMDK_UNALLOC if cluster is not mapped and @allocate is false.
* VMDK_ERROR if failed.
*/
static int get_cluster_offset(BlockDriverState *bs,
VmdkExtent *extent,
VmdkMetaData *m_data,
uint64_t offset,
int allocate,
uint64_t *cluster_offset)
VmdkExtent *extent,
VmdkMetaData *m_data,
uint64_t offset,
bool allocate,
uint64_t *cluster_offset,
uint64_t skip_start_sector,
uint64_t skip_end_sector)
{
unsigned int l1_index, l2_offset, l2_index;
int min_index, i, j;
uint32_t min_count, *l2_table;
bool zeroed = false;
int64_t ret;
int32_t cluster_sector;
if (m_data) {
m_data->valid = 0;
@ -1088,52 +1167,41 @@ static int get_cluster_offset(BlockDriverState *bs,
extent->l2_cache_counts[min_index] = 1;
found:
l2_index = ((offset >> 9) / extent->cluster_sectors) % extent->l2_size;
*cluster_offset = le32_to_cpu(l2_table[l2_index]);
cluster_sector = le32_to_cpu(l2_table[l2_index]);
if (m_data) {
m_data->valid = 1;
m_data->l1_index = l1_index;
m_data->l2_index = l2_index;
m_data->offset = *cluster_offset;
m_data->l2_offset = l2_offset;
m_data->l2_cache_entry = &l2_table[l2_index];
}
if (extent->has_zero_grain && *cluster_offset == VMDK_GTE_ZEROED) {
if (extent->has_zero_grain && cluster_sector == VMDK_GTE_ZEROED) {
zeroed = true;
}
if (!*cluster_offset || zeroed) {
if (!cluster_sector || zeroed) {
if (!allocate) {
return zeroed ? VMDK_ZEROED : VMDK_UNALLOC;
}
/* Avoid the L2 tables update for the images that have snapshots. */
*cluster_offset = bdrv_getlength(extent->file);
if (!extent->compressed) {
bdrv_truncate(
extent->file,
*cluster_offset + (extent->cluster_sectors << 9)
);
}
*cluster_offset >>= 9;
l2_table[l2_index] = cpu_to_le32(*cluster_offset);
cluster_sector = extent->next_cluster_sector;
extent->next_cluster_sector += extent->cluster_sectors;
/* First of all we write grain itself, to avoid race condition
* that may corrupt the image.
* This problem may occur because of insufficient space on host disk
* or inappropriate VM shutdown.
*/
if (get_whole_cluster(
bs, extent, *cluster_offset, offset, allocate) == -1) {
return VMDK_ERROR;
}
if (m_data) {
m_data->offset = *cluster_offset;
ret = get_whole_cluster(bs, extent,
cluster_sector,
offset >> BDRV_SECTOR_BITS,
skip_start_sector, skip_end_sector);
if (ret) {
return ret;
}
}
*cluster_offset <<= 9;
*cluster_offset = cluster_sector << BDRV_SECTOR_BITS;
return VMDK_OK;
}
@ -1168,7 +1236,8 @@ static int64_t coroutine_fn vmdk_co_get_block_status(BlockDriverState *bs,
}
qemu_co_mutex_lock(&s->lock);
ret = get_cluster_offset(bs, extent, NULL,
sector_num * 512, 0, &offset);
sector_num * 512, false, &offset,
0, 0);
qemu_co_mutex_unlock(&s->lock);
switch (ret) {
@ -1321,9 +1390,9 @@ static int vmdk_read(BlockDriverState *bs, int64_t sector_num,
if (!extent) {
return -EIO;
}
ret = get_cluster_offset(
bs, extent, NULL,
sector_num << 9, 0, &cluster_offset);
ret = get_cluster_offset(bs, extent, NULL,
sector_num << 9, false, &cluster_offset,
0, 0);
extent_begin_sector = extent->end_sector - extent->sectors;
extent_relative_sector_num = sector_num - extent_begin_sector;
index_in_cluster = extent_relative_sector_num % extent->cluster_sectors;
@ -1404,12 +1473,17 @@ static int vmdk_write(BlockDriverState *bs, int64_t sector_num,
if (!extent) {
return -EIO;
}
ret = get_cluster_offset(
bs,
extent,
&m_data,
sector_num << 9, !extent->compressed,
&cluster_offset);
extent_begin_sector = extent->end_sector - extent->sectors;
extent_relative_sector_num = sector_num - extent_begin_sector;
index_in_cluster = extent_relative_sector_num % extent->cluster_sectors;
n = extent->cluster_sectors - index_in_cluster;
if (n > nb_sectors) {
n = nb_sectors;
}
ret = get_cluster_offset(bs, extent, &m_data, sector_num << 9,
!(extent->compressed || zeroed),
&cluster_offset,
index_in_cluster, index_in_cluster + n);
if (extent->compressed) {
if (ret == VMDK_OK) {
/* Refuse write to allocated cluster for streamOptimized */
@ -1418,24 +1492,13 @@ static int vmdk_write(BlockDriverState *bs, int64_t sector_num,
return -EIO;
} else {
/* allocate */
ret = get_cluster_offset(
bs,
extent,
&m_data,
sector_num << 9, 1,
&cluster_offset);
ret = get_cluster_offset(bs, extent, &m_data, sector_num << 9,
true, &cluster_offset, 0, 0);
}
}
if (ret == VMDK_ERROR) {
return -EINVAL;
}
extent_begin_sector = extent->end_sector - extent->sectors;
extent_relative_sector_num = sector_num - extent_begin_sector;
index_in_cluster = extent_relative_sector_num % extent->cluster_sectors;
n = extent->cluster_sectors - index_in_cluster;
if (n > nb_sectors) {
n = nb_sectors;
}
if (zeroed) {
/* Do zeroed write, buf is ignored */
if (extent->has_zero_grain &&
@ -1443,9 +1506,9 @@ static int vmdk_write(BlockDriverState *bs, int64_t sector_num,
n >= extent->cluster_sectors) {
n = extent->cluster_sectors;
if (!zero_dry_run) {
m_data.offset = VMDK_GTE_ZEROED;
/* update L2 tables */
if (vmdk_L2update(extent, &m_data) != VMDK_OK) {
if (vmdk_L2update(extent, &m_data, VMDK_GTE_ZEROED)
!= VMDK_OK) {
return -EIO;
}
}
@ -1461,7 +1524,9 @@ static int vmdk_write(BlockDriverState *bs, int64_t sector_num,
}
if (m_data.valid) {
/* update L2 tables */
if (vmdk_L2update(extent, &m_data) != VMDK_OK) {
if (vmdk_L2update(extent, &m_data,
cluster_offset >> BDRV_SECTOR_BITS)
!= VMDK_OK) {
return -EIO;
}
}
@ -1999,7 +2064,7 @@ static int vmdk_check(BlockDriverState *bs, BdrvCheckResult *result,
BDRVVmdkState *s = bs->opaque;
VmdkExtent *extent = NULL;
int64_t sector_num = 0;
int64_t total_sectors = bdrv_getlength(bs) / BDRV_SECTOR_SIZE;
int64_t total_sectors = bdrv_nb_sectors(bs);
int ret;
uint64_t cluster_offset;
@ -2020,7 +2085,7 @@ static int vmdk_check(BlockDriverState *bs, BdrvCheckResult *result,
}
ret = get_cluster_offset(bs, extent, NULL,
sector_num << BDRV_SECTOR_BITS,
0, &cluster_offset);
false, &cluster_offset, 0, 0);
if (ret == VMDK_ERROR) {
fprintf(stderr,
"ERROR: could not get cluster_offset for sector %"

View file

@ -29,13 +29,6 @@
#if defined(CONFIG_UUID)
#include <uuid/uuid.h>
#endif
#ifdef __linux__
#include <linux/fs.h>
#include <sys/ioctl.h>
#ifndef FS_NOCOW_FL
#define FS_NOCOW_FL 0x00800000 /* Do not cow file */
#endif
#endif
/**************************************************************/
@ -276,7 +269,11 @@ static int vpc_open(BlockDriverState *bs, QDict *options, int flags,
goto fail;
}
s->pagetable = qemu_blockalign(bs, s->max_table_entries * 4);
s->pagetable = qemu_try_blockalign(bs->file, s->max_table_entries * 4);
if (s->pagetable == NULL) {
ret = -ENOMEM;
goto fail;
}
s->bat_offset = be64_to_cpu(dyndisk_header->table_offset);
@ -656,39 +653,41 @@ static int calculate_geometry(int64_t total_sectors, uint16_t* cyls,
return 0;
}
static int create_dynamic_disk(int fd, uint8_t *buf, int64_t total_sectors)
static int create_dynamic_disk(BlockDriverState *bs, uint8_t *buf,
int64_t total_sectors)
{
VHDDynDiskHeader *dyndisk_header =
(VHDDynDiskHeader *) buf;
size_t block_size, num_bat_entries;
int i;
int ret = -EIO;
int ret;
int64_t offset = 0;
// Write the footer (twice: at the beginning and at the end)
block_size = 0x200000;
num_bat_entries = (total_sectors + block_size / 512) / (block_size / 512);
if (write(fd, buf, HEADER_SIZE) != HEADER_SIZE) {
ret = bdrv_pwrite_sync(bs, offset, buf, HEADER_SIZE);
if (ret) {
goto fail;
}
if (lseek(fd, 1536 + ((num_bat_entries * 4 + 511) & ~511), SEEK_SET) < 0) {
goto fail;
}
if (write(fd, buf, HEADER_SIZE) != HEADER_SIZE) {
offset = 1536 + ((num_bat_entries * 4 + 511) & ~511);
ret = bdrv_pwrite_sync(bs, offset, buf, HEADER_SIZE);
if (ret < 0) {
goto fail;
}
// Write the initial BAT
if (lseek(fd, 3 * 512, SEEK_SET) < 0) {
goto fail;
}
offset = 3 * 512;
memset(buf, 0xFF, 512);
for (i = 0; i < (num_bat_entries * 4 + 511) / 512; i++) {
if (write(fd, buf, 512) != 512) {
ret = bdrv_pwrite_sync(bs, offset, buf, 512);
if (ret < 0) {
goto fail;
}
offset += 512;
}
// Prepare the Dynamic Disk Header
@ -709,39 +708,35 @@ static int create_dynamic_disk(int fd, uint8_t *buf, int64_t total_sectors)
dyndisk_header->checksum = be32_to_cpu(vpc_checksum(buf, 1024));
// Write the header
if (lseek(fd, 512, SEEK_SET) < 0) {
goto fail;
}
offset = 512;
if (write(fd, buf, 1024) != 1024) {
ret = bdrv_pwrite_sync(bs, offset, buf, 1024);
if (ret < 0) {
goto fail;
}
ret = 0;
fail:
return ret;
}
static int create_fixed_disk(int fd, uint8_t *buf, int64_t total_size)
static int create_fixed_disk(BlockDriverState *bs, uint8_t *buf,
int64_t total_size)
{
int ret = -EIO;
int ret;
/* Add footer to total size */
total_size += 512;
if (ftruncate(fd, total_size) != 0) {
ret = -errno;
goto fail;
}
if (lseek(fd, -512, SEEK_END) < 0) {
goto fail;
}
if (write(fd, buf, HEADER_SIZE) != HEADER_SIZE) {
goto fail;
total_size += HEADER_SIZE;
ret = bdrv_truncate(bs, total_size);
if (ret < 0) {
return ret;
}
ret = 0;
ret = bdrv_pwrite_sync(bs, total_size - HEADER_SIZE, buf, HEADER_SIZE);
if (ret < 0) {
return ret;
}
fail:
return ret;
}
@ -750,7 +745,7 @@ static int vpc_create(const char *filename, QemuOpts *opts, Error **errp)
uint8_t buf[1024];
VHDFooter *footer = (VHDFooter *) buf;
char *disk_type_param;
int fd, i;
int i;
uint16_t cyls = 0;
uint8_t heads = 0;
uint8_t secs_per_cyl = 0;
@ -758,7 +753,8 @@ static int vpc_create(const char *filename, QemuOpts *opts, Error **errp)
int64_t total_size;
int disk_type;
int ret = -EIO;
bool nocow = false;
Error *local_err = NULL;
BlockDriverState *bs = NULL;
/* Read out options */
total_size = qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0);
@ -775,28 +771,17 @@ static int vpc_create(const char *filename, QemuOpts *opts, Error **errp)
} else {
disk_type = VHD_DYNAMIC;
}
nocow = qemu_opt_get_bool_del(opts, BLOCK_OPT_NOCOW, false);
/* Create the file */
fd = qemu_open(filename, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, 0644);
if (fd < 0) {
ret = -EIO;
ret = bdrv_create_file(filename, opts, &local_err);
if (ret < 0) {
error_propagate(errp, local_err);
goto out;
}
if (nocow) {
#ifdef __linux__
/* Set NOCOW flag to solve performance issue on fs like btrfs.
* This is an optimisation. The FS_IOC_SETFLAGS ioctl return value will
* be ignored since any failure of this operation should not block the
* left work.
*/
int attr;
if (ioctl(fd, FS_IOC_GETFLAGS, &attr) == 0) {
attr |= FS_NOCOW_FL;
ioctl(fd, FS_IOC_SETFLAGS, &attr);
}
#endif
ret = bdrv_open(&bs, filename, NULL, NULL, BDRV_O_RDWR | BDRV_O_PROTOCOL,
NULL, &local_err);
if (ret < 0) {
error_propagate(errp, local_err);
goto out;
}
/*
@ -810,7 +795,7 @@ static int vpc_create(const char *filename, QemuOpts *opts, Error **errp)
&secs_per_cyl))
{
ret = -EFBIG;
goto fail;
goto out;
}
}
@ -856,14 +841,13 @@ static int vpc_create(const char *filename, QemuOpts *opts, Error **errp)
footer->checksum = be32_to_cpu(vpc_checksum(buf, HEADER_SIZE));
if (disk_type == VHD_DYNAMIC) {
ret = create_dynamic_disk(fd, buf, total_sectors);
ret = create_dynamic_disk(bs, buf, total_sectors);
} else {
ret = create_fixed_disk(fd, buf, total_size);
ret = create_fixed_disk(bs, buf, total_size);
}
fail:
qemu_close(fd);
out:
bdrv_unref(bs);
g_free(disk_type_param);
return ret;
}

View file

@ -139,7 +139,10 @@ BlockDriverAIOCB *win32_aio_submit(BlockDriverState *bs,
waiocb->is_read = (type == QEMU_AIO_READ);
if (qiov->niov > 1) {
waiocb->buf = qemu_blockalign(bs, qiov->size);
waiocb->buf = qemu_try_blockalign(bs, qiov->size);
if (waiocb->buf == NULL) {
goto out;
}
if (type & QEMU_AIO_WRITE) {
iov_to_buf(qiov->iov, qiov->niov, 0, waiocb->buf, qiov->size);
}
@ -168,6 +171,7 @@ BlockDriverAIOCB *win32_aio_submit(BlockDriverState *bs,
out_dec_count:
aio->count--;
out:
qemu_aio_release(waiocb);
return NULL;
}

52
configure vendored
View file

@ -326,6 +326,7 @@ seccomp=""
glusterfs=""
glusterfs_discard="no"
glusterfs_zerofill="no"
archipelago=""
virtio_blk_data_plane=""
gtk=""
gtkabi=""
@ -1087,6 +1088,10 @@ for opt do
;;
--enable-glusterfs) glusterfs="yes"
;;
--disable-archipelago) archipelago="no"
;;
--enable-archipelago) archipelago="yes"
;;
--disable-virtio-blk-data-plane) virtio_blk_data_plane="no"
;;
--enable-virtio-blk-data-plane) virtio_blk_data_plane="yes"
@ -1382,6 +1387,8 @@ Advanced options (experts only):
--enable-coroutine-pool enable coroutine freelist (better performance)
--enable-glusterfs enable GlusterFS backend
--disable-glusterfs disable GlusterFS backend
--enable-archipelago enable Archipelago backend
--disable-archipelago disable Archipelago backend
--enable-gcov enable test coverage analysis with gcov
--gcov=GCOV use specified gcov [$gcov_tool]
--disable-tpm disable TPM support
@ -3072,6 +3079,33 @@ EOF
fi
fi
##########################################
# archipelago probe
#
# Unless the feature was explicitly disabled ($archipelago = "no"), write a
# small test program that includes the xseg headers and calls
# xseg_initialize(), then try to build it with -lxseg.
# NOTE(review): compile_prog and feature_not_found are defined elsewhere in
# configure; presumably the former compiles and links $TMPC and the latter
# aborts with the given hint -- confirm.
if test "$archipelago" != "no" ; then
cat > $TMPC <<EOF
#include <stdio.h>
#include <xseg/xseg.h>
#include <xseg/protocol.h>
int main(void) {
xseg_initialize();
return 0;
}
EOF
archipelago_libs=-lxseg
if compile_prog "" "$archipelago_libs"; then
# Probe succeeded: turn the feature on and prepend $archipelago_libs to
# both libs_tools and libs_softmmu.
archipelago="yes"
libs_tools="$archipelago_libs $libs_tools"
libs_softmmu="$archipelago_libs $libs_softmmu"
else
# Probe failed: report it only when the value is already "yes" (set by
# --enable-archipelago); in every other case fall through and disable.
if test "$archipelago" = "yes" ; then
feature_not_found "Archipelago backend support" "Install libxseg devel"
fi
archipelago="no"
fi
fi
##########################################
# glusterfs probe
if test "$glusterfs" != "no" ; then
@ -3087,7 +3121,8 @@ if test "$glusterfs" != "no" ; then
fi
else
if test "$glusterfs" = "yes" ; then
feature_not_found "GlusterFS backend support" "Install glusterfs-api devel"
feature_not_found "GlusterFS backend support" \
"Install glusterfs-api devel >= 3"
fi
glusterfs="no"
fi
@ -3532,7 +3567,8 @@ EOF
spice_server_version=$($pkg_config --modversion spice-server)
else
if test "$spice" = "yes" ; then
feature_not_found "spice" "Install spice-server and spice-protocol devel"
feature_not_found "spice" \
"Install spice-server(>=0.12.0) and spice-protocol(>=0.12.3) devel"
fi
spice="no"
fi
@ -3563,7 +3599,7 @@ EOF
smartcard_nss="yes"
else
if test "$smartcard_nss" = "yes"; then
feature_not_found "nss"
feature_not_found "nss" "Install nss devel >= 3.12.8"
fi
smartcard_nss="no"
fi
@ -3579,7 +3615,7 @@ if test "$libusb" != "no" ; then
libs_softmmu="$libs_softmmu $libusb_libs"
else
if test "$libusb" = "yes"; then
feature_not_found "libusb" "Install libusb devel"
feature_not_found "libusb" "Install libusb devel >= 1.0.13"
fi
libusb="no"
fi
@ -4004,7 +4040,7 @@ if test "$libnfs" != "no" ; then
LIBS="$LIBS $libnfs_libs"
else
if test "$libnfs" = "yes" ; then
feature_not_found "libnfs"
feature_not_found "libnfs" "Install libnfs devel >= 1.9.3"
fi
libnfs="no"
fi
@ -4251,6 +4287,7 @@ echo "seccomp support $seccomp"
echo "coroutine backend $coroutine"
echo "coroutine pool $coroutine_pool"
echo "GlusterFS support $glusterfs"
echo "Archipelago support $archipelago"
echo "virtio-blk-data-plane $virtio_blk_data_plane"
echo "gcov $gcov_tool"
echo "gcov enabled $gcov"
@ -4689,6 +4726,11 @@ if test "$glusterfs_zerofill" = "yes" ; then
echo "CONFIG_GLUSTERFS_ZEROFILL=y" >> $config_host_mak
fi
# When archipelago is "yes", append CONFIG_ARCHIPELAGO=m and the library
# flags held in $archipelago_libs to $config_host_mak.
if test "$archipelago" = "yes" ; then
echo "CONFIG_ARCHIPELAGO=m" >> $config_host_mak
echo "ARCHIPELAGO_LIBS=$archipelago_libs" >> $config_host_mak
fi
if test "$libssh2" = "yes" ; then
echo "CONFIG_LIBSSH2=m" >> $config_host_mak
echo "LIBSSH2_CFLAGS=$libssh2_cflags" >> $config_host_mak

134
docs/multiple-iothreads.txt Normal file
View file

@ -0,0 +1,134 @@
Copyright (c) 2014 Red Hat Inc.
This work is licensed under the terms of the GNU GPL, version 2 or later. See
the COPYING file in the top-level directory.
This document explains the IOThread feature and how to write code that runs
outside the QEMU global mutex.
The main loop and IOThreads
---------------------------
QEMU is an event-driven program that can do several things at once using an
event loop. The VNC server and the QMP monitor are both processed from the
same event loop, which monitors their file descriptors until they become
readable and then invokes a callback.
The default event loop is called the main loop (see main-loop.c). It is
possible to create additional event loop threads using -object
iothread,id=my-iothread.
Side note: The main loop and IOThread are both event loops but their code is
not shared completely. Sometimes it is useful to remember that although they
are conceptually similar they are currently not interchangeable.
Why IOThreads are useful
------------------------
IOThreads allow the user to control the placement of work. The main loop is a
scalability bottleneck on hosts with many CPUs. Work can be spread across
several IOThreads instead of just one main loop. When set up correctly this
can improve I/O latency and reduce jitter seen by the guest.
The main loop is also deeply associated with the QEMU global mutex, which is a
scalability bottleneck in itself. vCPU threads and the main loop use the QEMU
global mutex to serialize execution of QEMU code. This mutex is necessary
because a lot of QEMU's code historically was not thread-safe.
The fact that all I/O processing is done in a single main loop and that the
QEMU global mutex is contended by all vCPU threads and the main loop explains
why it is desirable to place work into IOThreads.
The experimental virtio-blk data-plane implementation has been benchmarked and
shows these effects:
ftp://public.dhe.ibm.com/linux/pdfs/KVM_Virtualized_IO_Performance_Paper.pdf
How to program for IOThreads
----------------------------
The main difference between legacy code and new code that can run in an
IOThread is dealing explicitly with the event loop object, AioContext
(see include/block/aio.h). Code that only works in the main loop
implicitly uses the main loop's AioContext. Code that supports running
in IOThreads must be aware of its AioContext.
AioContext supports the following services:
* File descriptor monitoring (read/write/error on POSIX hosts)
* Event notifiers (inter-thread signalling)
* Timers
* Bottom Halves (BH) deferred callbacks
There are several old APIs that use the main loop AioContext:
* LEGACY qemu_aio_set_fd_handler() - monitor a file descriptor
* LEGACY qemu_aio_set_event_notifier() - monitor an event notifier
* LEGACY timer_new_ms() - create a timer
* LEGACY qemu_bh_new() - create a BH
* LEGACY qemu_aio_wait() - run an event loop iteration
Since they implicitly work on the main loop they cannot be used in code that
runs in an IOThread. They might cause a crash or deadlock if called from an
IOThread since the QEMU global mutex is not held.
Instead, use the AioContext functions directly (see include/block/aio.h):
* aio_set_fd_handler() - monitor a file descriptor
* aio_set_event_notifier() - monitor an event notifier
* aio_timer_new() - create a timer
* aio_bh_new() - create a BH
* aio_poll() - run an event loop iteration
The AioContext can be obtained from the IOThread using
iothread_get_aio_context() or for the main loop using qemu_get_aio_context().
Code that takes an AioContext argument works both in IOThreads and in the
main loop, depending on which AioContext instance the caller passes in.
How to synchronize with an IOThread
-----------------------------------
AioContext is not thread-safe so some rules must be followed when using file
descriptors, event notifiers, timers, or BHs across threads:
1. AioContext functions can be called safely from file descriptor, event
notifier, timer, or BH callbacks invoked by the AioContext. No locking is
necessary.
2. Other threads wishing to access the AioContext must use
aio_context_acquire()/aio_context_release() for mutual exclusion. Once the
context is acquired no other thread can access it or run event loop iterations
in this AioContext.
aio_context_acquire()/aio_context_release() calls may be nested. This
means you can call them if you're not sure whether #1 applies.
There is currently no lock ordering rule if a thread needs to acquire multiple
AioContexts simultaneously. Therefore, it is only safe for code holding the
QEMU global mutex to acquire other AioContexts.
Side note: the best way to schedule a function call across threads is to create
a BH in the target AioContext beforehand and then call qemu_bh_schedule(). No
acquire/release or locking is needed for the qemu_bh_schedule() call. But be
sure to acquire the AioContext for aio_bh_new() if necessary.
The relationship between AioContext and the block layer
-------------------------------------------------------
The AioContext originates from the QEMU block layer because it provides a
scoped way of running event loop iterations until all work is done. This
feature is used to complete all in-flight block I/O requests (see
bdrv_drain_all()). Nowadays AioContext is a generic event loop that can be
used by any QEMU subsystem.
The block layer has support for AioContext integrated. Each BlockDriverState
is associated with an AioContext using bdrv_set_aio_context() and
bdrv_get_aio_context(). This allows block layer code to process I/O inside the
right AioContext. Other subsystems may wish to follow a similar approach.
Block layer code must therefore expect to run in an IOThread and avoid using
old APIs that implicitly use the main loop. See the "How to program for
IOThreads" section above for information on how to do that.
If main loop code such as a QMP function wishes to access a BlockDriverState it
must first call aio_context_acquire(bdrv_get_aio_context(bs)) to ensure the
IOThread does not run in parallel.
Long-running jobs (usually in the form of coroutines) are best scheduled in the
BlockDriverState's AioContext to avoid the need to acquire/release around each
bdrv_*() call. Be aware that there is currently no mechanism to get notified
when bdrv_set_aio_context() moves this BlockDriverState to a different
AioContext (see bdrv_detach_aio_context()/bdrv_attach_aio_context()), so you
may need to add this if you want to support long-running jobs.

View file

@ -135,12 +135,12 @@ be stored. Each extension has a structure like the following:
Unless stated otherwise, each header extension type shall appear at most once
in the same image.
The remaining space between the end of the header extension area and the end of
the first cluster can be used for the backing file name. It is not allowed to
store other data here, so that an implementation can safely modify the header
and add extensions without harming data of compatible features that it
doesn't support. Compatible features that need space for additional data can
use a header extension.
If the image has a backing file then the backing file name should be stored in
the remaining space between the end of the header extension area and the end of
the first cluster. It is not allowed to store other data here, so that an
implementation can safely modify the header and add extensions without harming
data of compatible features that it doesn't support. Compatible features that
need space for additional data can use a header extension.
== Feature name table ==

View file

@ -589,6 +589,7 @@ static int blk_send_response_one(struct ioreq *ioreq)
break;
default:
dst = NULL;
return 0;
}
memcpy(dst, &resp, sizeof(resp));
blkdev->rings.common.rsp_prod_pvt++;

View file

@ -275,6 +275,7 @@ BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs,
const char *backing_file);
int bdrv_get_backing_file_depth(BlockDriverState *bs);
int bdrv_truncate(BlockDriverState *bs, int64_t offset);
int64_t bdrv_nb_sectors(BlockDriverState *bs);
int64_t bdrv_getlength(BlockDriverState *bs);
int64_t bdrv_get_allocated_file_size(BlockDriverState *bs);
void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr);
@ -454,6 +455,7 @@ void bdrv_img_create(const char *filename, const char *fmt,
size_t bdrv_opt_mem_align(BlockDriverState *bs);
void bdrv_set_guest_block_size(BlockDriverState *bs, int align);
void *qemu_blockalign(BlockDriverState *bs, size_t size);
void *qemu_try_blockalign(BlockDriverState *bs, size_t size);
bool bdrv_qiov_is_aligned(BlockDriverState *bs, QEMUIOVector *qiov);
struct HBitmapIter;

View file

@ -223,4 +223,15 @@ void coroutine_fn co_aio_sleep_ns(AioContext *ctx, QEMUClockType type,
* Note that this function clobbers the handlers for the file descriptor.
*/
void coroutine_fn yield_until_fd_readable(int fd);
/**
* Add or subtract from the coroutine pool size
*
* The coroutine implementation keeps a pool of coroutines to be reused by
* qemu_coroutine_create(). This makes coroutine creation cheap. Heavy
* coroutine users should call this to reserve pool space. Call it again with
* a negative number to release pool space.
*/
void qemu_coroutine_adjust_pool_size(int n);
#endif /* QEMU_COROUTINE_H */

View file

@ -95,6 +95,7 @@ typedef signed int int_fast16_t;
#define qemu_printf printf
int qemu_daemon(int nochdir, int noclose);
void *qemu_try_memalign(size_t alignment, size_t size);
void *qemu_memalign(size_t alignment, size_t size);
void *qemu_anon_ram_alloc(size_t size);
void qemu_vfree(void *ptr);

View file

@ -115,6 +115,8 @@
# @format-specific: #optional structure supplying additional format-specific
# information (since 1.7)
#
# @nocow: #optional info of whether NOCOW flag is set or not. (since 2.2)
#
# Since: 1.3
#
##
@ -126,7 +128,8 @@
'*backing-filename': 'str', '*full-backing-filename': 'str',
'*backing-filename-format': 'str', '*snapshots': ['SnapshotInfo'],
'*backing-image': 'ImageInfo',
'*format-specific': 'ImageInfoSpecific' } }
'*format-specific': 'ImageInfoSpecific',
'*nocow': 'bool' } }
##
# @ImageCheck:
@ -194,6 +197,7 @@
# 'file', 'file', 'ftp', 'ftps', 'host_cdrom', 'host_device',
# 'host_floppy', 'http', 'https', 'nbd', 'parallels', 'qcow',
# 'qcow2', 'raw', 'tftp', 'vdi', 'vmdk', 'vpc', 'vvfat'
# 2.2: 'archipelago'
#
# @backing_file: #optional the name of the backing file (for copy-on-write)
#
@ -1143,7 +1147,7 @@
# Since: 2.0
##
{ 'enum': 'BlockdevDriver',
'data': [ 'file', 'host_device', 'host_cdrom', 'host_floppy',
'data': [ 'archipelago', 'file', 'host_device', 'host_cdrom', 'host_floppy',
'http', 'https', 'ftp', 'ftps', 'tftp', 'vvfat', 'blkdebug',
'blkverify', 'bochs', 'cloop', 'cow', 'dmg', 'parallels', 'qcow',
'qcow2', 'qed', 'raw', 'vdi', 'vhdx', 'vmdk', 'vpc', 'quorum' ] }
@ -1273,6 +1277,37 @@
'*pass-discard-snapshot': 'bool',
'*pass-discard-other': 'bool' } }
##
# @BlockdevOptionsArchipelago
#
# Driver specific block device options for Archipelago.
#
# @volume: Name of the Archipelago volume image
#
# @mport: #optional The port number on which mapperd is
# listening. This is optional
# and if not specified, QEMU will make Archipelago
# use the default port (1001).
#
# @vport: #optional The port number on which vlmcd is
# listening. This is optional
# and if not specified, QEMU will make Archipelago
# use the default port (501).
#
# @segment: #optional The name of the shared memory segment
# Archipelago stack is using. This is optional
# and if not specified, QEMU will make Archipelago
# use the default value, 'archipelago'.
#
# Since: 2.2
##
{ 'type': 'BlockdevOptionsArchipelago',
'data': { 'volume': 'str',
'*mport': 'int',
'*vport': 'int',
'*segment': 'str' } }
##
# @BlkdebugEvent
#
@ -1416,6 +1451,7 @@
'base': 'BlockdevOptionsBase',
'discriminator': 'driver',
'data': {
'archipelago':'BlockdevOptionsArchipelago',
'file': 'BlockdevOptionsFile',
'host_device':'BlockdevOptionsFile',
'host_cdrom': 'BlockdevOptionsFile',

View file

@ -182,9 +182,10 @@ static const char *find_typename_by_alias(const char *alias)
int qdev_device_help(QemuOpts *opts)
{
Error *local_err = NULL;
const char *driver;
Property *prop;
ObjectClass *klass;
DevicePropertyInfoList *prop_list;
DevicePropertyInfoList *prop;
driver = qemu_opt_get(opts, "driver");
if (driver && is_help_option(driver)) {
@ -196,35 +197,28 @@ int qdev_device_help(QemuOpts *opts)
return 0;
}
klass = object_class_by_name(driver);
if (!klass) {
if (!object_class_by_name(driver)) {
const char *typename = find_typename_by_alias(driver);
if (typename) {
driver = typename;
klass = object_class_by_name(driver);
}
}
if (!object_class_dynamic_cast(klass, TYPE_DEVICE)) {
return 0;
prop_list = qmp_device_list_properties(driver, &local_err);
if (!prop_list) {
error_printf("%s\n", error_get_pretty(local_err));
error_free(local_err);
return 1;
}
do {
for (prop = DEVICE_CLASS(klass)->props; prop && prop->name; prop++) {
/*
* TODO Properties without a parser are just for dirty hacks.
* qdev_prop_ptr is the only such PropertyInfo. It's marked
* for removal. This conditional should be removed along with
* it.
*/
if (!prop->info->set) {
continue; /* no way to set it, don't show */
}
error_printf("%s.%s=%s\n", driver, prop->name,
prop->info->legacy_name ?: prop->info->name);
}
klass = object_class_get_parent(klass);
} while (klass != object_class_by_name(TYPE_DEVICE));
for (prop = prop_list; prop; prop = prop->next) {
error_printf("%s.%s=%s\n", driver,
prop->value->name,
prop->value->type);
}
qapi_free_DevicePropertyInfoList(prop_list);
return 1;
}

View file

@ -19,14 +19,14 @@
#include "block/coroutine_int.h"
enum {
/* Maximum free pool size prevents holding too many freed coroutines */
POOL_MAX_SIZE = 64,
POOL_DEFAULT_SIZE = 64,
};
/** Free list to speed up creation */
static QemuMutex pool_lock;
static QSLIST_HEAD(, Coroutine) pool = QSLIST_HEAD_INITIALIZER(pool);
static unsigned int pool_size;
static unsigned int pool_max_size = POOL_DEFAULT_SIZE;
Coroutine *qemu_coroutine_create(CoroutineEntry *entry)
{
@ -55,7 +55,7 @@ static void coroutine_delete(Coroutine *co)
{
if (CONFIG_COROUTINE_POOL) {
qemu_mutex_lock(&pool_lock);
if (pool_size < POOL_MAX_SIZE) {
if (pool_size < pool_max_size) {
QSLIST_INSERT_HEAD(&pool, co, pool_next);
co->caller = NULL;
pool_size++;
@ -137,3 +137,23 @@ void coroutine_fn qemu_coroutine_yield(void)
self->caller = NULL;
coroutine_swap(self, to);
}
/**
 * Grow or shrink the maximum size of the free-coroutine pool by @n.
 *
 * @n: number of pool slots to add (positive) or to give back (negative).
 *
 * The limit must never drop below POOL_DEFAULT_SIZE; this is asserted.  If
 * the pool currently holds more free coroutines than the new limit allows,
 * the surplus entries are removed from the pool and handed to
 * qemu_coroutine_delete().  pool_lock is held for the whole operation.
 */
void qemu_coroutine_adjust_pool_size(int n)
{
    long long new_max;

    qemu_mutex_lock(&pool_lock);

    /*
     * pool_max_size is unsigned, so computing "pool_max_size += n" directly
     * would wrap around to a huge value when n is too negative, and the
     * assertion below would then pass even though the caller released more
     * than it reserved.  Do the arithmetic in a wider signed type so the
     * underflow is actually caught.
     */
    new_max = (long long)pool_max_size + n;

    /* Callers should never take away more than they added */
    assert(new_max >= POOL_DEFAULT_SIZE);

    pool_max_size = new_max;

    /* Trim oversized pool down to new max */
    while (pool_size > pool_max_size) {
        Coroutine *co = QSLIST_FIRST(&pool);

        QSLIST_REMOVE_HEAD(&pool, pool_next);
        pool_size--;
        qemu_coroutine_delete(co);
    }

    qemu_mutex_unlock(&pool_lock);
}

View file

@ -960,7 +960,6 @@ static int img_compare(int argc, char **argv)
int64_t sector_num = 0;
int64_t nb_sectors;
int c, pnum;
uint64_t bs_sectors;
uint64_t progress_base;
for (;;) {
@ -1022,10 +1021,20 @@ static int img_compare(int argc, char **argv)
buf1 = qemu_blockalign(bs1, IO_BUF_SIZE);
buf2 = qemu_blockalign(bs2, IO_BUF_SIZE);
bdrv_get_geometry(bs1, &bs_sectors);
total_sectors1 = bs_sectors;
bdrv_get_geometry(bs2, &bs_sectors);
total_sectors2 = bs_sectors;
total_sectors1 = bdrv_nb_sectors(bs1);
if (total_sectors1 < 0) {
error_report("Can't get size of %s: %s",
filename1, strerror(-total_sectors1));
ret = 4;
goto out;
}
total_sectors2 = bdrv_nb_sectors(bs2);
if (total_sectors2 < 0) {
error_report("Can't get size of %s: %s",
filename2, strerror(-total_sectors2));
ret = 4;
goto out;
}
total_sectors = MIN(total_sectors1, total_sectors2);
progress_base = MAX(total_sectors1, total_sectors2);
@ -1187,7 +1196,7 @@ static int img_convert(int argc, char **argv)
BlockDriver *drv, *proto_drv;
BlockDriverState **bs = NULL, *out_bs = NULL;
int64_t total_sectors, nb_sectors, sector_num, bs_offset;
uint64_t bs_sectors;
int64_t *bs_sectors = NULL;
uint8_t * buf = NULL;
size_t bufsectors = IO_BUF_SIZE / BDRV_SECTOR_SIZE;
const uint8_t *buf1;
@ -1328,7 +1337,8 @@ static int img_convert(int argc, char **argv)
qemu_progress_print(0, 100);
bs = g_malloc0(bs_n * sizeof(BlockDriverState *));
bs = g_new0(BlockDriverState *, bs_n);
bs_sectors = g_new(int64_t, bs_n);
total_sectors = 0;
for (bs_i = 0; bs_i < bs_n; bs_i++) {
@ -1342,8 +1352,14 @@ static int img_convert(int argc, char **argv)
ret = -1;
goto out;
}
bdrv_get_geometry(bs[bs_i], &bs_sectors);
total_sectors += bs_sectors;
bs_sectors[bs_i] = bdrv_nb_sectors(bs[bs_i]);
if (bs_sectors[bs_i] < 0) {
error_report("Could not get size of %s: %s",
argv[optind + bs_i], strerror(-bs_sectors[bs_i]));
ret = -1;
goto out;
}
total_sectors += bs_sectors[bs_i];
}
if (sn_opts) {
@ -1461,7 +1477,6 @@ static int img_convert(int argc, char **argv)
bs_i = 0;
bs_offset = 0;
bdrv_get_geometry(bs[0], &bs_sectors);
/* increase bufsectors from the default 4096 (2M) if opt_transfer_length
* or discard_alignment of the out_bs is greater. Limit to 32768 (16MB)
@ -1474,13 +1489,13 @@ static int img_convert(int argc, char **argv)
buf = qemu_blockalign(out_bs, bufsectors * BDRV_SECTOR_SIZE);
if (skip_create) {
int64_t output_length = bdrv_getlength(out_bs);
if (output_length < 0) {
int64_t output_sectors = bdrv_nb_sectors(out_bs);
if (output_sectors < 0) {
error_report("unable to get output image length: %s\n",
strerror(-output_length));
strerror(-output_sectors));
ret = -1;
goto out;
} else if (output_length < total_sectors << BDRV_SECTOR_BITS) {
} else if (output_sectors < total_sectors) {
error_report("output file is smaller than input file");
ret = -1;
goto out;
@ -1528,19 +1543,19 @@ static int img_convert(int argc, char **argv)
buf2 = buf;
while (remainder > 0) {
int nlow;
while (bs_num == bs_sectors) {
while (bs_num == bs_sectors[bs_i]) {
bs_offset += bs_sectors[bs_i];
bs_i++;
assert (bs_i < bs_n);
bs_offset += bs_sectors;
bdrv_get_geometry(bs[bs_i], &bs_sectors);
bs_num = 0;
/* printf("changing part: sector_num=%" PRId64 ", "
"bs_i=%d, bs_offset=%" PRId64 ", bs_sectors=%" PRId64
"\n", sector_num, bs_i, bs_offset, bs_sectors); */
"\n", sector_num, bs_i, bs_offset, bs_sectors[bs_i]); */
}
assert (bs_num < bs_sectors);
assert (bs_num < bs_sectors[bs_i]);
nlow = (remainder > bs_sectors - bs_num) ? bs_sectors - bs_num : remainder;
nlow = remainder > bs_sectors[bs_i] - bs_num
? bs_sectors[bs_i] - bs_num : remainder;
ret = bdrv_read(bs[bs_i], bs_num, buf2, nlow);
if (ret < 0) {
@ -1601,14 +1616,13 @@ restart:
break;
}
while (sector_num - bs_offset >= bs_sectors) {
while (sector_num - bs_offset >= bs_sectors[bs_i]) {
bs_offset += bs_sectors[bs_i];
bs_i ++;
assert (bs_i < bs_n);
bs_offset += bs_sectors;
bdrv_get_geometry(bs[bs_i], &bs_sectors);
/* printf("changing part: sector_num=%" PRId64 ", bs_i=%d, "
"bs_offset=%" PRId64 ", bs_sectors=%" PRId64 "\n",
sector_num, bs_i, bs_offset, bs_sectors); */
sector_num, bs_i, bs_offset, bs_sectors[bs_i]); */
}
if ((out_baseimg || has_zero_init) &&
@ -1661,7 +1675,7 @@ restart:
}
}
n = MIN(n, bs_sectors - (sector_num - bs_offset));
n = MIN(n, bs_sectors[bs_i] - (sector_num - bs_offset));
sectors_read += n;
if (count_allocated_sectors) {
@ -1719,6 +1733,7 @@ out:
}
g_free(bs);
}
g_free(bs_sectors);
fail_getopt:
g_free(options);
@ -2418,9 +2433,9 @@ static int img_rebase(int argc, char **argv)
* the image is the same as the original one at any time.
*/
if (!unsafe) {
uint64_t num_sectors;
uint64_t old_backing_num_sectors;
uint64_t new_backing_num_sectors = 0;
int64_t num_sectors;
int64_t old_backing_num_sectors;
int64_t new_backing_num_sectors = 0;
uint64_t sector;
int n;
uint8_t * buf_old;
@ -2430,10 +2445,31 @@ static int img_rebase(int argc, char **argv)
buf_old = qemu_blockalign(bs, IO_BUF_SIZE);
buf_new = qemu_blockalign(bs, IO_BUF_SIZE);
bdrv_get_geometry(bs, &num_sectors);
bdrv_get_geometry(bs_old_backing, &old_backing_num_sectors);
num_sectors = bdrv_nb_sectors(bs);
if (num_sectors < 0) {
error_report("Could not get size of '%s': %s",
filename, strerror(-num_sectors));
ret = -1;
goto out;
}
old_backing_num_sectors = bdrv_nb_sectors(bs_old_backing);
if (old_backing_num_sectors < 0) {
char backing_name[1024];
bdrv_get_backing_filename(bs, backing_name, sizeof(backing_name));
error_report("Could not get size of '%s': %s",
backing_name, strerror(-old_backing_num_sectors));
ret = -1;
goto out;
}
if (bs_new_backing) {
bdrv_get_geometry(bs_new_backing, &new_backing_num_sectors);
new_backing_num_sectors = bdrv_nb_sectors(bs_new_backing);
if (new_backing_num_sectors < 0) {
error_report("Could not get size of '%s': %s",
out_baseimg, strerror(-new_backing_num_sectors));
ret = -1;
goto out;
}
}
if (num_sectors != 0) {

1
qmp.c
View file

@ -509,6 +509,7 @@ DevicePropertyInfoList *qmp_device_list_properties(const char *typename,
if (strcmp(prop->name, "type") == 0 ||
strcmp(prop->name, "realized") == 0 ||
strcmp(prop->name, "hotpluggable") == 0 ||
strcmp(prop->name, "hotplugged") == 0 ||
strcmp(prop->name, "parent_bus") == 0) {
continue;
}

View file

@ -114,6 +114,10 @@ echo
echo "=== Testing version 3 ==="
_use_sample_img iotest-version3.vmdk.bz2
_img_info
# For each i in 0..99, read one 512-byte sector at offset
# i * (64 KiB * 10) + i * 512 and have qemu-io check it against the pattern
# byte 0x30 + (i % 10) (read -P). Output is passed through _filter_qemu_io.
for i in {0..99}; do
$QEMU_IO -r -c "read -P $(( i % 10 + 0x30 )) $(( i * 64 * 1024 * 10 + i * 512 )) 512" $TEST_IMG \
| _filter_qemu_io
done
echo
echo "=== Testing 4TB monolithicFlat creation and IO ==="

View file

@ -2056,8 +2056,208 @@ wrote 512/512 bytes at offset 10240
=== Testing version 3 ===
image: TEST_DIR/iotest-version3.IMGFMT
file format: IMGFMT
virtual size: 1.0G (1073741824 bytes)
virtual size: 16G (17179869184 bytes)
cluster_size: 65536
read 512/512 bytes at offset 0
512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
read 512/512 bytes at offset 655872
512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
read 512/512 bytes at offset 1311744
512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
read 512/512 bytes at offset 1967616
512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
read 512/512 bytes at offset 2623488
512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
read 512/512 bytes at offset 3279360
512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
read 512/512 bytes at offset 3935232
512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
read 512/512 bytes at offset 4591104
512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
read 512/512 bytes at offset 5246976
512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
read 512/512 bytes at offset 5902848
512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
read 512/512 bytes at offset 6558720
512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
read 512/512 bytes at offset 7214592
512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
read 512/512 bytes at offset 7870464
512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
read 512/512 bytes at offset 8526336
512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
read 512/512 bytes at offset 9182208
512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
read 512/512 bytes at offset 9838080
512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
read 512/512 bytes at offset 10493952
512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
read 512/512 bytes at offset 11149824
512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
read 512/512 bytes at offset 11805696
512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
read 512/512 bytes at offset 12461568
512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
read 512/512 bytes at offset 13117440
512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
read 512/512 bytes at offset 13773312
512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
read 512/512 bytes at offset 14429184
512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
read 512/512 bytes at offset 15085056
512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
read 512/512 bytes at offset 15740928
512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
read 512/512 bytes at offset 16396800
512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
read 512/512 bytes at offset 17052672
512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
read 512/512 bytes at offset 17708544
512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
read 512/512 bytes at offset 18364416
512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
read 512/512 bytes at offset 19020288
512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
read 512/512 bytes at offset 19676160
512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
read 512/512 bytes at offset 20332032
512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
read 512/512 bytes at offset 20987904
512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
read 512/512 bytes at offset 21643776
512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
read 512/512 bytes at offset 22299648
512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
read 512/512 bytes at offset 22955520
512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
read 512/512 bytes at offset 23611392
512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
read 512/512 bytes at offset 24267264
512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
read 512/512 bytes at offset 24923136
512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
read 512/512 bytes at offset 25579008
512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
read 512/512 bytes at offset 26234880
512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
read 512/512 bytes at offset 26890752
512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
read 512/512 bytes at offset 27546624
512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
read 512/512 bytes at offset 28202496
512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
read 512/512 bytes at offset 28858368
512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
read 512/512 bytes at offset 29514240
512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
read 512/512 bytes at offset 30170112
512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
read 512/512 bytes at offset 30825984
512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
read 512/512 bytes at offset 31481856
512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
read 512/512 bytes at offset 32137728
512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
read 512/512 bytes at offset 32793600
512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
read 512/512 bytes at offset 33449472
512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
read 512/512 bytes at offset 34105344
512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
read 512/512 bytes at offset 34761216
512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
read 512/512 bytes at offset 35417088
512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
read 512/512 bytes at offset 36072960
512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
read 512/512 bytes at offset 36728832
512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
read 512/512 bytes at offset 37384704
512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
read 512/512 bytes at offset 38040576
512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
read 512/512 bytes at offset 38696448
512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
read 512/512 bytes at offset 39352320
512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
read 512/512 bytes at offset 40008192
512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
read 512/512 bytes at offset 40664064
512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
read 512/512 bytes at offset 41319936
512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
read 512/512 bytes at offset 41975808
512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
read 512/512 bytes at offset 42631680
512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
read 512/512 bytes at offset 43287552
512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
read 512/512 bytes at offset 43943424
512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
read 512/512 bytes at offset 44599296
512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
read 512/512 bytes at offset 45255168
512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
read 512/512 bytes at offset 45911040
512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
read 512/512 bytes at offset 46566912
512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
read 512/512 bytes at offset 47222784
512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
read 512/512 bytes at offset 47878656
512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
read 512/512 bytes at offset 48534528
512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
read 512/512 bytes at offset 49190400
512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
read 512/512 bytes at offset 49846272
512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
read 512/512 bytes at offset 50502144
512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
read 512/512 bytes at offset 51158016
512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
read 512/512 bytes at offset 51813888
512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
read 512/512 bytes at offset 52469760
512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
read 512/512 bytes at offset 53125632
512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
read 512/512 bytes at offset 53781504
512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
read 512/512 bytes at offset 54437376
512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
read 512/512 bytes at offset 55093248
512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
read 512/512 bytes at offset 55749120
512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
read 512/512 bytes at offset 56404992
512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
read 512/512 bytes at offset 57060864
512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
read 512/512 bytes at offset 57716736
512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
read 512/512 bytes at offset 58372608
512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
read 512/512 bytes at offset 59028480
512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
read 512/512 bytes at offset 59684352
512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
read 512/512 bytes at offset 60340224
512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
read 512/512 bytes at offset 60996096
512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
read 512/512 bytes at offset 61651968
512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
read 512/512 bytes at offset 62307840
512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
read 512/512 bytes at offset 62963712
512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
read 512/512 bytes at offset 63619584
512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
read 512/512 bytes at offset 64275456
512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
read 512/512 bytes at offset 64931328
512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
=== Testing 4TB monolithicFlat creation and IO ===
Formatting 'TEST_DIR/iotest-version3.IMGFMT', fmt=IMGFMT size=4398046511104

View file

@ -164,6 +164,15 @@ wait_break 0
write 64k 64k
resume 0" | $QEMU_IO | _filter_qemu_io
echo
echo "=== Testing unallocated image header ==="
echo
_make_test_img 64M
# Create L1/L2
$QEMU_IO -c "$OPEN_RW" -c "write 0 64k" | _filter_qemu_io
poke_file "$TEST_IMG" "$rb_offset" "\x00\x00"
$QEMU_IO -c "$OPEN_RW" -c "write 64k 64k" | _filter_qemu_io
# success, all done
echo "*** done"
rm -f $seq.full

View file

@ -93,4 +93,12 @@ blkdebug: Suspended request '0'
write failed: Input/output error
blkdebug: Resuming request '0'
aio_write failed: No medium found
=== Testing unallocated image header ===
Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864
wrote 65536/65536 bytes at offset 0
64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
qcow2: Preventing invalid write on metadata (overlaps with qcow2_header); image marked as corrupt.
write failed: Input/output error
*** done

View file

@ -1,6 +1,7 @@
#!/bin/bash
#
# Test case for VDI header corruption; image too large, and too many blocks
# Test case for VDI header corruption; image too large, and too many blocks.
# Also simple test for creating dynamic and static VDI images.
#
# Copyright (C) 2013 Red Hat, Inc.
#
@ -43,14 +44,25 @@ _supported_fmt vdi
_supported_proto generic
_supported_os Linux
size=64M
ds_offset=368 # disk image size field offset
bs_offset=376 # block size field offset
bii_offset=384 # block in image field offset
echo
echo "=== Statically allocated image creation ==="
echo
_make_test_img $size -o static
_img_info
stat -c"disk image file size in bytes: %s" "${TEST_IMG}"
_cleanup_test_img
echo
echo "=== Testing image size bounds ==="
echo
_make_test_img 64M
_make_test_img $size
_img_info
stat -c"disk image file size in bytes: %s" "${TEST_IMG}"
# check for image size too large
# poke max image size, and appropriate blocks_in_image value

View file

@ -1,8 +1,22 @@
QA output created by 084
=== Statically allocated image creation ===
Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864
image: TEST_DIR/t.IMGFMT
file format: IMGFMT
virtual size: 64M (67108864 bytes)
cluster_size: 1048576
disk image file size in bytes: 67109888
=== Testing image size bounds ===
Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864
image: TEST_DIR/t.IMGFMT
file format: IMGFMT
virtual size: 64M (67108864 bytes)
cluster_size: 1048576
disk image file size in bytes: 1024
Test 1: Maximum size (1024 TB):
qemu-img: Could not open 'TEST_DIR/t.IMGFMT': Could not open 'TEST_DIR/t.IMGFMT': Invalid argument

View file

@ -152,6 +152,7 @@ check options
-nbd test nbd
-ssh test ssh
-nfs test nfs
-archipelago test archipelago
-xdiff graphical mode diff
-nocache use O_DIRECT on backing file
-misalign misalign memory allocations
@ -263,6 +264,11 @@ testlist options
xpand=false
;;
-archipelago)
IMGPROTO=archipelago
xpand=false
;;
-nocache)
CACHEMODE="none"
CACHEMODE_IS_DEFAULT=false

View file

@ -64,6 +64,8 @@ elif [ "$IMGPROTO" = "ssh" ]; then
elif [ "$IMGPROTO" = "nfs" ]; then
TEST_DIR="nfs://127.0.0.1/$TEST_DIR"
TEST_IMG=$TEST_DIR/t.$IMGFMT
elif [ "$IMGPROTO" = "archipelago" ]; then
TEST_IMG="archipelago:at.$IMGFMT"
else
TEST_IMG=$IMGPROTO:$TEST_DIR/t.$IMGFMT
fi
@ -163,7 +165,8 @@ _make_test_img()
-e "s# lazy_refcounts=\\(on\\|off\\)##g" \
-e "s# block_size=[0-9]\\+##g" \
-e "s# block_state_zero=\\(on\\|off\\)##g" \
-e "s# log_size=[0-9]\\+##g"
-e "s# log_size=[0-9]\\+##g" \
-e "s/archipelago:a/TEST_DIR\//g"
# Start an NBD server on the image file, which is what we'll be talking to
if [ $IMGPROTO = "nbd" ]; then
@ -206,6 +209,10 @@ _cleanup_test_img()
rbd --no-progress rm "$TEST_DIR/t.$IMGFMT" > /dev/null
;;
archipelago)
vlmc remove "at.$IMGFMT" > /dev/null
;;
sheepdog)
collie vdi delete "$TEST_DIR/t.$IMGFMT"
;;

View file

@ -288,6 +288,29 @@ static void perf_yield(void)
maxcycles, duration);
}
/*
 * Minimal callee for the function-call baseline: decrements the unsigned
 * counter that the caller passes by pointer.  The noinline attribute asks
 * the compiler not to fold the call into its caller.
 */
static __attribute__((noinline)) void dummy(unsigned *i)
{
    *i -= 1;
}
/*
 * Baseline measurement: time a fixed number of calls to dummy(), each of
 * which decrements the remaining-iterations counter, and report the elapsed
 * time through g_test_message().
 */
static void perf_baseline(void)
{
    const unsigned int maxcycles = 100000000;
    unsigned int remaining = maxcycles;
    double duration;

    g_test_timer_start();
    /* dummy() performs the decrement, so the loop body is just the call. */
    while (remaining != 0) {
        dummy(&remaining);
    }
    duration = g_test_timer_elapsed();

    g_test_message("Function call %u iterations: %f s\n",
                   maxcycles, duration);
}
int main(int argc, char **argv)
{
g_test_init(&argc, &argv, NULL);
@ -301,6 +324,7 @@ int main(int argc, char **argv)
g_test_add_func("/perf/lifecycle", perf_lifecycle);
g_test_add_func("/perf/nesting", perf_nesting);
g_test_add_func("/perf/yield", perf_yield);
g_test_add_func("/perf/function-call", perf_baseline);
}
return g_test_run();
}

View file

@ -21,7 +21,6 @@
#include "block/coroutine.h"
#include "trace.h"
#include "block/block_int.h"
#include "qemu/event_notifier.h"
#include "block/thread-pool.h"
#include "qemu/main-loop.h"
@ -57,8 +56,8 @@ struct ThreadPoolElement {
};
struct ThreadPool {
EventNotifier notifier;
AioContext *ctx;
QEMUBH *completion_bh;
QemuMutex lock;
QemuCond check_cancel;
QemuCond worker_stopped;
@ -119,7 +118,7 @@ static void *worker_thread(void *opaque)
qemu_cond_broadcast(&pool->check_cancel);
}
event_notifier_set(&pool->notifier);
qemu_bh_schedule(pool->completion_bh);
}
pool->cur_threads--;
@ -168,12 +167,11 @@ static void spawn_thread(ThreadPool *pool)
}
}
static void event_notifier_ready(EventNotifier *notifier)
static void thread_pool_completion_bh(void *opaque)
{
ThreadPool *pool = container_of(notifier, ThreadPool, notifier);
ThreadPool *pool = opaque;
ThreadPoolElement *elem, *next;
event_notifier_test_and_clear(notifier);
restart:
QLIST_FOREACH_SAFE(elem, &pool->head, all, next) {
if (elem->state != THREAD_CANCELED && elem->state != THREAD_DONE) {
@ -187,6 +185,12 @@ restart:
QLIST_REMOVE(elem, all);
/* Read state before ret. */
smp_rmb();
/* Schedule ourselves in case elem->common.cb() calls aio_poll() to
* wait for another request that completed at the same time.
*/
qemu_bh_schedule(pool->completion_bh);
elem->common.cb(elem->common.opaque, elem->ret);
qemu_aio_release(elem);
goto restart;
@ -215,7 +219,7 @@ static void thread_pool_cancel(BlockDriverAIOCB *acb)
qemu_sem_timedwait(&pool->sem, 0) == 0) {
QTAILQ_REMOVE(&pool->request_list, elem, reqs);
elem->state = THREAD_CANCELED;
event_notifier_set(&pool->notifier);
qemu_bh_schedule(pool->completion_bh);
} else {
pool->pending_cancellations++;
while (elem->state != THREAD_CANCELED && elem->state != THREAD_DONE) {
@ -224,7 +228,7 @@ static void thread_pool_cancel(BlockDriverAIOCB *acb)
pool->pending_cancellations--;
}
qemu_mutex_unlock(&pool->lock);
event_notifier_ready(&pool->notifier);
thread_pool_completion_bh(pool);
}
static const AIOCBInfo thread_pool_aiocb_info = {
@ -293,8 +297,8 @@ static void thread_pool_init_one(ThreadPool *pool, AioContext *ctx)
}
memset(pool, 0, sizeof(*pool));
event_notifier_init(&pool->notifier, false);
pool->ctx = ctx;
pool->completion_bh = aio_bh_new(ctx, thread_pool_completion_bh, pool);
qemu_mutex_init(&pool->lock);
qemu_cond_init(&pool->check_cancel);
qemu_cond_init(&pool->worker_stopped);
@ -304,8 +308,6 @@ static void thread_pool_init_one(ThreadPool *pool, AioContext *ctx)
QLIST_INIT(&pool->head);
QTAILQ_INIT(&pool->request_list);
aio_set_event_notifier(ctx, &pool->notifier, event_notifier_ready);
}
ThreadPool *thread_pool_new(AioContext *ctx)
@ -339,11 +341,10 @@ void thread_pool_free(ThreadPool *pool)
qemu_mutex_unlock(&pool->lock);
aio_set_event_notifier(pool->ctx, &pool->notifier, NULL);
qemu_bh_delete(pool->completion_bh);
qemu_sem_destroy(&pool->sem);
qemu_cond_destroy(&pool->check_cancel);
qemu_cond_destroy(&pool->worker_stopped);
qemu_mutex_destroy(&pool->lock);
event_notifier_cleanup(&pool->notifier);
g_free(pool);
}

View file

@ -94,7 +94,7 @@ void *qemu_oom_check(void *ptr)
return ptr;
}
void *qemu_memalign(size_t alignment, size_t size)
void *qemu_try_memalign(size_t alignment, size_t size)
{
void *ptr;
@ -106,19 +106,23 @@ void *qemu_memalign(size_t alignment, size_t size)
int ret;
ret = posix_memalign(&ptr, alignment, size);
if (ret != 0) {
fprintf(stderr, "Failed to allocate %zu B: %s\n",
size, strerror(ret));
abort();
errno = ret;
ptr = NULL;
}
#elif defined(CONFIG_BSD)
ptr = qemu_oom_check(valloc(size));
ptr = valloc(size);
#else
ptr = qemu_oom_check(memalign(alignment, size));
ptr = memalign(alignment, size);
#endif
trace_qemu_memalign(alignment, size, ptr);
return ptr;
}
/*
 * Wrapper around qemu_try_memalign(): performs the aligned allocation and
 * routes the resulting pointer through qemu_oom_check() before returning it.
 */
void *qemu_memalign(size_t alignment, size_t size)
{
    void *ptr = qemu_try_memalign(alignment, size);

    return qemu_oom_check(ptr);
}
/* alloc shared memory pages */
void *qemu_anon_ram_alloc(size_t size)
{

View file

@ -50,18 +50,23 @@ void *qemu_oom_check(void *ptr)
return ptr;
}
void *qemu_memalign(size_t alignment, size_t size)
void *qemu_try_memalign(size_t alignment, size_t size)
{
void *ptr;
if (!size) {
abort();
}
ptr = qemu_oom_check(VirtualAlloc(NULL, size, MEM_COMMIT, PAGE_READWRITE));
ptr = VirtualAlloc(NULL, size, MEM_COMMIT, PAGE_READWRITE);
trace_qemu_memalign(alignment, size, ptr);
return ptr;
}
/*
 * Wrapper around qemu_try_memalign(): performs the allocation and routes the
 * resulting pointer through qemu_oom_check() before returning it.
 */
void *qemu_memalign(size_t alignment, size_t size)
{
    void *ptr = qemu_try_memalign(alignment, size);

    return qemu_oom_check(ptr);
}
void *qemu_anon_ram_alloc(size_t size)
{
void *ptr;