Merge remote branch 'kwolf/for-anthony' into staging

This commit is contained in:
Anthony Liguori 2010-09-21 17:50:58 -05:00
commit e1bb0a1a6c
23 changed files with 721 additions and 123 deletions

View file

@ -14,7 +14,7 @@ block-obj-$(CONFIG_LINUX_AIO) += linux-aio.o
block-nested-y += raw.o cow.o qcow.o vdi.o vmdk.o cloop.o dmg.o bochs.o vpc.o vvfat.o
block-nested-y += qcow2.o qcow2-refcount.o qcow2-cluster.o qcow2-snapshot.o
block-nested-y += parallels.o nbd.o blkdebug.o sheepdog.o
block-nested-y += parallels.o nbd.o blkdebug.o sheepdog.o blkverify.o
block-nested-$(CONFIG_WIN32) += raw-win32.o
block-nested-$(CONFIG_POSIX) += raw-posix.o
block-nested-$(CONFIG_CURL) += curl.o

382
block/blkverify.c Normal file
View file

@ -0,0 +1,382 @@
/*
* Block protocol for block driver correctness testing
*
* Copyright (C) 2010 IBM, Corp.
*
* This work is licensed under the terms of the GNU GPL, version 2 or later.
* See the COPYING file in the top-level directory.
*/
#include <stdarg.h>
#include "qemu_socket.h" /* for EINPROGRESS on Windows */
#include "block_int.h"
typedef struct {
BlockDriverState *test_file;
} BDRVBlkverifyState;
typedef struct BlkverifyAIOCB BlkverifyAIOCB;
struct BlkverifyAIOCB {
BlockDriverAIOCB common;
QEMUBH *bh;
/* Request metadata */
bool is_write;
int64_t sector_num;
int nb_sectors;
int ret; /* first completed request's result */
unsigned int done; /* completion counter */
bool *finished; /* completion signal for cancel */
QEMUIOVector *qiov; /* user I/O vector */
QEMUIOVector raw_qiov; /* cloned I/O vector for raw file */
void *buf; /* buffer for raw file I/O */
void (*verify)(BlkverifyAIOCB *acb);
};
static void blkverify_aio_cancel(BlockDriverAIOCB *blockacb)
{
BlkverifyAIOCB *acb = (BlkverifyAIOCB *)blockacb;
bool finished = false;
/* Wait until request completes, invokes its callback, and frees itself */
acb->finished = &finished;
while (!finished) {
qemu_aio_wait();
}
}
static AIOPool blkverify_aio_pool = {
.aiocb_size = sizeof(BlkverifyAIOCB),
.cancel = blkverify_aio_cancel,
};
static void blkverify_err(BlkverifyAIOCB *acb, const char *fmt, ...)
{
va_list ap;
va_start(ap, fmt);
fprintf(stderr, "blkverify: %s sector_num=%ld nb_sectors=%d ",
acb->is_write ? "write" : "read", acb->sector_num,
acb->nb_sectors);
vfprintf(stderr, fmt, ap);
fprintf(stderr, "\n");
va_end(ap);
exit(1);
}
/* Valid blkverify filenames look like blkverify:path/to/raw_image:path/to/image */
static int blkverify_open(BlockDriverState *bs, const char *filename, int flags)
{
BDRVBlkverifyState *s = bs->opaque;
int ret;
char *raw, *c;
/* Parse the blkverify: prefix */
if (strncmp(filename, "blkverify:", strlen("blkverify:"))) {
return -EINVAL;
}
filename += strlen("blkverify:");
/* Parse the raw image filename */
c = strchr(filename, ':');
if (c == NULL) {
return -EINVAL;
}
raw = strdup(filename);
raw[c - filename] = '\0';
ret = bdrv_file_open(&bs->file, raw, flags);
free(raw);
if (ret < 0) {
return ret;
}
filename = c + 1;
/* Open the test file */
s->test_file = bdrv_new("");
ret = bdrv_open(s->test_file, filename, flags, NULL);
if (ret < 0) {
bdrv_delete(s->test_file);
s->test_file = NULL;
return ret;
}
return 0;
}
static void blkverify_close(BlockDriverState *bs)
{
BDRVBlkverifyState *s = bs->opaque;
bdrv_delete(s->test_file);
s->test_file = NULL;
}
static void blkverify_flush(BlockDriverState *bs)
{
BDRVBlkverifyState *s = bs->opaque;
/* Only flush test file, the raw file is not important */
bdrv_flush(s->test_file);
}
static int64_t blkverify_getlength(BlockDriverState *bs)
{
BDRVBlkverifyState *s = bs->opaque;
return bdrv_getlength(s->test_file);
}
/**
* Check that I/O vector contents are identical
*
* @a: I/O vector
* @b: I/O vector
* @ret: Offset to first mismatching byte or -1 if match
*/
static ssize_t blkverify_iovec_compare(QEMUIOVector *a, QEMUIOVector *b)
{
int i;
ssize_t offset = 0;
assert(a->niov == b->niov);
for (i = 0; i < a->niov; i++) {
size_t len = 0;
uint8_t *p = (uint8_t *)a->iov[i].iov_base;
uint8_t *q = (uint8_t *)b->iov[i].iov_base;
assert(a->iov[i].iov_len == b->iov[i].iov_len);
while (len < a->iov[i].iov_len && *p++ == *q++) {
len++;
}
offset += len;
if (len != a->iov[i].iov_len) {
return offset;
}
}
return -1;
}
typedef struct {
int src_index;
struct iovec *src_iov;
void *dest_base;
} IOVectorSortElem;
static int sortelem_cmp_src_base(const void *a, const void *b)
{
const IOVectorSortElem *elem_a = a;
const IOVectorSortElem *elem_b = b;
/* Don't overflow */
if (elem_a->src_iov->iov_base < elem_b->src_iov->iov_base) {
return -1;
} else if (elem_a->src_iov->iov_base > elem_b->src_iov->iov_base) {
return 1;
} else {
return 0;
}
}
static int sortelem_cmp_src_index(const void *a, const void *b)
{
const IOVectorSortElem *elem_a = a;
const IOVectorSortElem *elem_b = b;
return elem_a->src_index - elem_b->src_index;
}
/**
* Copy contents of I/O vector
*
* The relative relationships of overlapping iovecs are preserved. This is
* necessary to ensure identical semantics in the cloned I/O vector.
*/
static void blkverify_iovec_clone(QEMUIOVector *dest, const QEMUIOVector *src,
void *buf)
{
IOVectorSortElem sortelems[src->niov];
void *last_end;
int i;
/* Sort by source iovecs by base address */
for (i = 0; i < src->niov; i++) {
sortelems[i].src_index = i;
sortelems[i].src_iov = &src->iov[i];
}
qsort(sortelems, src->niov, sizeof(sortelems[0]), sortelem_cmp_src_base);
/* Allocate buffer space taking into account overlapping iovecs */
last_end = NULL;
for (i = 0; i < src->niov; i++) {
struct iovec *cur = sortelems[i].src_iov;
ptrdiff_t rewind = 0;
/* Detect overlap */
if (last_end && last_end > cur->iov_base) {
rewind = last_end - cur->iov_base;
}
sortelems[i].dest_base = buf - rewind;
buf += cur->iov_len - MIN(rewind, cur->iov_len);
last_end = MAX(cur->iov_base + cur->iov_len, last_end);
}
/* Sort by source iovec index and build destination iovec */
qsort(sortelems, src->niov, sizeof(sortelems[0]), sortelem_cmp_src_index);
for (i = 0; i < src->niov; i++) {
qemu_iovec_add(dest, sortelems[i].dest_base, src->iov[i].iov_len);
}
}
static BlkverifyAIOCB *blkverify_aio_get(BlockDriverState *bs, bool is_write,
int64_t sector_num, QEMUIOVector *qiov,
int nb_sectors,
BlockDriverCompletionFunc *cb,
void *opaque)
{
BlkverifyAIOCB *acb = qemu_aio_get(&blkverify_aio_pool, bs, cb, opaque);
acb->bh = NULL;
acb->is_write = is_write;
acb->sector_num = sector_num;
acb->nb_sectors = nb_sectors;
acb->ret = -EINPROGRESS;
acb->done = 0;
acb->qiov = qiov;
acb->buf = NULL;
acb->verify = NULL;
acb->finished = NULL;
return acb;
}
static void blkverify_aio_bh(void *opaque)
{
BlkverifyAIOCB *acb = opaque;
qemu_bh_delete(acb->bh);
if (acb->buf) {
qemu_iovec_destroy(&acb->raw_qiov);
qemu_vfree(acb->buf);
}
acb->common.cb(acb->common.opaque, acb->ret);
if (acb->finished) {
*acb->finished = true;
}
qemu_aio_release(acb);
}
static void blkverify_aio_cb(void *opaque, int ret)
{
BlkverifyAIOCB *acb = opaque;
switch (++acb->done) {
case 1:
acb->ret = ret;
break;
case 2:
if (acb->ret != ret) {
blkverify_err(acb, "return value mismatch %d != %d", acb->ret, ret);
}
if (acb->verify) {
acb->verify(acb);
}
acb->bh = qemu_bh_new(blkverify_aio_bh, acb);
qemu_bh_schedule(acb->bh);
break;
}
}
static void blkverify_verify_readv(BlkverifyAIOCB *acb)
{
ssize_t offset = blkverify_iovec_compare(acb->qiov, &acb->raw_qiov);
if (offset != -1) {
blkverify_err(acb, "contents mismatch in sector %ld",
acb->sector_num + (offset / BDRV_SECTOR_SIZE));
}
}
static BlockDriverAIOCB *blkverify_aio_readv(BlockDriverState *bs,
int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
BlockDriverCompletionFunc *cb, void *opaque)
{
BDRVBlkverifyState *s = bs->opaque;
BlkverifyAIOCB *acb = blkverify_aio_get(bs, false, sector_num, qiov,
nb_sectors, cb, opaque);
acb->verify = blkverify_verify_readv;
acb->buf = qemu_blockalign(bs->file, qiov->size);
qemu_iovec_init(&acb->raw_qiov, acb->qiov->niov);
blkverify_iovec_clone(&acb->raw_qiov, qiov, acb->buf);
if (!bdrv_aio_readv(s->test_file, sector_num, qiov, nb_sectors,
blkverify_aio_cb, acb)) {
blkverify_aio_cb(acb, -EIO);
}
if (!bdrv_aio_readv(bs->file, sector_num, &acb->raw_qiov, nb_sectors,
blkverify_aio_cb, acb)) {
blkverify_aio_cb(acb, -EIO);
}
return &acb->common;
}
static BlockDriverAIOCB *blkverify_aio_writev(BlockDriverState *bs,
int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
BlockDriverCompletionFunc *cb, void *opaque)
{
BDRVBlkverifyState *s = bs->opaque;
BlkverifyAIOCB *acb = blkverify_aio_get(bs, true, sector_num, qiov,
nb_sectors, cb, opaque);
if (!bdrv_aio_writev(s->test_file, sector_num, qiov, nb_sectors,
blkverify_aio_cb, acb)) {
blkverify_aio_cb(acb, -EIO);
}
if (!bdrv_aio_writev(bs->file, sector_num, qiov, nb_sectors,
blkverify_aio_cb, acb)) {
blkverify_aio_cb(acb, -EIO);
}
return &acb->common;
}
static BlockDriverAIOCB *blkverify_aio_flush(BlockDriverState *bs,
BlockDriverCompletionFunc *cb,
void *opaque)
{
BDRVBlkverifyState *s = bs->opaque;
/* Only flush test file, the raw file is not important */
return bdrv_aio_flush(s->test_file, cb, opaque);
}
static BlockDriver bdrv_blkverify = {
.format_name = "blkverify",
.protocol_name = "blkverify",
.instance_size = sizeof(BDRVBlkverifyState),
.bdrv_getlength = blkverify_getlength,
.bdrv_file_open = blkverify_open,
.bdrv_close = blkverify_close,
.bdrv_flush = blkverify_flush,
.bdrv_aio_readv = blkverify_aio_readv,
.bdrv_aio_writev = blkverify_aio_writev,
.bdrv_aio_flush = blkverify_aio_flush,
};
static void bdrv_blkverify_init(void)
{
bdrv_register(&bdrv_blkverify);
}
block_init(bdrv_blkverify_init);

View file

@ -95,8 +95,6 @@ static int nbd_open(BlockDriverState *bs, const char* filename, int flags)
if (r == p) {
goto out;
}
} else if (name == NULL) {
goto out;
}
sock = tcp_socket_outgoing(hostname, port);

View file

@ -60,6 +60,7 @@ int qcow2_grow_l1_table(BlockDriverState *bs, int min_size)
qemu_free(new_l1_table);
return new_l1_table_offset;
}
bdrv_flush(bs->file);
BLKDBG_EVENT(bs->file, BLKDBG_L1_GROW_WRITE_TABLE);
for(i = 0; i < s->l1_size; i++)
@ -243,6 +244,7 @@ static int l2_allocate(BlockDriverState *bs, int l1_index, uint64_t **table)
if (l2_offset < 0) {
return l2_offset;
}
bdrv_flush(bs->file);
/* allocate a new entry in the l2 cache */
@ -348,6 +350,8 @@ static int qcow_read(BlockDriverState *bs, int64_t sector_num,
BDRVQcowState *s = bs->opaque;
int ret, index_in_cluster, n, n1;
uint64_t cluster_offset;
struct iovec iov;
QEMUIOVector qiov;
while (nb_sectors > 0) {
n = nb_sectors;
@ -362,7 +366,11 @@ static int qcow_read(BlockDriverState *bs, int64_t sector_num,
if (!cluster_offset) {
if (bs->backing_hd) {
/* read from the base image */
n1 = qcow2_backing_read1(bs->backing_hd, sector_num, buf, n);
iov.iov_base = buf;
iov.iov_len = n * 512;
qemu_iovec_init_external(&qiov, &iov, 1);
n1 = qcow2_backing_read1(bs->backing_hd, &qiov, sector_num, n);
if (n1 > 0) {
BLKDBG_EVENT(bs->file, BLKDBG_READ_BACKING);
ret = bdrv_read(bs->backing_hd, sector_num, buf, n1);
@ -413,7 +421,7 @@ static int copy_sectors(BlockDriverState *bs, uint64_t start_sect,
&s->aes_encrypt_key);
}
BLKDBG_EVENT(bs->file, BLKDBG_COW_WRITE);
ret = bdrv_write_sync(bs->file, (cluster_offset >> 9) + n_start,
ret = bdrv_write(bs->file, (cluster_offset >> 9) + n_start,
s->cluster_data, n);
if (ret < 0)
return ret;
@ -712,6 +720,13 @@ int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m)
(i << s->cluster_bits)) | QCOW_OFLAG_COPIED);
}
/*
* Before we update the L2 table to actually point to the new cluster, we
* need to be sure that the refcounts have been increased and COW was
* handled.
*/
bdrv_flush(bs->file);
ret = write_l2_entries(bs, l2_table, l2_offset, l2_index, m->nb_clusters);
if (ret < 0) {
qcow2_l2_cache_reset(bs);

View file

@ -261,6 +261,8 @@ static int64_t alloc_refcount_block(BlockDriverState *bs, int64_t cluster_index)
goto fail_block;
}
bdrv_flush(bs->file);
/* Initialize the new refcount block only after updating its refcount,
* update_refcount uses the refcount cache itself */
memset(s->refcount_block_cache, 0, s->cluster_size);
@ -444,7 +446,7 @@ static int write_refcount_block_entries(BlockDriverState *bs,
size = (last_index - first_index) << REFCOUNT_SHIFT;
BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_UPDATE_PART);
ret = bdrv_pwrite_sync(bs->file,
ret = bdrv_pwrite(bs->file,
refcount_block_offset + (first_index << REFCOUNT_SHIFT),
&s->refcount_block_cache[first_index], size);
if (ret < 0) {
@ -573,6 +575,8 @@ static int update_cluster_refcount(BlockDriverState *bs,
return ret;
}
bdrv_flush(bs->file);
return get_refcount(bs, cluster_index);
}
@ -624,6 +628,7 @@ int64_t qcow2_alloc_clusters(BlockDriverState *bs, int64_t size)
if (ret < 0) {
return ret;
}
return offset;
}
@ -671,6 +676,8 @@ int64_t qcow2_alloc_bytes(BlockDriverState *bs, int size)
goto redo;
}
}
bdrv_flush(bs->file);
return offset;
}
@ -801,6 +808,10 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs,
if (ret < 0) {
goto fail;
}
/* TODO Flushing once for the whole function should
* be enough */
bdrv_flush(bs->file);
}
/* compressed clusters are never modified */
refcount = 2;

View file

@ -138,6 +138,7 @@ static int qcow_write_snapshots(BlockDriverState *bs)
snapshots_size = offset;
snapshots_offset = qcow2_alloc_clusters(bs, snapshots_size);
bdrv_flush(bs->file);
offset = snapshots_offset;
if (offset < 0) {
return offset;
@ -271,6 +272,7 @@ int qcow2_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info)
if (l1_table_offset < 0) {
goto fail;
}
bdrv_flush(bs->file);
sn->l1_table_offset = l1_table_offset;
sn->l1_size = s->l1_size;

View file

@ -311,8 +311,8 @@ static int qcow_is_allocated(BlockDriverState *bs, int64_t sector_num,
}
/* handle reading after the end of the backing file */
int qcow2_backing_read1(BlockDriverState *bs,
int64_t sector_num, uint8_t *buf, int nb_sectors)
int qcow2_backing_read1(BlockDriverState *bs, QEMUIOVector *qiov,
int64_t sector_num, int nb_sectors)
{
int n1;
if ((sector_num + nb_sectors) <= bs->total_sectors)
@ -321,7 +321,9 @@ int qcow2_backing_read1(BlockDriverState *bs,
n1 = 0;
else
n1 = bs->total_sectors - sector_num;
memset(buf + n1 * 512, 0, 512 * (nb_sectors - n1));
qemu_iovec_memset(qiov, 0, 512 * (nb_sectors - n1));
return n1;
}
@ -329,14 +331,12 @@ typedef struct QCowAIOCB {
BlockDriverAIOCB common;
int64_t sector_num;
QEMUIOVector *qiov;
uint8_t *buf;
void *orig_buf;
int remaining_sectors;
int cur_nr_sectors; /* number of sectors in current iteration */
uint64_t bytes_done;
uint64_t cluster_offset;
uint8_t *cluster_data;
BlockDriverAIOCB *hd_aiocb;
struct iovec hd_iov;
QEMUIOVector hd_qiov;
QEMUBH *bh;
QCowL2Meta l2meta;
@ -397,15 +397,19 @@ static void qcow_aio_read_cb(void *opaque, int ret)
/* nothing to do */
} else {
if (s->crypt_method) {
qcow2_encrypt_sectors(s, acb->sector_num, acb->buf, acb->buf,
acb->cur_nr_sectors, 0,
&s->aes_decrypt_key);
qcow2_encrypt_sectors(s, acb->sector_num, acb->cluster_data,
acb->cluster_data, acb->cur_nr_sectors, 0, &s->aes_decrypt_key);
qemu_iovec_reset(&acb->hd_qiov);
qemu_iovec_copy(&acb->hd_qiov, acb->qiov, acb->bytes_done,
acb->cur_nr_sectors * 512);
qemu_iovec_from_buffer(&acb->hd_qiov, acb->cluster_data,
512 * acb->cur_nr_sectors);
}
}
acb->remaining_sectors -= acb->cur_nr_sectors;
acb->sector_num += acb->cur_nr_sectors;
acb->buf += acb->cur_nr_sectors * 512;
acb->bytes_done += acb->cur_nr_sectors * 512;
if (acb->remaining_sectors == 0) {
/* request completed */
@ -415,6 +419,11 @@ static void qcow_aio_read_cb(void *opaque, int ret)
/* prepare next AIO request */
acb->cur_nr_sectors = acb->remaining_sectors;
if (s->crypt_method) {
acb->cur_nr_sectors = MIN(acb->cur_nr_sectors,
QCOW_MAX_CRYPT_CLUSTERS * s->cluster_sectors);
}
ret = qcow2_get_cluster_offset(bs, acb->sector_num << 9,
&acb->cur_nr_sectors, &acb->cluster_offset);
if (ret < 0) {
@ -423,15 +432,17 @@ static void qcow_aio_read_cb(void *opaque, int ret)
index_in_cluster = acb->sector_num & (s->cluster_sectors - 1);
qemu_iovec_reset(&acb->hd_qiov);
qemu_iovec_copy(&acb->hd_qiov, acb->qiov, acb->bytes_done,
acb->cur_nr_sectors * 512);
if (!acb->cluster_offset) {
if (bs->backing_hd) {
/* read from the base image */
n1 = qcow2_backing_read1(bs->backing_hd, acb->sector_num,
acb->buf, acb->cur_nr_sectors);
n1 = qcow2_backing_read1(bs->backing_hd, &acb->hd_qiov,
acb->sector_num, acb->cur_nr_sectors);
if (n1 > 0) {
acb->hd_iov.iov_base = (void *)acb->buf;
acb->hd_iov.iov_len = acb->cur_nr_sectors * 512;
qemu_iovec_init_external(&acb->hd_qiov, &acb->hd_iov, 1);
BLKDBG_EVENT(bs->file, BLKDBG_READ_BACKING_AIO);
acb->hd_aiocb = bdrv_aio_readv(bs->backing_hd, acb->sector_num,
&acb->hd_qiov, acb->cur_nr_sectors,
@ -445,7 +456,7 @@ static void qcow_aio_read_cb(void *opaque, int ret)
}
} else {
/* Note: in this case, no need to wait */
memset(acb->buf, 0, 512 * acb->cur_nr_sectors);
qemu_iovec_memset(&acb->hd_qiov, 0, 512 * acb->cur_nr_sectors);
ret = qcow_schedule_bh(qcow_aio_read_bh, acb);
if (ret < 0)
goto done;
@ -454,8 +465,11 @@ static void qcow_aio_read_cb(void *opaque, int ret)
/* add AIO support for compressed blocks ? */
if (qcow2_decompress_cluster(bs, acb->cluster_offset) < 0)
goto done;
memcpy(acb->buf, s->cluster_cache + index_in_cluster * 512,
512 * acb->cur_nr_sectors);
qemu_iovec_from_buffer(&acb->hd_qiov,
s->cluster_cache + index_in_cluster * 512,
512 * acb->cur_nr_sectors);
ret = qcow_schedule_bh(qcow_aio_read_bh, acb);
if (ret < 0)
goto done;
@ -465,9 +479,23 @@ static void qcow_aio_read_cb(void *opaque, int ret)
goto done;
}
acb->hd_iov.iov_base = (void *)acb->buf;
acb->hd_iov.iov_len = acb->cur_nr_sectors * 512;
qemu_iovec_init_external(&acb->hd_qiov, &acb->hd_iov, 1);
if (s->crypt_method) {
/*
* For encrypted images, read everything into a temporary
* contiguous buffer on which the AES functions can work.
*/
if (!acb->cluster_data) {
acb->cluster_data =
qemu_mallocz(QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size);
}
assert(acb->cur_nr_sectors <=
QCOW_MAX_CRYPT_CLUSTERS * s->cluster_sectors);
qemu_iovec_reset(&acb->hd_qiov);
qemu_iovec_add(&acb->hd_qiov, acb->cluster_data,
512 * acb->cur_nr_sectors);
}
BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO);
acb->hd_aiocb = bdrv_aio_readv(bs->file,
(acb->cluster_offset >> 9) + index_in_cluster,
@ -481,11 +509,8 @@ static void qcow_aio_read_cb(void *opaque, int ret)
return;
done:
if (acb->qiov->niov > 1) {
qemu_iovec_from_buffer(acb->qiov, acb->orig_buf, acb->qiov->size);
qemu_vfree(acb->orig_buf);
}
acb->common.cb(acb->common.opaque, ret);
qemu_iovec_destroy(&acb->hd_qiov);
qemu_aio_release(acb);
}
@ -501,13 +526,10 @@ static QCowAIOCB *qcow_aio_setup(BlockDriverState *bs,
acb->hd_aiocb = NULL;
acb->sector_num = sector_num;
acb->qiov = qiov;
if (qiov->niov > 1) {
acb->buf = acb->orig_buf = qemu_blockalign(bs, qiov->size);
if (is_write)
qemu_iovec_to_buffer(qiov, acb->buf);
} else {
acb->buf = (uint8_t *)qiov->iov->iov_base;
}
qemu_iovec_init(&acb->hd_qiov, qiov->niov);
acb->bytes_done = 0;
acb->remaining_sectors = nb_sectors;
acb->cur_nr_sectors = 0;
acb->cluster_offset = 0;
@ -557,7 +579,6 @@ static void qcow_aio_write_cb(void *opaque, int ret)
BlockDriverState *bs = acb->common.bs;
BDRVQcowState *s = bs->opaque;
int index_in_cluster;
const uint8_t *src_buf;
int n_end;
acb->hd_aiocb = NULL;
@ -573,7 +594,7 @@ static void qcow_aio_write_cb(void *opaque, int ret)
acb->remaining_sectors -= acb->cur_nr_sectors;
acb->sector_num += acb->cur_nr_sectors;
acb->buf += acb->cur_nr_sectors * 512;
acb->bytes_done += acb->cur_nr_sectors * 512;
if (acb->remaining_sectors == 0) {
/* request completed */
@ -604,20 +625,27 @@ static void qcow_aio_write_cb(void *opaque, int ret)
assert((acb->cluster_offset & 511) == 0);
qemu_iovec_reset(&acb->hd_qiov);
qemu_iovec_copy(&acb->hd_qiov, acb->qiov, acb->bytes_done,
acb->cur_nr_sectors * 512);
if (s->crypt_method) {
if (!acb->cluster_data) {
acb->cluster_data = qemu_mallocz(QCOW_MAX_CRYPT_CLUSTERS *
s->cluster_size);
}
qcow2_encrypt_sectors(s, acb->sector_num, acb->cluster_data, acb->buf,
acb->cur_nr_sectors, 1, &s->aes_encrypt_key);
src_buf = acb->cluster_data;
} else {
src_buf = acb->buf;
assert(acb->hd_qiov.size <= QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size);
qemu_iovec_to_buffer(&acb->hd_qiov, acb->cluster_data);
qcow2_encrypt_sectors(s, acb->sector_num, acb->cluster_data,
acb->cluster_data, acb->cur_nr_sectors, 1, &s->aes_encrypt_key);
qemu_iovec_reset(&acb->hd_qiov);
qemu_iovec_add(&acb->hd_qiov, acb->cluster_data,
acb->cur_nr_sectors * 512);
}
acb->hd_iov.iov_base = (void *)src_buf;
acb->hd_iov.iov_len = acb->cur_nr_sectors * 512;
qemu_iovec_init_external(&acb->hd_qiov, &acb->hd_iov, 1);
BLKDBG_EVENT(bs->file, BLKDBG_WRITE_AIO);
acb->hd_aiocb = bdrv_aio_writev(bs->file,
(acb->cluster_offset >> 9) + index_in_cluster,
@ -635,9 +663,8 @@ fail:
QLIST_REMOVE(&acb->l2meta, next_in_flight);
}
done:
if (acb->qiov->niov > 1)
qemu_vfree(acb->orig_buf);
acb->common.cb(acb->common.opaque, ret);
qemu_iovec_destroy(&acb->hd_qiov);
qemu_aio_release(acb);
}

View file

@ -166,8 +166,8 @@ static inline int64_t align_offset(int64_t offset, int n)
// FIXME Need qcow2_ prefix to global functions
/* qcow2.c functions */
int qcow2_backing_read1(BlockDriverState *bs,
int64_t sector_num, uint8_t *buf, int nb_sectors);
int qcow2_backing_read1(BlockDriverState *bs, QEMUIOVector *qiov,
int64_t sector_num, int nb_sectors);
/* qcow2-refcount.c functions */
int qcow2_refcount_init(BlockDriverState *bs);

View file

@ -97,12 +97,12 @@
#define FTYPE_CD 1
#define FTYPE_FD 2
#define ALIGNED_BUFFER_SIZE (32 * 512)
/* if the FD is not accessed during that time (in ms), we try to
reopen it to see if the disk has been changed */
#define FD_OPEN_TIMEOUT 1000
#define MAX_BLOCKSIZE 4096
typedef struct BDRVRawState {
int fd;
int type;
@ -118,7 +118,8 @@ typedef struct BDRVRawState {
int use_aio;
void *aio_ctx;
#endif
uint8_t* aligned_buf;
uint8_t *aligned_buf;
unsigned aligned_buf_size;
} BDRVRawState;
static int fd_open(BlockDriverState *bs);
@ -161,7 +162,12 @@ static int raw_open_common(BlockDriverState *bs, const char *filename,
s->aligned_buf = NULL;
if ((bdrv_flags & BDRV_O_NOCACHE)) {
s->aligned_buf = qemu_blockalign(bs, ALIGNED_BUFFER_SIZE);
/*
* Allocate a buffer for read/modify/write cycles. Chose the size
* pessimistically as we don't know the block size yet.
*/
s->aligned_buf_size = 32 * MAX_BLOCKSIZE;
s->aligned_buf = qemu_memalign(MAX_BLOCKSIZE, s->aligned_buf_size);
if (s->aligned_buf == NULL) {
goto out_close;
}
@ -278,8 +284,9 @@ static int raw_pread_aligned(BlockDriverState *bs, int64_t offset,
}
/*
* offset and count are in bytes, but must be multiples of 512 for files
* opened with O_DIRECT. buf must be aligned to 512 bytes then.
* offset and count are in bytes, but must be multiples of the sector size
* for files opened with O_DIRECT. buf must be aligned to sector size bytes
* then.
*
* This function may be called without alignment if the caller ensures
* that O_DIRECT is not in effect.
@ -316,24 +323,25 @@ static int raw_pread(BlockDriverState *bs, int64_t offset,
uint8_t *buf, int count)
{
BDRVRawState *s = bs->opaque;
unsigned sector_mask = bs->buffer_alignment - 1;
int size, ret, shift, sum;
sum = 0;
if (s->aligned_buf != NULL) {
if (offset & 0x1ff) {
/* align offset on a 512 bytes boundary */
if (offset & sector_mask) {
/* align offset on a sector size bytes boundary */
shift = offset & 0x1ff;
size = (shift + count + 0x1ff) & ~0x1ff;
if (size > ALIGNED_BUFFER_SIZE)
size = ALIGNED_BUFFER_SIZE;
shift = offset & sector_mask;
size = (shift + count + sector_mask) & ~sector_mask;
if (size > s->aligned_buf_size)
size = s->aligned_buf_size;
ret = raw_pread_aligned(bs, offset - shift, s->aligned_buf, size);
if (ret < 0)
return ret;
size = 512 - shift;
size = bs->buffer_alignment - shift;
if (size > count)
size = count;
memcpy(buf, s->aligned_buf + shift, size);
@ -346,15 +354,15 @@ static int raw_pread(BlockDriverState *bs, int64_t offset,
if (count == 0)
return sum;
}
if (count & 0x1ff || (uintptr_t) buf & 0x1ff) {
if (count & sector_mask || (uintptr_t) buf & sector_mask) {
/* read on aligned buffer */
while (count) {
size = (count + 0x1ff) & ~0x1ff;
if (size > ALIGNED_BUFFER_SIZE)
size = ALIGNED_BUFFER_SIZE;
size = (count + sector_mask) & ~sector_mask;
if (size > s->aligned_buf_size)
size = s->aligned_buf_size;
ret = raw_pread_aligned(bs, offset, s->aligned_buf, size);
if (ret < 0) {
@ -404,25 +412,28 @@ static int raw_pwrite(BlockDriverState *bs, int64_t offset,
const uint8_t *buf, int count)
{
BDRVRawState *s = bs->opaque;
unsigned sector_mask = bs->buffer_alignment - 1;
int size, ret, shift, sum;
sum = 0;
if (s->aligned_buf != NULL) {
if (offset & 0x1ff) {
/* align offset on a 512 bytes boundary */
shift = offset & 0x1ff;
ret = raw_pread_aligned(bs, offset - shift, s->aligned_buf, 512);
if (offset & sector_mask) {
/* align offset on a sector size bytes boundary */
shift = offset & sector_mask;
ret = raw_pread_aligned(bs, offset - shift, s->aligned_buf,
bs->buffer_alignment);
if (ret < 0)
return ret;
size = 512 - shift;
size = bs->buffer_alignment - shift;
if (size > count)
size = count;
memcpy(s->aligned_buf + shift, buf, size);
ret = raw_pwrite_aligned(bs, offset - shift, s->aligned_buf, 512);
ret = raw_pwrite_aligned(bs, offset - shift, s->aligned_buf,
bs->buffer_alignment);
if (ret < 0)
return ret;
@ -434,12 +445,12 @@ static int raw_pwrite(BlockDriverState *bs, int64_t offset,
if (count == 0)
return sum;
}
if (count & 0x1ff || (uintptr_t) buf & 0x1ff) {
if (count & sector_mask || (uintptr_t) buf & sector_mask) {
while ((size = (count & ~0x1ff)) != 0) {
while ((size = (count & ~sector_mask)) != 0) {
if (size > ALIGNED_BUFFER_SIZE)
size = ALIGNED_BUFFER_SIZE;
if (size > s->aligned_buf_size)
size = s->aligned_buf_size;
memcpy(s->aligned_buf, buf, size);
@ -452,14 +463,16 @@ static int raw_pwrite(BlockDriverState *bs, int64_t offset,
count -= ret;
sum += ret;
}
/* here, count < 512 because (count & ~0x1ff) == 0 */
/* here, count < 512 because (count & ~sector_mask) == 0 */
if (count) {
ret = raw_pread_aligned(bs, offset, s->aligned_buf, 512);
ret = raw_pread_aligned(bs, offset, s->aligned_buf,
bs->buffer_alignment);
if (ret < 0)
return ret;
memcpy(s->aligned_buf, buf, count);
ret = raw_pwrite_aligned(bs, offset, s->aligned_buf, 512);
ret = raw_pwrite_aligned(bs, offset, s->aligned_buf,
bs->buffer_alignment);
if (ret < 0)
return ret;
if (count < ret)
@ -487,12 +500,12 @@ static int raw_write(BlockDriverState *bs, int64_t sector_num,
/*
* Check if all memory in this vector is sector aligned.
*/
static int qiov_is_aligned(QEMUIOVector *qiov)
static int qiov_is_aligned(BlockDriverState *bs, QEMUIOVector *qiov)
{
int i;
for (i = 0; i < qiov->niov; i++) {
if ((uintptr_t) qiov->iov[i].iov_base % BDRV_SECTOR_SIZE) {
if ((uintptr_t) qiov->iov[i].iov_base % bs->buffer_alignment) {
return 0;
}
}
@ -515,7 +528,7 @@ static BlockDriverAIOCB *raw_aio_submit(BlockDriverState *bs,
* driver that it needs to copy the buffer.
*/
if (s->aligned_buf) {
if (!qiov_is_aligned(qiov)) {
if (!qiov_is_aligned(bs, qiov)) {
type |= QEMU_AIO_MISALIGNED;
#ifdef CONFIG_LINUX_AIO
} else if (s->use_aio) {

View file

@ -2665,6 +2665,11 @@ static int vvfat_write(BlockDriverState *bs, int64_t sector_num,
DLOG(checkpoint());
/* Check if we're operating in read-only mode */
if (s->qcow == NULL) {
return -EACCES;
}
vvfat_close_current_file(s);
/*
@ -2763,12 +2768,12 @@ static int vvfat_is_allocated(BlockDriverState *bs,
static int write_target_commit(BlockDriverState *bs, int64_t sector_num,
const uint8_t* buffer, int nb_sectors) {
BDRVVVFATState* s = bs->opaque;
BDRVVVFATState* s = *((BDRVVVFATState**) bs->opaque);
return try_commit(s);
}
static void write_target_close(BlockDriverState *bs) {
BDRVVVFATState* s = bs->opaque;
BDRVVVFATState* s = *((BDRVVVFATState**) bs->opaque);
bdrv_delete(s->qcow);
free(s->qcow_filename);
}
@ -2783,6 +2788,7 @@ static int enable_write_target(BDRVVVFATState *s)
{
BlockDriver *bdrv_qcow;
QEMUOptionParameter *options;
int ret;
int size = sector2cluster(s, s->sector_count);
s->used_clusters = calloc(size, 1);
@ -2798,11 +2804,16 @@ static int enable_write_target(BDRVVVFATState *s)
if (bdrv_create(bdrv_qcow, s->qcow_filename, options) < 0)
return -1;
s->qcow = bdrv_new("");
if (s->qcow == NULL ||
bdrv_open(s->qcow, s->qcow_filename, BDRV_O_RDWR, bdrv_qcow) < 0)
{
return -1;
if (s->qcow == NULL) {
return -1;
}
ret = bdrv_open(s->qcow, s->qcow_filename,
BDRV_O_RDWR | BDRV_O_CACHE_WB | BDRV_O_NO_FLUSH, bdrv_qcow);
if (ret < 0) {
return ret;
}
#ifndef _WIN32
@ -2811,7 +2822,8 @@ static int enable_write_target(BDRVVVFATState *s)
s->bs->backing_hd = calloc(sizeof(BlockDriverState), 1);
s->bs->backing_hd->drv = &vvfat_write_target;
s->bs->backing_hd->opaque = s;
s->bs->backing_hd->opaque = qemu_malloc(sizeof(void*));
*(void**)s->bs->backing_hd->opaque = s;
return 0;
}

View file

@ -168,30 +168,50 @@ void qemu_iovec_add(QEMUIOVector *qiov, void *base, size_t len)
}
/*
* Copies iovecs from src to the end dst until src is completely copied or the
* total size of the copied iovec reaches size. The size of the last copied
* iovec is changed in order to fit the specified total size if it isn't a
* perfect fit already.
* Copies iovecs from src to the end of dst. It starts copying after skipping
* the given number of bytes in src and copies until src is completely copied
* or the total size of the copied iovec reaches size.The size of the last
* copied iovec is changed in order to fit the specified total size if it isn't
* a perfect fit already.
*/
void qemu_iovec_concat(QEMUIOVector *dst, QEMUIOVector *src, size_t size)
void qemu_iovec_copy(QEMUIOVector *dst, QEMUIOVector *src, uint64_t skip,
size_t size)
{
int i;
size_t done;
void *iov_base;
uint64_t iov_len;
assert(dst->nalloc != -1);
done = 0;
for (i = 0; (i < src->niov) && (done != size); i++) {
if (done + src->iov[i].iov_len > size) {
qemu_iovec_add(dst, src->iov[i].iov_base, size - done);
if (skip >= src->iov[i].iov_len) {
/* Skip the whole iov */
skip -= src->iov[i].iov_len;
continue;
} else {
/* Skip only part (or nothing) of the iov */
iov_base = (uint8_t*) src->iov[i].iov_base + skip;
iov_len = src->iov[i].iov_len - skip;
skip = 0;
}
if (done + iov_len > size) {
qemu_iovec_add(dst, iov_base, size - done);
break;
} else {
qemu_iovec_add(dst, src->iov[i].iov_base, src->iov[i].iov_len);
qemu_iovec_add(dst, iov_base, iov_len);
}
done += src->iov[i].iov_len;
done += iov_len;
}
}
void qemu_iovec_concat(QEMUIOVector *dst, QEMUIOVector *src, size_t size)
{
qemu_iovec_copy(dst, src, 0, size);
}
void qemu_iovec_destroy(QEMUIOVector *qiov)
{
assert(qiov->nalloc != -1);
@ -234,6 +254,18 @@ void qemu_iovec_from_buffer(QEMUIOVector *qiov, const void *buf, size_t count)
}
}
void qemu_iovec_memset(QEMUIOVector *qiov, int c, size_t count)
{
size_t n;
int i;
for (i = 0; i < qiov->niov && count; ++i) {
n = MIN(count, qiov->iov[i].iov_len);
memset(qiov->iov[i].iov_base, c, n);
count -= n;
}
}
#ifndef _WIN32
/* Sets a specific flag */
int fcntl_setfl(int fd, int flag)

69
docs/blkverify.txt Normal file
View file

@ -0,0 +1,69 @@
= Block driver correctness testing with blkverify =
== Introduction ==
This document describes how to use the blkverify protocol to test that a block
driver is operating correctly.
It is difficult to test and debug block drivers against real guests. Often
processes inside the guest will crash because corrupt sectors were read as part
of the executable. Other times obscure errors are raised by a program inside
the guest. These issues are extremely hard to trace back to bugs in the block
driver.
Blkverify solves this problem by catching data corruption inside QEMU the first
time bad data is read and reporting the disk sector that is corrupted.
== How it works ==
The blkverify protocol has two child block devices, the "test" device and the
"raw" device. Read/write operations are mirrored to both devices so their
state should always be in sync.
The "raw" device is a raw image, a flat file, that has identical starting
contents to the "test" image. The idea is that the "raw" device will handle
read/write operations correctly and not corrupt data. It can be used as a
reference for comparison against the "test" device.
After a mirrored read operation completes, blkverify will compare the data and
raise an error if it is not identical. This makes it possible to catch the
first instance where corrupt data is read.
== Example ==
Imagine raw.img has 0xcd repeated throughout its first sector:
$ ./qemu-io -c 'read -v 0 512' raw.img
00000000: cd cd cd cd cd cd cd cd cd cd cd cd cd cd cd cd ................
00000010: cd cd cd cd cd cd cd cd cd cd cd cd cd cd cd cd ................
[...]
000001e0: cd cd cd cd cd cd cd cd cd cd cd cd cd cd cd cd ................
000001f0: cd cd cd cd cd cd cd cd cd cd cd cd cd cd cd cd ................
read 512/512 bytes at offset 0
512.000000 bytes, 1 ops; 0.0000 sec (97.656 MiB/sec and 200000.0000 ops/sec)
And test.img is corrupt, its first sector is zeroed when it shouldn't be:
$ ./qemu-io -c 'read -v 0 512' test.img
00000000: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................
00000010: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................
[...]
000001e0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................
000001f0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................
read 512/512 bytes at offset 0
512.000000 bytes, 1 ops; 0.0000 sec (81.380 MiB/sec and 166666.6667 ops/sec)
This error is caught by blkverify:
$ ./qemu-io -c 'read 0 512' blkverify:a.img:b.img
blkverify: read sector_num=0 nb_sectors=4 contents mismatch in sector 0
A more realistic scenario is verifying the installation of a guest OS:
$ ./qemu-img create raw.img 16G
$ ./qemu-img create -f qcow2 test.qcow2 16G
$ x86_64-softmmu/qemu-system-x86_64 -cdrom debian.iso \
-drive file=blkverify:raw.img:test.qcow2
If the installation is aborted when blkverify detects corruption, use qemu-io
to explore the contents of the disk image at the sector in question.

View file

@ -2645,6 +2645,7 @@ int ide_init_drive(IDEState *s, BlockDriverState *bs,
if (bdrv_get_type_hint(bs) == BDRV_TYPE_CDROM) {
s->drive_kind = IDE_CD;
bdrv_set_change_cb(bs, cdrom_change_cb, s);
bs->buffer_alignment = 2048;
} else {
if (!bdrv_is_inserted(s->bs)) {
error_report("Device needs media, but drive is empty");
@ -2679,7 +2680,8 @@ static void ide_init1(IDEBus *bus, int unit)
s->bus = bus;
s->unit = unit;
s->drive_serial = drive_serial++;
s->io_buffer = qemu_blockalign(s->bs, IDE_DMA_BUF_SECTORS*512 + 4);
/* we need at least 2k alignment for accessing CDROMs using O_DIRECT */
s->io_buffer = qemu_memalign(2048, IDE_DMA_BUF_SECTORS*512 + 4);
s->io_buffer_total_len = IDE_DMA_BUF_SECTORS*512 + 4;
s->smart_selftest_data = qemu_blockalign(s->bs, 512);
s->sector_write_timer = qemu_new_timer(vm_clock,

View file

@ -208,6 +208,8 @@ static int scsi_req_length(SCSIRequest *req, uint8_t *cmd)
case SEEK_6:
case WRITE_FILEMARKS:
case SPACE:
case RESERVE:
case RELEASE:
case ERASE:
case ALLOW_MEDIUM_REMOVAL:
case VERIFY:
@ -319,7 +321,6 @@ static void scsi_req_xfer_mode(SCSIRequest *req)
case WRITE_BUFFER:
case FORMAT_UNIT:
case REASSIGN_BLOCKS:
case RESERVE:
case SEARCH_EQUAL:
case SEARCH_HIGH:
case SEARCH_LOW:

View file

@ -70,14 +70,15 @@ struct SCSIDiskState
char *serial;
};
static SCSIDiskReq *scsi_new_request(SCSIDevice *d, uint32_t tag, uint32_t lun)
static SCSIDiskReq *scsi_new_request(SCSIDiskState *s, uint32_t tag,
uint32_t lun)
{
SCSIRequest *req;
SCSIDiskReq *r;
req = scsi_req_alloc(sizeof(SCSIDiskReq), d, tag, lun);
req = scsi_req_alloc(sizeof(SCSIDiskReq), &s->qdev, tag, lun);
r = DO_UPCAST(SCSIDiskReq, req, req);
r->iov.iov_base = qemu_memalign(512, SCSI_DMA_BUF_SIZE);
r->iov.iov_base = qemu_blockalign(s->bs, SCSI_DMA_BUF_SIZE);
return r;
}
@ -939,7 +940,7 @@ static int32_t scsi_send_command(SCSIDevice *d, uint32_t tag,
}
/* ??? Tags are not unique for different luns. We only implement a
single lun, so this should not matter. */
r = scsi_new_request(d, tag, lun);
r = scsi_new_request(s, tag, lun);
outbuf = (uint8_t *)r->iov.iov_base;
is_write = 0;
DPRINTF("Command: lun=%d tag=0x%x data=0x%02x", lun, tag, buf[0]);
@ -1177,6 +1178,7 @@ static int scsi_disk_initfn(SCSIDevice *dev)
s->qdev.blocksize = s->qdev.conf.logical_block_size;
}
s->cluster_size = s->qdev.blocksize / 512;
s->bs->buffer_alignment = s->qdev.blocksize;
s->qdev.type = TYPE_DISK;
qemu_add_vm_change_state_handler(scsi_dma_restart_cb, s);

View file

@ -455,15 +455,31 @@ static int get_stream_blocksize(BlockDriverState *bdrv)
return (buf[9] << 16) | (buf[10] << 8) | buf[11];
}
static void scsi_destroy(SCSIDevice *d)
static void scsi_generic_purge_requests(SCSIGenericState *s)
{
SCSIGenericState *s = DO_UPCAST(SCSIGenericState, qdev, d);
SCSIGenericReq *r;
while (!QTAILQ_EMPTY(&s->qdev.requests)) {
r = DO_UPCAST(SCSIGenericReq, req, QTAILQ_FIRST(&s->qdev.requests));
if (r->req.aiocb) {
bdrv_aio_cancel(r->req.aiocb);
}
scsi_remove_request(r);
}
}
static void scsi_generic_reset(DeviceState *dev)
{
SCSIGenericState *s = DO_UPCAST(SCSIGenericState, qdev.qdev, dev);
scsi_generic_purge_requests(s);
}
static void scsi_destroy(SCSIDevice *d)
{
SCSIGenericState *s = DO_UPCAST(SCSIGenericState, qdev, d);
scsi_generic_purge_requests(s);
blockdev_mark_auto_del(s->qdev.conf.bs);
}
@ -537,6 +553,7 @@ static SCSIDeviceInfo scsi_generic_info = {
.qdev.name = "scsi-generic",
.qdev.desc = "pass through generic scsi device (/dev/sg*)",
.qdev.size = sizeof(SCSIGenericState),
.qdev.reset = scsi_generic_reset,
.init = scsi_generic_initfn,
.destroy = scsi_destroy,
.send_command = scsi_send_command,

View file

@ -440,7 +440,7 @@ SDState *sd_init(BlockDriverState *bs, int is_spi)
SDState *sd;
sd = (SDState *) qemu_mallocz(sizeof(SDState));
sd->buf = qemu_memalign(512, 512);
sd->buf = qemu_blockalign(bs, 512);
sd->spi = is_spi;
sd->enable = 1;
sd_reset(sd, bs);

View file

@ -540,6 +540,7 @@ VirtIODevice *virtio_blk_init(DeviceState *dev, BlockConf *conf)
register_savevm(dev, "virtio-blk", virtio_blk_id++, 2,
virtio_blk_save, virtio_blk_load, s);
bdrv_set_removable(s->bs, 0);
s->bs->buffer_alignment = conf->logical_block_size;
return &s->vdev;
}

25
nbd.c
View file

@ -49,6 +49,7 @@
/* This is all part of the "official" NBD API */
#define NBD_REPLY_SIZE (4 + 4 + 8)
#define NBD_REQUEST_MAGIC 0x25609513
#define NBD_REPLY_MAGIC 0x67446698
@ -588,7 +589,7 @@ static int nbd_receive_request(int csock, struct nbd_request *request)
int nbd_receive_reply(int csock, struct nbd_reply *reply)
{
uint8_t buf[4 + 4 + 8];
uint8_t buf[NBD_REPLY_SIZE];
uint32_t magic;
memset(buf, 0xAA, sizeof(buf));
@ -655,9 +656,9 @@ int nbd_trip(BlockDriverState *bs, int csock, off_t size, uint64_t dev_offset,
if (nbd_receive_request(csock, &request) == -1)
return -1;
if (request.len > data_size) {
if (request.len + NBD_REPLY_SIZE > data_size) {
LOG("len (%u) is larger than max len (%u)",
request.len, data_size);
request.len + NBD_REPLY_SIZE, data_size);
errno = EINVAL;
return -1;
}
@ -687,7 +688,8 @@ int nbd_trip(BlockDriverState *bs, int csock, off_t size, uint64_t dev_offset,
case NBD_CMD_READ:
TRACE("Request type is READ");
if (bdrv_read(bs, (request.from + dev_offset) / 512, data,
if (bdrv_read(bs, (request.from + dev_offset) / 512,
data + NBD_REPLY_SIZE,
request.len / 512) == -1) {
LOG("reading from file failed");
errno = EINVAL;
@ -697,12 +699,21 @@ int nbd_trip(BlockDriverState *bs, int csock, off_t size, uint64_t dev_offset,
TRACE("Read %u byte(s)", request.len);
if (nbd_send_reply(csock, &reply) == -1)
return -1;
/* Reply
[ 0 .. 3] magic (NBD_REPLY_MAGIC)
[ 4 .. 7] error (0 == no error)
[ 7 .. 15] handle
*/
cpu_to_be32w((uint32_t*)data, NBD_REPLY_MAGIC);
cpu_to_be32w((uint32_t*)(data + 4), reply.error);
cpu_to_be64w((uint64_t*)(data + 8), reply.handle);
TRACE("Sending data to client");
if (write_sync(csock, data, request.len) != request.len) {
if (write_sync(csock, data,
request.len + NBD_REPLY_SIZE) !=
request.len + NBD_REPLY_SIZE) {
LOG("writing to socket failed");
errno = EINVAL;
return -1;

View file

@ -270,7 +270,7 @@ static ssize_t handle_aiocb_rw(struct qemu_paiocb *aiocb)
* Ok, we have to do it the hard way, copy all segments into
* a single aligned buffer.
*/
buf = qemu_memalign(512, aiocb->aio_nbytes);
buf = qemu_blockalign(aiocb->common.bs, aiocb->aio_nbytes);
if (aiocb->aio_type & QEMU_AIO_WRITE) {
char *p = buf;
int i;

View file

@ -278,11 +278,14 @@ typedef struct QEMUIOVector {
void qemu_iovec_init(QEMUIOVector *qiov, int alloc_hint);
void qemu_iovec_init_external(QEMUIOVector *qiov, struct iovec *iov, int niov);
void qemu_iovec_add(QEMUIOVector *qiov, void *base, size_t len);
void qemu_iovec_copy(QEMUIOVector *dst, QEMUIOVector *src, uint64_t skip,
size_t size);
void qemu_iovec_concat(QEMUIOVector *dst, QEMUIOVector *src, size_t size);
void qemu_iovec_destroy(QEMUIOVector *qiov);
void qemu_iovec_reset(QEMUIOVector *qiov);
void qemu_iovec_to_buffer(QEMUIOVector *qiov, void *buf);
void qemu_iovec_from_buffer(QEMUIOVector *qiov, const void *buf, size_t count);
void qemu_iovec_memset(QEMUIOVector *qiov, int c, size_t count);
struct Monitor;
typedef struct Monitor Monitor;

View file

@ -61,7 +61,7 @@ static void *qemu_io_alloc(size_t len, int pattern)
if (misalign)
len += MISALIGN_OFFSET;
buf = qemu_memalign(512, len);
buf = qemu_blockalign(bs, len);
memset(buf, pattern, len);
if (misalign)
buf += MISALIGN_OFFSET;

View file

@ -44,7 +44,7 @@ static void usage(const char *name)
"Usage: %s [OPTIONS] FILE\n"
"QEMU Disk Network Block Device Server\n"
"\n"
" -p, --port=PORT port to listen on (default `1024')\n"
" -p, --port=PORT port to listen on (default `%d')\n"
" -o, --offset=OFFSET offset into the image\n"
" -b, --bind=IFACE interface to bind to (default `0.0.0.0')\n"
" -k, --socket=PATH path to the unix socket\n"
@ -62,7 +62,7 @@ static void usage(const char *name)
" -V, --version output version information and exit\n"
"\n"
"Report bugs to <anthony@codemonkey.ws>\n"
, name, "DEVICE");
, name, NBD_DEFAULT_PORT, "DEVICE");
}
static void version(const char *name)
@ -188,7 +188,7 @@ int main(int argc, char **argv)
bool readonly = false;
bool disconnect = false;
const char *bindto = "0.0.0.0";
int port = 1024;
int port = NBD_DEFAULT_PORT;
struct sockaddr_in addr;
socklen_t addr_len = sizeof(addr);
off_t fd_size;
@ -446,7 +446,7 @@ int main(int argc, char **argv)
max_fd = sharing_fds[0];
nb_fds++;
data = qemu_memalign(512, NBD_BUFFER_SIZE);
data = qemu_blockalign(bs, NBD_BUFFER_SIZE);
if (data == NULL)
errx(EXIT_FAILURE, "Cannot allocate data buffer");