From cc94712b9ec93d1301eea1fb8f1b08589c7e242e Mon Sep 17 00:00:00 2001 From: Eric Blake Date: Sat, 19 Oct 2013 17:52:33 +0100 Subject: [PATCH 01/30] qapi: fix documentation example The QMP wire format uses "", not '', around strings. * docs/qapi-code-gen.txt: Fix typo. Signed-off-by: Eric Blake Signed-off-by: Kevin Wolf --- docs/qapi-code-gen.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/qapi-code-gen.txt b/docs/qapi-code-gen.txt index 91f44d01b9..0728f36c65 100644 --- a/docs/qapi-code-gen.txt +++ b/docs/qapi-code-gen.txt @@ -164,7 +164,7 @@ This example allows using both of the following example objects: { "file": "my_existing_block_device_id" } { "file": { "driver": "file", "readonly": false, - 'filename': "/tmp/mydisk.qcow2" } } + "filename": "/tmp/mydisk.qcow2" } } === Commands === From eedff66f21e542650d895801549ce05ac108278b Mon Sep 17 00:00:00 2001 From: Max Reitz Date: Sun, 20 Oct 2013 20:28:20 +0200 Subject: [PATCH 02/30] qcow2: Restore total_sectors value in save_vmstate Since df2a6f29a5, bdrv_co_do_writev increases the total_sectors value of a growable block devices on writes after the current end. This leads to the virtual disk apparently growing in qcow2_save_vmstate, which in turn affects the disk size captured by the internal snapshot taken directly afterwards through e.g. the HMP savevm command. Such a "grown" snapshot cannot be loaded after reopening the qcow2 image, since its disk size differs from the actual virtual disk size (writing a VM state does not actually increase the virtual disk size). Fix this by restoring total_sectors at the end of qcow2_save_vmstate. Signed-off-by: Max Reitz Reviewed-by: Eric Blake Signed-off-by: Kevin Wolf --- block/qcow2.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/block/qcow2.c b/block/qcow2.c index c1abaffa19..4a3e8b45f8 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -1939,6 +1939,7 @@ static int qcow2_save_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos) { BDRVQcowState *s = bs->opaque; + int64_t total_sectors = bs->total_sectors; int growable = bs->growable; int ret; @@ -1947,6 +1948,11 @@ static int qcow2_save_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, ret = bdrv_pwritev(bs, qcow2_vm_state_offset(s) + pos, qiov); bs->growable = growable; + /* bdrv_co_do_writev will have increased the total_sectors value to include + * the VM state - the VM state is however not an actual part of the block + * device, therefore, we need to restore the old value. */ + bs->total_sectors = total_sectors; + return ret; } From 6e13610aa454beba52944e8df6d93158d68ab911 Mon Sep 17 00:00:00 2001 From: Max Reitz Date: Sun, 20 Oct 2013 21:52:35 +0200 Subject: [PATCH 03/30] qcow2: Unset zero_beyond_eof in save_vmstate Saving the VM state is done using bdrv_pwrite. This function may perform a read-modify-write, which in this case results in data being read from beyond the end of the virtual disk. Since we are actually trying to access an area which is not a part of the virtual disk, zero_beyond_eof has to be set to false before performing the partial write, otherwise the VM state may become corrupted. Signed-off-by: Max Reitz Reviewed-by: Eric Blake Signed-off-by: Kevin Wolf --- block/qcow2.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/block/qcow2.c b/block/qcow2.c index 4a3e8b45f8..01269f9264 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -1941,12 +1941,15 @@ static int qcow2_save_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, BDRVQcowState *s = bs->opaque; int64_t total_sectors = bs->total_sectors; int growable = bs->growable; + bool zero_beyond_eof = bs->zero_beyond_eof; int ret; BLKDBG_EVENT(bs->file, BLKDBG_VMSTATE_SAVE); bs->growable = 1; + bs->zero_beyond_eof = false; ret = bdrv_pwritev(bs, qcow2_vm_state_offset(s) + pos, qiov); bs->growable = growable; + bs->zero_beyond_eof = zero_beyond_eof; /* bdrv_co_do_writev will have increased the total_sectors value to include * the VM state - the VM state is however not an actual part of the block From fefddf951b6dfe51c28d41f86669bfffb68c7a15 Mon Sep 17 00:00:00 2001 From: Peter Lieven Date: Thu, 24 Oct 2013 08:53:34 +0200 Subject: [PATCH 04/30] qemu-img: add special exit code if bdrv_check is not supported currently it is not possible to distinguish by exitcode if there has been an error or if bdrv_check is not supported by the image format. Change the exitcode from 1 to 63 for the latter case. Signed-off-by: Peter Lieven Reviewed-by: Eric Blake Signed-off-by: Kevin Wolf --- qemu-img.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/qemu-img.c b/qemu-img.c index 926f0a0feb..bf3fb4f8a6 100644 --- a/qemu-img.c +++ b/qemu-img.c @@ -607,7 +607,7 @@ static int img_check(int argc, char **argv) if (output_format == OFORMAT_HUMAN) { error_report("This image format does not support checks"); } - ret = 1; + ret = 63; goto fail; } From fb8fe35f63a56170cf1bf92b1991d0056385b901 Mon Sep 17 00:00:00 2001 From: Peter Lieven Date: Thu, 24 Oct 2013 09:16:03 +0200 Subject: [PATCH 05/30] block/vpc: check that the image has not been truncated this adds a check that a dynamic VHD file has not been accidently truncated (e.g. during transfer or upload). Signed-off-by: Peter Lieven Reviewed-by: Eric Blake Signed-off-by: Kevin Wolf --- block/vpc.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/block/vpc.c b/block/vpc.c index b5dca3961e..627d11cb9b 100644 --- a/block/vpc.c +++ b/block/vpc.c @@ -260,6 +260,13 @@ static int vpc_open(BlockDriverState *bs, QDict *options, int flags, } } + if (s->free_data_block_offset > bdrv_getlength(bs->file)) { + error_setg(errp, "block-vpc: free_data_block_offset points after " + "the end of file. The image has been truncated."); + ret = -EINVAL; + goto fail; + } + s->last_bitmap_offset = (int64_t) -1; #ifdef CACHE From ab6f2bbb2871db8a7ed2457328e864cdf2e2fc82 Mon Sep 17 00:00:00 2001 From: Max Reitz Date: Thu, 24 Oct 2013 20:24:43 +0200 Subject: [PATCH 06/30] qemu-iotests: Test for loading VM state from qcow2 Add a test for saving a VM state from a qcow2 image and loading it back (with having restarted qemu in between); this should work without any problems. Signed-off-by: Max Reitz Signed-off-by: Kevin Wolf --- tests/qemu-iotests/068 | 65 ++++++++++++++++++++++++++++++++++++++ tests/qemu-iotests/068.out | 11 +++++++ tests/qemu-iotests/group | 1 + 3 files changed, 77 insertions(+) create mode 100755 tests/qemu-iotests/068 create mode 100644 tests/qemu-iotests/068.out diff --git a/tests/qemu-iotests/068 b/tests/qemu-iotests/068 new file mode 100755 index 0000000000..b72e55599b --- /dev/null +++ b/tests/qemu-iotests/068 @@ -0,0 +1,65 @@ +#!/bin/bash +# +# Test case for loading a saved VM state from a qcow2 image +# +# Copyright (C) 2013 Red Hat, Inc. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +# + +# creator +owner=mreitz@redhat.com + +seq="$(basename $0)" +echo "QA output created by $seq" + +here="$PWD" +tmp=/tmp/$$ +status=1 # failure is the default! + +_cleanup() +{ + _cleanup_test_img +} +trap "_cleanup; exit \$status" 0 1 2 3 15 + +# get standard environment, filters and checks +. ./common.rc +. ./common.filter + +# This tests qocw2-specific low-level functionality +_supported_fmt qcow2 +_supported_proto generic +_supported_os Linux + +IMGOPTS="compat=1.1" +IMG_SIZE=128K + +echo +echo "=== Saving and reloading a VM state to/from a qcow2 image ===" +echo +_make_test_img $IMG_SIZE +# Give qemu some time to boot before saving the VM state +bash -c 'sleep 1; echo -e "savevm 0\nquit"' |\ + $QEMU -nographic -monitor stdio -serial none -hda "$TEST_IMG" |\ + _filter_qemu +# Now try to continue from that VM state (this should just work) +echo quit |\ + $QEMU -nographic -monitor stdio -serial none -hda "$TEST_IMG" -loadvm 0 |\ + _filter_qemu + +# success, all done +echo "*** done" +rm -f $seq.full +status=0 diff --git a/tests/qemu-iotests/068.out b/tests/qemu-iotests/068.out new file mode 100644 index 0000000000..abe35a9f8c --- /dev/null +++ b/tests/qemu-iotests/068.out @@ -0,0 +1,11 @@ +QA output created by 068 + +=== Saving and reloading a VM state to/from a qcow2 image === + +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=131072 +QEMU X.Y.Z monitor - type 'help' for more information +(qemu) ssasavsavesavevsavevmsavevm savevm 0 +(qemu) qququiquit +QEMU X.Y.Z monitor - type 'help' for more information +(qemu) qququiquit +*** done diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group index 13c5500f54..3ca9cba2e4 100644 --- a/tests/qemu-iotests/group +++ b/tests/qemu-iotests/group @@ -73,3 +73,4 @@ 065 rw auto 066 rw auto 067 rw auto +068 rw auto From ba2ab2f2ca4150a7e314fbb19fa158bd8ddc36eb Mon Sep 17 00:00:00 2001 From: Max Reitz Date: Thu, 24 Oct 2013 20:35:06 +0200 Subject: [PATCH 07/30] qcow2: Flush image after creation Opening the qcow2 image with BDRV_O_NO_FLUSH prevents any flushes during the image creation. This means that the image has not yet been flushed to disk when qemu-img create exits. This flush is delayed until the next operation on the image involving opening it without BDRV_O_NO_FLUSH and closing (or directly flushing) it. For large images and/or images with a small cluster size and preallocated metadata, this flush may take a significant amount of time and may occur unexpectedly. Reopening the image without BDRV_O_NO_FLUSH right before the end of qcow2_create2() results in hoisting the potentially costly flush into the image creation, which is expected to take some time (whereas successive image operations may be not). Signed-off-by: Max Reitz Reviewed-by: Eric Blake Reviewed-by: Benoit Canet Signed-off-by: Kevin Wolf --- block/qcow2.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/block/qcow2.c b/block/qcow2.c index 01269f9264..6e5d98dc48 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -1584,6 +1584,16 @@ static int qcow2_create2(const char *filename, int64_t total_size, } } + bdrv_close(bs); + + /* Reopen the image without BDRV_O_NO_FLUSH to flush it before returning */ + ret = bdrv_open(bs, filename, NULL, + BDRV_O_RDWR | BDRV_O_CACHE_WB, drv, &local_err); + if (error_is_set(&local_err)) { + error_propagate(errp, local_err); + goto out; + } + ret = 0; out: bdrv_unref(bs); From e85d9db5f6f86299688a0acd2796ac1ccc96610c Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Mon, 22 Jul 2013 14:30:23 +0200 Subject: [PATCH 08/30] exec: Fix bounce buffer allocation in address_space_map() This fixes a regression introduced by commit e3127ae0c, which kept the allocation size of the bounce buffer limited to one page in order to avoid unbounded allocations (as explained in the commit message of 6d16c2f88), but broke the reporting of the shortened bounce buffer to the caller. The caller therefore assumes that the full requested size was provided and causes memory corruption when writing beyond the end of the actually allocated buffer. Signed-off-by: Kevin Wolf --- exec.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/exec.c b/exec.c index 2e31ffcb2c..b453713bdb 100644 --- a/exec.c +++ b/exec.c @@ -2099,7 +2099,9 @@ void *address_space_map(AddressSpace *as, if (bounce.buffer) { return NULL; } - bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, TARGET_PAGE_SIZE); + /* Avoid unbounded allocations */ + l = MIN(l, TARGET_PAGE_SIZE); + bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, l); bounce.addr = addr; bounce.len = l; From d7b7e580096255c766f7b1e7502a9151b95091e8 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Mon, 22 Jul 2013 14:26:25 +0200 Subject: [PATCH 09/30] ide-test: Check what happens with bus mastering disabled The main goal is that qemu doesn't crash. Signed-off-by: Kevin Wolf --- tests/ide-test.c | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/tests/ide-test.c b/tests/ide-test.c index 7307f1d336..bc824a8144 100644 --- a/tests/ide-test.c +++ b/tests/ide-test.c @@ -81,6 +81,7 @@ enum { CMD_IDENTIFY = 0xec, CMDF_ABORT = 0x100, + CMDF_NO_BM = 0x200, }; enum { @@ -192,6 +193,11 @@ static int send_dma_request(int cmd, uint64_t sector, int nb_sectors, g_assert_not_reached(); } + if (flags & CMDF_NO_BM) { + qpci_config_writew(dev, PCI_COMMAND, + PCI_COMMAND_IO | PCI_COMMAND_MEMORY); + } + /* Select device 0 */ outb(IDE_BASE + reg_device, 0 | LBA); @@ -352,6 +358,25 @@ static void test_bmdma_long_prdt(void) assert_bit_clear(inb(IDE_BASE + reg_status), DF | ERR); } +static void test_bmdma_no_busmaster(void) +{ + uint8_t status; + + /* No PRDT_EOT, each entry addr 0/size 64k, and in theory qemu shouldn't be + * able to access it anyway because the Bus Master bit in the PCI command + * register isn't set. This is complete nonsense, but it used to be pretty + * good at confusing and occasionally crashing qemu. */ + PrdtEntry prdt[4096] = { }; + + status = send_dma_request(CMD_READ_DMA | CMDF_NO_BM, 0, 512, + prdt, ARRAY_SIZE(prdt)); + + /* Not entirely clear what the expected result is, but this is what we get + * in practice. At least we want to be aware of any changes. */ + g_assert_cmphex(status, ==, BM_STS_ACTIVE | BM_STS_INTR); + assert_bit_clear(inb(IDE_BASE + reg_status), DF | ERR); +} + static void test_bmdma_setup(void) { ide_test_start( @@ -493,6 +518,7 @@ int main(int argc, char **argv) qtest_add_func("/ide/bmdma/simple_rw", test_bmdma_simple_rw); qtest_add_func("/ide/bmdma/short_prdt", test_bmdma_short_prdt); qtest_add_func("/ide/bmdma/long_prdt", test_bmdma_long_prdt); + qtest_add_func("/ide/bmdma/no_busmaster", test_bmdma_no_busmaster); qtest_add_func("/ide/bmdma/teardown", test_bmdma_teardown); qtest_add_func("/ide/flush", test_flush); From d1f3a23bfac4fe38056ab5e07186939b7be8852b Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Thu, 27 Jun 2013 13:50:05 +0200 Subject: [PATCH 10/30] tests: Multiboot mmap test case This adds a test case for Multiboot memory map in the tests/multiboot directory, where future i386 test kernels can be dropped. Because this requires an x86 build host and an installed 32 bit libgcc, the test is not part of a regular 'make check'. The reference output for the test is verified against test runs of the same multiboot kernel booted by some GRUB 0.97. Signed-off-by: Kevin Wolf --- tests/multiboot/Makefile | 18 +++++ tests/multiboot/libc.c | 139 ++++++++++++++++++++++++++++++++++++ tests/multiboot/libc.h | 61 ++++++++++++++++ tests/multiboot/link.ld | 19 +++++ tests/multiboot/mmap.c | 56 +++++++++++++++ tests/multiboot/mmap.out | 93 ++++++++++++++++++++++++ tests/multiboot/multiboot.h | 66 +++++++++++++++++ tests/multiboot/run_test.sh | 81 +++++++++++++++++++++ tests/multiboot/start.S | 51 +++++++++++++ 9 files changed, 584 insertions(+) create mode 100644 tests/multiboot/Makefile create mode 100644 tests/multiboot/libc.c create mode 100644 tests/multiboot/libc.h create mode 100644 tests/multiboot/link.ld create mode 100644 tests/multiboot/mmap.c create mode 100644 tests/multiboot/mmap.out create mode 100644 tests/multiboot/multiboot.h create mode 100755 tests/multiboot/run_test.sh create mode 100644 tests/multiboot/start.S diff --git a/tests/multiboot/Makefile b/tests/multiboot/Makefile new file mode 100644 index 0000000000..34cdd81a90 --- /dev/null +++ b/tests/multiboot/Makefile @@ -0,0 +1,18 @@ +CC=gcc +CCFLAGS=-m32 -Wall -Wextra -Werror -fno-stack-protector -nostdinc -fno-builtin +ASFLAGS=-m32 + +LD=ld +LDFLAGS=-melf_i386 -T link.ld +LIBS=$(shell $(CC) $(CCFLAGS) -print-libgcc-file-name) + +all: mmap.elf + +mmap.elf: start.o mmap.o libc.o + $(LD) $(LDFLAGS) -o $@ $^ $(LIBS) + +%.o: %.c + $(CC) $(CCFLAGS) -c -o $@ $^ + +%.o: %.S + $(CC) $(ASFLAGS) -c -o $@ $^ diff --git a/tests/multiboot/libc.c b/tests/multiboot/libc.c new file mode 100644 index 0000000000..05abbd92cc --- /dev/null +++ b/tests/multiboot/libc.c @@ -0,0 +1,139 @@ +/* + * Copyright (c) 2013 Kevin Wolf + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include "libc.h" + +static void print_char(char c) +{ + outb(0xe9, c); +} + +static void print_str(char *s) +{ + while (*s) { + print_char(*s++); + } +} + +static void print_num(uint64_t value, int base) +{ + char digits[] = "0123456789abcdef"; + char buf[32] = { 0 }; + int i = sizeof(buf) - 2; + + do { + buf[i--] = digits[value % base]; + value /= base; + } while (value); + + print_str(&buf[i + 1]); +} + +void printf(const char *fmt, ...) +{ + va_list ap; + uint64_t val; + char *str; + int base; + int has_long; + int alt_form; + + va_start(ap, fmt); + + for (; *fmt; fmt++) { + if (*fmt != '%') { + print_char(*fmt); + continue; + } + fmt++; + + if (*fmt == '#') { + fmt++; + alt_form = 1; + } else { + alt_form = 0; + } + + if (*fmt == 'l') { + fmt++; + if (*fmt == 'l') { + fmt++; + has_long = 2; + } else { + has_long = 1; + } + } else { + has_long = 0; + } + + switch (*fmt) { + case 'x': + case 'p': + base = 16; + goto convert_number; + case 'd': + case 'i': + case 'u': + base = 10; + goto convert_number; + case 'o': + base = 8; + goto convert_number; + + convert_number: + switch (has_long) { + case 0: + val = va_arg(ap, unsigned int); + break; + case 1: + val = va_arg(ap, unsigned long); + break; + case 2: + val = va_arg(ap, unsigned long long); + break; + } + + if (alt_form && base == 16) { + print_str("0x"); + } + + print_num(val, base); + break; + + case 's': + str = va_arg(ap, char*); + print_str(str); + break; + case '%': + print_char(*fmt); + break; + default: + print_char('%'); + print_char(*fmt); + break; + } + } + + va_end(ap); +} + + diff --git a/tests/multiboot/libc.h b/tests/multiboot/libc.h new file mode 100644 index 0000000000..80eec5b7a0 --- /dev/null +++ b/tests/multiboot/libc.h @@ -0,0 +1,61 @@ +/* + * Copyright (c) 2013 Kevin Wolf + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#ifndef LIBC_H +#define LIBC_H + +/* Integer types */ + +typedef unsigned long long uint64_t; +typedef unsigned int uint32_t; +typedef unsigned short uint16_t; +typedef unsigned char uint8_t; + +typedef signed long long int64_t; +typedef signed int int32_t; +typedef signed short int16_t; +typedef signed char int8_t; + +typedef uint32_t uintptr_t; + + +/* stdarg.h */ + +typedef __builtin_va_list va_list; +#define va_start(ap, X) __builtin_va_start(ap, X) +#define va_arg(ap, type) __builtin_va_arg(ap, type) +#define va_end(ap) __builtin_va_end(ap) + + +/* Port I/O functions */ + +static inline void outb(uint16_t port, uint8_t data) +{ + asm volatile ("outb %0, %1" : : "a" (data), "Nd" (port)); +} + + +/* Misc functions */ + +void printf(const char *fmt, ...); + +#endif diff --git a/tests/multiboot/link.ld b/tests/multiboot/link.ld new file mode 100644 index 0000000000..3d49b58c60 --- /dev/null +++ b/tests/multiboot/link.ld @@ -0,0 +1,19 @@ +ENTRY(_start) + +SECTIONS +{ + . = 0x100000; + .text : { + *(multiboot) + *(.text) + } + .data ALIGN(4096) : { + *(.data) + } + .rodata ALIGN(4096) : { + *(.rodata) + } + .bss ALIGN(4096) : { + *(.bss) + } +} diff --git a/tests/multiboot/mmap.c b/tests/multiboot/mmap.c new file mode 100644 index 0000000000..766b003f38 --- /dev/null +++ b/tests/multiboot/mmap.c @@ -0,0 +1,56 @@ +/* + * Copyright (c) 2013 Kevin Wolf + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include "libc.h" +#include "multiboot.h" + +int test_main(uint32_t magic, struct mb_info *mbi) +{ + uintptr_t entry_addr; + struct mb_mmap_entry *entry; + + (void) magic; + + printf("Lower memory: %dk\n", mbi->mem_lower); + printf("Upper memory: %dk\n", mbi->mem_upper); + + printf("\ne820 memory map:\n"); + + for (entry_addr = mbi->mmap_addr; + entry_addr < mbi->mmap_addr + mbi->mmap_length; + entry_addr += entry->size + 4) + { + entry = (struct mb_mmap_entry*) entry_addr; + + printf("%#llx - %#llx: type %d [entry size: %d]\n", + entry->base_addr, + entry->base_addr + entry->length, + entry->type, + entry->size); + } + + printf("\nmmap start: %#x\n", mbi->mmap_addr); + printf("mmap end: %#x\n", mbi->mmap_addr + mbi->mmap_length); + printf("real mmap end: %#x\n", entry_addr); + + return 0; +} diff --git a/tests/multiboot/mmap.out b/tests/multiboot/mmap.out new file mode 100644 index 0000000000..e70b6eb45d --- /dev/null +++ b/tests/multiboot/mmap.out @@ -0,0 +1,93 @@ + + + +=== Running test case: mmap.elf === + +Lower memory: 639k +Upper memory: 130040k + +e820 memory map: +0x0 - 0x9fc00: type 1 [entry size: 20] +0x9fc00 - 0xa0000: type 2 [entry size: 20] +0xf0000 - 0x100000: type 2 [entry size: 20] +0x100000 - 0x7ffe000: type 1 [entry size: 20] +0x7ffe000 - 0x8000000: type 2 [entry size: 20] +0xfffc0000 - 0x100000000: type 2 [entry size: 20] + +mmap start: 0x9000 +mmap end: 0x9090 +real mmap end: 0x9090 + + +=== Running test case: mmap.elf -m 1.1M === + +Lower memory: 639k +Upper memory: 96k + +e820 memory map: +0x0 - 0x9fc00: type 1 [entry size: 20] +0x9fc00 - 0xa0000: type 2 [entry size: 20] +0xf0000 - 0x100000: type 2 [entry size: 20] +0x100000 - 0x118000: type 1 [entry size: 20] +0x118000 - 0x11a000: type 2 [entry size: 20] +0xfffc0000 - 0x100000000: type 2 [entry size: 20] + +mmap start: 0x9000 +mmap end: 0x9090 +real mmap end: 0x9090 + + +=== Running test case: mmap.elf -m 2G === + +Lower memory: 639k +Upper memory: 2096120k + +e820 memory map: +0x0 - 0x9fc00: type 1 [entry size: 20] +0x9fc00 - 0xa0000: type 2 [entry size: 20] +0xf0000 - 0x100000: type 2 [entry size: 20] +0x100000 - 0x7fffe000: type 1 [entry size: 20] +0x7fffe000 - 0x80000000: type 2 [entry size: 20] +0xfffc0000 - 0x100000000: type 2 [entry size: 20] + +mmap start: 0x9000 +mmap end: 0x9090 +real mmap end: 0x9090 + + +=== Running test case: mmap.elf -m 4G === + +Lower memory: 639k +Upper memory: 3668984k + +e820 memory map: +0x0 - 0x9fc00: type 1 [entry size: 20] +0x9fc00 - 0xa0000: type 2 [entry size: 20] +0xf0000 - 0x100000: type 2 [entry size: 20] +0x100000 - 0xdfffe000: type 1 [entry size: 20] +0xdfffe000 - 0xe0000000: type 2 [entry size: 20] +0xfffc0000 - 0x100000000: type 2 [entry size: 20] +0x100000000 - 0x120000000: type 1 [entry size: 20] + +mmap start: 0x9000 +mmap end: 0x90a8 +real mmap end: 0x90a8 + + +=== Running test case: mmap.elf -m 8G === + +Lower memory: 639k +Upper memory: 3668984k + +e820 memory map: +0x0 - 0x9fc00: type 1 [entry size: 20] +0x9fc00 - 0xa0000: type 2 [entry size: 20] +0xf0000 - 0x100000: type 2 [entry size: 20] +0x100000 - 0xdfffe000: type 1 [entry size: 20] +0xdfffe000 - 0xe0000000: type 2 [entry size: 20] +0xfffc0000 - 0x100000000: type 2 [entry size: 20] +0x100000000 - 0x220000000: type 1 [entry size: 20] + +mmap start: 0x9000 +mmap end: 0x90a8 +real mmap end: 0x90a8 diff --git a/tests/multiboot/multiboot.h b/tests/multiboot/multiboot.h new file mode 100644 index 0000000000..4eb1fbe5d4 --- /dev/null +++ b/tests/multiboot/multiboot.h @@ -0,0 +1,66 @@ +/* + * Copyright (c) 2013 Kevin Wolf + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#ifndef MULTIBOOT_H +#define MULTIBOOT_H + +#include "libc.h" + +struct mb_info { + uint32_t flags; + uint32_t mem_lower; + uint32_t mem_upper; + uint32_t boot_device; + uint32_t cmdline; + uint32_t mods_count; + uint32_t mods_addr; + char syms[16]; + uint32_t mmap_length; + uint32_t mmap_addr; + uint32_t drives_length; + uint32_t drives_addr; + uint32_t config_table; + uint32_t boot_loader_name; + uint32_t apm_table; + uint32_t vbe_control_info; + uint32_t vbe_mode_info; + uint16_t vbe_mode; + uint16_t vbe_interface_seg; + uint16_t vbe_interface_off; + uint16_t vbe_interface_len; +} __attribute__((packed)); + +struct mb_module { + uint32_t mod_start; + uint32_t mod_end; + uint32_t string; + uint32_t reserved; +} __attribute__((packed)); + +struct mb_mmap_entry { + uint32_t size; + uint64_t base_addr; + uint64_t length; + uint32_t type; +} __attribute__((packed)); + +#endif diff --git a/tests/multiboot/run_test.sh b/tests/multiboot/run_test.sh new file mode 100755 index 0000000000..97a9a49f8b --- /dev/null +++ b/tests/multiboot/run_test.sh @@ -0,0 +1,81 @@ +#!/bin/bash + +# Copyright (c) 2013 Kevin Wolf +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. + +QEMU=${QEMU:-"../../x86_64-softmmu/qemu-system-x86_64"} + +run_qemu() { + local kernel=$1 + shift + + echo -e "\n\n=== Running test case: $kernel $@ ===\n" >> test.log + + $QEMU \ + -kernel $kernel \ + -display none \ + -device isa-debugcon,chardev=stdio \ + -chardev file,path=test.out,id=stdio \ + -device isa-debug-exit,iobase=0xf4,iosize=0x4 \ + "$@" + ret=$? + + cat test.out >> test.log +} + +mmap() { + run_qemu mmap.elf + run_qemu mmap.elf -m 1.1M + run_qemu mmap.elf -m 2G + run_qemu mmap.elf -m 4G + run_qemu mmap.elf -m 8G +} + + +make all + +for t in mmap; do + + echo > test.log + $t + + debugexit=$((ret & 0x1)) + ret=$((ret >> 1)) + pass=1 + + if [ $debugexit != 1 ]; then + echo -e "\e[31m ?? \e[0m $t (no debugexit used, exit code $ret)" + pass=0 + elif [ $ret != 0 ]; then + echo -e "\e[31mFAIL\e[0m $t (exit code $ret)" + pass=0 + fi + + if ! diff $t.out test.log > /dev/null 2>&1; then + echo -e "\e[31mFAIL\e[0m $t (output difference)" + diff -u $t.out test.log + pass=0 + fi + + if [ $pass == 1 ]; then + echo -e "\e[32mPASS\e[0m $t" + fi + +done diff --git a/tests/multiboot/start.S b/tests/multiboot/start.S new file mode 100644 index 0000000000..7d33959650 --- /dev/null +++ b/tests/multiboot/start.S @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2013 Kevin Wolf + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +.section multiboot + +#define MB_MAGIC 0x1badb002 +#define MB_FLAGS 0x0 +#define MB_CHECKSUM -(MB_MAGIC + MB_FLAGS) + +.align 4 +.int MB_MAGIC +.int MB_FLAGS +.int MB_CHECKSUM + +.section .text +.global _start +_start: + mov $stack, %esp + push %ebx + push %eax + call test_main + + /* Test device exit */ + outl %eax, $0xf4 + + cli + hlt + jmp . + +.section bss +.space 8192 +stack: From 61ed2684539f7f31304e193d7c0e68d57ce6be88 Mon Sep 17 00:00:00 2001 From: Max Reitz Date: Sat, 26 Oct 2013 15:44:43 +0200 Subject: [PATCH 11/30] block: Don't copy backing file name on error bdrv_open_backing_file() tries to copy the backing file name using pstrcpy directly after calling bdrv_open() to open the backing file without checking whether that was actually successful. If it was not, ps->backing_hd->file will probably be NULL and qemu will crash. Fix this by moving pstrcpy after checking whether bdrv_open() succeeded. Signed-off-by: Max Reitz Reviewed-by: Benoit Canet Reviewed-by: Amos Kong Signed-off-by: Kevin Wolf --- block.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/block.c b/block.c index fd05a8008a..366999b15c 100644 --- a/block.c +++ b/block.c @@ -1004,8 +1004,6 @@ int bdrv_open_backing_file(BlockDriverState *bs, QDict *options, Error **errp) ret = bdrv_open(bs->backing_hd, *backing_filename ? backing_filename : NULL, options, back_flags, back_drv, &local_err); - pstrcpy(bs->backing_file, sizeof(bs->backing_file), - bs->backing_hd->file->filename); if (ret < 0) { bdrv_unref(bs->backing_hd); bs->backing_hd = NULL; @@ -1013,6 +1011,8 @@ int bdrv_open_backing_file(BlockDriverState *bs, QDict *options, Error **errp) error_propagate(errp, local_err); return ret; } + pstrcpy(bs->backing_file, sizeof(bs->backing_file), + bs->backing_hd->file->filename); return 0; } From 29a67f7e9204a25bc4b6221f287ad0ae38d8cbdc Mon Sep 17 00:00:00 2001 From: Liu Yuan Date: Wed, 23 Oct 2013 16:51:51 +0800 Subject: [PATCH 12/30] sheepdog: explicitly set copies as type uint8_t 'copies' is actually uint8_t since day one, but request headers and some helper functions parameterize it as uint32_t for unknown reasons and effectively reserve 24 bytes for possible future use. This patch explicitly set the correct for copies and reserve the left bytes. This is a preparation patch that allow passing copy_policy in request header. Cc: Kevin Wolf Cc: Stefan Hajnoczi Signed-off-by: Liu Yuan Acked-by: MORITA Kazutaka Signed-off-by: Kevin Wolf --- block/sheepdog.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/block/sheepdog.c b/block/sheepdog.c index 5f81c93ee3..b8a2985cd9 100644 --- a/block/sheepdog.c +++ b/block/sheepdog.c @@ -125,8 +125,8 @@ typedef struct SheepdogObjReq { uint32_t data_length; uint64_t oid; uint64_t cow_oid; - uint32_t copies; - uint32_t rsvd; + uint8_t copies; + uint8_t reserved[7]; uint64_t offset; } SheepdogObjReq; @@ -138,7 +138,8 @@ typedef struct SheepdogObjRsp { uint32_t id; uint32_t data_length; uint32_t result; - uint32_t copies; + uint8_t copies; + uint8_t reserved[3]; uint32_t pad[6]; } SheepdogObjRsp; @@ -151,7 +152,8 @@ typedef struct SheepdogVdiReq { uint32_t data_length; uint64_t vdi_size; uint32_t vdi_id; - uint32_t copies; + uint8_t copies; + uint8_t reserved[3]; uint32_t snapid; uint32_t pad[3]; } SheepdogVdiReq; @@ -1081,7 +1083,7 @@ static int coroutine_fn add_aio_request(BDRVSheepdogState *s, AIOReq *aio_req, return 0; } -static int read_write_object(int fd, char *buf, uint64_t oid, int copies, +static int read_write_object(int fd, char *buf, uint64_t oid, uint8_t copies, unsigned int datalen, uint64_t offset, bool write, bool create, uint32_t cache_flags) { @@ -1129,7 +1131,7 @@ static int read_write_object(int fd, char *buf, uint64_t oid, int copies, } } -static int read_object(int fd, char *buf, uint64_t oid, int copies, +static int read_object(int fd, char *buf, uint64_t oid, uint8_t copies, unsigned int datalen, uint64_t offset, uint32_t cache_flags) { @@ -1137,7 +1139,7 @@ static int read_object(int fd, char *buf, uint64_t oid, int copies, false, cache_flags); } -static int write_object(int fd, char *buf, uint64_t oid, int copies, +static int write_object(int fd, char *buf, uint64_t oid, uint8_t copies, unsigned int datalen, uint64_t offset, bool create, uint32_t cache_flags) { From 1841f8801c8898fa57c66e27a08541ffcc6f3948 Mon Sep 17 00:00:00 2001 From: Liu Yuan Date: Wed, 23 Oct 2013 16:51:52 +0800 Subject: [PATCH 13/30] sheepdog: pass copy_policy in the request Currently copy_policy isn't used. Recent sheepdog supports erasure coding, which make use of copy_policy internally, but require client explicitly passing copy_policy from base inode to newly creately inode for snapshot related operations. If connected sheep daemon doesn't utilize copy_policy, passing it to sheep daemon is just one extra null effect operation. So no compatibility problem. With this patch, sheepdog can provide erasure coded volume for QEMU VM. Cc: Kevin Wolf Cc: Stefan Hajnoczi Signed-off-by: Liu Yuan Acked-by: MORITA Kazutaka Signed-off-by: Kevin Wolf --- block/sheepdog.c | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/block/sheepdog.c b/block/sheepdog.c index b8a2985cd9..9f0757bd69 100644 --- a/block/sheepdog.c +++ b/block/sheepdog.c @@ -126,7 +126,8 @@ typedef struct SheepdogObjReq { uint64_t oid; uint64_t cow_oid; uint8_t copies; - uint8_t reserved[7]; + uint8_t copy_policy; + uint8_t reserved[6]; uint64_t offset; } SheepdogObjReq; @@ -139,7 +140,8 @@ typedef struct SheepdogObjRsp { uint32_t data_length; uint32_t result; uint8_t copies; - uint8_t reserved[3]; + uint8_t copy_policy; + uint8_t reserved[2]; uint32_t pad[6]; } SheepdogObjRsp; @@ -153,7 +155,8 @@ typedef struct SheepdogVdiReq { uint64_t vdi_size; uint32_t vdi_id; uint8_t copies; - uint8_t reserved[3]; + uint8_t copy_policy; + uint8_t reserved[2]; uint32_t snapid; uint32_t pad[3]; } SheepdogVdiReq; @@ -1346,7 +1349,8 @@ out: } static int do_sd_create(BDRVSheepdogState *s, char *filename, int64_t vdi_size, - uint32_t base_vid, uint32_t *vdi_id, int snapshot) + uint32_t base_vid, uint32_t *vdi_id, int snapshot, + uint8_t copy_policy) { SheepdogVdiReq hdr; SheepdogVdiRsp *rsp = (SheepdogVdiRsp *)&hdr; @@ -1376,6 +1380,7 @@ static int do_sd_create(BDRVSheepdogState *s, char *filename, int64_t vdi_size, hdr.data_length = wlen; hdr.vdi_size = vdi_size; + hdr.copy_policy = copy_policy; ret = do_req(fd, (SheepdogReq *)&hdr, buf, &wlen, &rlen); @@ -1528,7 +1533,8 @@ static int sd_create(const char *filename, QEMUOptionParameter *options, bdrv_unref(bs); } - ret = do_sd_create(s, vdi, vdi_size, base_vid, &vid, 0); + /* TODO: allow users to specify copy number */ + ret = do_sd_create(s, vdi, vdi_size, base_vid, &vid, 0, 0); if (!prealloc || ret) { goto out; } @@ -1718,7 +1724,7 @@ static int sd_create_branch(BDRVSheepdogState *s) */ deleted = sd_delete(s); ret = do_sd_create(s, s->name, s->inode.vdi_size, s->inode.vdi_id, &vid, - !deleted); + !deleted, s->inode.copy_policy); if (ret) { goto out; } @@ -2008,7 +2014,7 @@ static int sd_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info) } ret = do_sd_create(s, s->name, s->inode.vdi_size, s->inode.vdi_id, &new_vid, - 1); + 1, s->inode.copy_policy); if (ret < 0) { error_report("failed to create inode for snapshot. %s", strerror(errno)); From 8464b273d69c61e33c55347e5b6bc0659687bae2 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Mon, 28 Oct 2013 21:01:51 +0200 Subject: [PATCH 14/30] ahci: fix win7 hang on boot When AHCI executes an asynchronous IDE command, it checked DRDY without checking either DRQ or BSY. This sometimes caused interrupt to be sent before command is actually completed. This resulted in a race condition: if guest then managed to access the device before command has completed, it would hang waiting for an interrupt. This was observed with windows 7 guests. To fix, check for DRQ or BSY in additiona to DRDY, if set, the command is asynchronous so delay the interrupt until asynchronous done callback is invoked. Reported-by: Michael S. Tsirkin Reviewed-by: Michael S. Tsirkin Tested-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin Signed-off-by: Kevin Wolf --- hw/ide/ahci.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/hw/ide/ahci.c b/hw/ide/ahci.c index a8be62cf99..fbea9e8886 100644 --- a/hw/ide/ahci.c +++ b/hw/ide/ahci.c @@ -961,7 +961,8 @@ static int handle_cmd(AHCIState *s, int port, int slot) /* We're ready to process the command in FIS byte 2. */ ide_exec_cmd(&s->dev[port].port, cmd_fis[2]); - if (s->dev[port].port.ifs[0].status & READY_STAT) { + if ((s->dev[port].port.ifs[0].status & (READY_STAT|DRQ_STAT|BUSY_STAT)) == + READY_STAT) { ahci_write_fis_d2h(&s->dev[port], cmd_fis); } } From 87a5debd3161d24a7d4c685e3c0d8765b5d92a74 Mon Sep 17 00:00:00 2001 From: Thibaut LAURENT Date: Fri, 25 Oct 2013 02:15:07 +0200 Subject: [PATCH 15/30] block: Disable BDRV_O_COPY_ON_READ for the backing file Since commit 0ebd24e0a203cf2852c310b59fbe050190dc6c8c, bdrv_open_common will throw an error when trying to open a file read-only with the BDRV_O_COPY_ON_READ flag set. Although BDRV_O_RDWR is unset for the backing files, BDRV_O_COPY_ON_READ is still passed on if copy-on-read was requested for the drive. Let's unset this flag too before opening the backing file, or bdrv_open_common will fail. Signed-off-by: Thibaut LAURENT Reviewed-by: Benoit Canet Reviewed-by: Max Reitz Signed-off-by: Kevin Wolf --- block.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/block.c b/block.c index 366999b15c..61795fea46 100644 --- a/block.c +++ b/block.c @@ -999,7 +999,8 @@ int bdrv_open_backing_file(BlockDriverState *bs, QDict *options, Error **errp) } /* backing files always opened read-only */ - back_flags = bs->open_flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT); + back_flags = bs->open_flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | + BDRV_O_COPY_ON_READ); ret = bdrv_open(bs->backing_hd, *backing_filename ? backing_filename : NULL, options, From b94a2610573cd9314f244207c8b04cb75e42d7f8 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Tue, 29 Oct 2013 12:18:58 +0100 Subject: [PATCH 16/30] block: Avoid unecessary drv->bdrv_getlength() calls The block layer generally keeps the size of an image cached in bs->total_sectors so that it doesn't have to perform expensive operations to get the size whenever it needs it. This doesn't work however when using a backend that can change its size without qemu being aware of it, i.e. passthrough of removable media like CD-ROMs or floppy disks. For this reason, the caching is disabled when a removable device is used. It is obvious that checking whether the _guest_ device has removable media isn't the right thing to do when we want to know whether the size of the host backend can change. To make things worse, non-top-level BlockDriverStates never have any device attached, which makes qemu assume they are removable, so drv->bdrv_getlength() is always called on the protocol layer. In the case of raw-posix, this causes unnecessary lseek() system calls, which turned out to be rather expensive. This patch completely changes the logic and disables bs->total_sectors caching only for certain block driver types, for which a size change is expected: host_cdrom and host_floppy on POSIX, host_device on win32; also the raw format in case it sits on top of one of these protocols, but in the common case the nested bdrv_getlength() call on the protocol driver will use the cache again and avoid an expensive drv->bdrv_getlength() call. Signed-off-by: Kevin Wolf Reviewed-by: Paolo Bonzini --- block.c | 7 ++++--- block/raw-posix.c | 9 ++++++--- block/raw-win32.c | 4 +++- block/raw_bsd.c | 1 + include/block/block_int.h | 3 +++ 5 files changed, 17 insertions(+), 7 deletions(-) diff --git a/block.c b/block.c index 61795fea46..58efb5b4e4 100644 --- a/block.c +++ b/block.c @@ -2869,9 +2869,10 @@ int64_t bdrv_getlength(BlockDriverState *bs) if (!drv) return -ENOMEDIUM; - if (bdrv_dev_has_removable_media(bs)) { - if (drv->bdrv_getlength) { - return drv->bdrv_getlength(bs); + if (drv->has_variable_length) { + int ret = refresh_total_sectors(bs, bs->total_sectors); + if (ret < 0) { + return ret; } } return bs->total_sectors * BDRV_SECTOR_SIZE; diff --git a/block/raw-posix.c b/block/raw-posix.c index 6f03fbf793..f6d48bbdb2 100644 --- a/block/raw-posix.c +++ b/block/raw-posix.c @@ -1715,7 +1715,8 @@ static BlockDriver bdrv_host_floppy = { .bdrv_aio_flush = raw_aio_flush, .bdrv_truncate = raw_truncate, - .bdrv_getlength = raw_getlength, + .bdrv_getlength = raw_getlength, + .has_variable_length = true, .bdrv_get_allocated_file_size = raw_get_allocated_file_size, @@ -1824,7 +1825,8 @@ static BlockDriver bdrv_host_cdrom = { .bdrv_aio_flush = raw_aio_flush, .bdrv_truncate = raw_truncate, - .bdrv_getlength = raw_getlength, + .bdrv_getlength = raw_getlength, + .has_variable_length = true, .bdrv_get_allocated_file_size = raw_get_allocated_file_size, @@ -1951,7 +1953,8 @@ static BlockDriver bdrv_host_cdrom = { .bdrv_aio_flush = raw_aio_flush, .bdrv_truncate = raw_truncate, - .bdrv_getlength = raw_getlength, + .bdrv_getlength = raw_getlength, + .has_variable_length = true, .bdrv_get_allocated_file_size = raw_get_allocated_file_size, diff --git a/block/raw-win32.c b/block/raw-win32.c index 676b5701db..2741e4de10 100644 --- a/block/raw-win32.c +++ b/block/raw-win32.c @@ -616,7 +616,9 @@ static BlockDriver bdrv_host_device = { .bdrv_aio_writev = raw_aio_writev, .bdrv_aio_flush = raw_aio_flush, - .bdrv_getlength = raw_getlength, + .bdrv_getlength = raw_getlength, + .has_variable_length = true, + .bdrv_get_allocated_file_size = raw_get_allocated_file_size, }; diff --git a/block/raw_bsd.c b/block/raw_bsd.c index 0078c1baeb..2265dcc03f 100644 --- a/block/raw_bsd.c +++ b/block/raw_bsd.c @@ -178,6 +178,7 @@ static BlockDriver bdrv_raw = { .bdrv_co_get_block_status = &raw_co_get_block_status, .bdrv_truncate = &raw_truncate, .bdrv_getlength = &raw_getlength, + .has_variable_length = true, .bdrv_get_info = &raw_get_info, .bdrv_is_inserted = &raw_is_inserted, .bdrv_media_changed = &raw_media_changed, diff --git a/include/block/block_int.h b/include/block/block_int.h index a48731d539..166606615c 100644 --- a/include/block/block_int.h +++ b/include/block/block_int.h @@ -156,8 +156,11 @@ struct BlockDriver { const char *protocol_name; int (*bdrv_truncate)(BlockDriverState *bs, int64_t offset); + int64_t (*bdrv_getlength)(BlockDriverState *bs); + bool has_variable_length; int64_t (*bdrv_get_allocated_file_size)(BlockDriverState *bs); + int (*bdrv_write_compressed)(BlockDriverState *bs, int64_t sector_num, const uint8_t *buf, int nb_sectors); From a7cf03d4e150abec88f5837461242dc8a0eee189 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Tue, 29 Oct 2013 17:05:35 +0100 Subject: [PATCH 17/30] qemu-iotests: Fix 051 reference output Commit 684b254 forgot to update it. Signed-off-by: Kevin Wolf --- tests/qemu-iotests/051.out | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/qemu-iotests/051.out b/tests/qemu-iotests/051.out index 2839e32807..15deef6dc3 100644 --- a/tests/qemu-iotests/051.out +++ b/tests/qemu-iotests/051.out @@ -24,7 +24,7 @@ QEMU X.Y.Z monitor - type 'help' for more information (qemu) iininfinfoinfo info binfo blinfo bloinfo blocinfo block ide0-hd0: TEST_DIR/t.qcow2 (qcow2) Backing file: TEST_DIR/t.qcow2.orig (chain depth: 1) - [not inserted](qemu) qququiquit +(qemu) qququiquit === Enable and disable lazy refcounting on the command line, plus some invalid values === From 915365a9c622be52c87fcc1cc9d63fbc5cd75b6d Mon Sep 17 00:00:00 2001 From: Fam Zheng Date: Wed, 30 Oct 2013 17:23:54 +0800 Subject: [PATCH 18/30] qemu-iotests: drop duplicated "create_image" There's a same common function in iotests.py Signed-off-by: Fam Zheng Signed-off-by: Kevin Wolf --- tests/qemu-iotests/040 | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/tests/qemu-iotests/040 b/tests/qemu-iotests/040 index aad535a74b..a2e18c56d4 100755 --- a/tests/qemu-iotests/040 +++ b/tests/qemu-iotests/040 @@ -54,22 +54,12 @@ class ImageCommitTestCase(iotests.QMPTestCase): self.assert_no_active_commit() - def create_image(self, name, size): - file = open(name, 'w') - i = 0 - while i < size: - sector = struct.pack('>l504xl', i / 512, i / 512) - file.write(sector) - i = i + 512 - file.close() - - class TestSingleDrive(ImageCommitTestCase): image_len = 1 * 1024 * 1024 test_len = 1 * 1024 * 256 def setUp(self): - self.create_image(backing_img, TestSingleDrive.image_len) + iotests.create_image(backing_img, TestSingleDrive.image_len) qemu_img('create', '-f', iotests.imgfmt, '-o', 'backing_file=%s' % backing_img, mid_img) qemu_img('create', '-f', iotests.imgfmt, '-o', 'backing_file=%s' % mid_img, test_img) qemu_io('-c', 'write -P 0xab 0 524288', backing_img) @@ -167,7 +157,7 @@ class TestRelativePaths(ImageCommitTestCase): except OSError as exception: if exception.errno != errno.EEXIST: raise - self.create_image(self.backing_img_abs, TestRelativePaths.image_len) + iotests.create_image(self.backing_img_abs, TestRelativePaths.image_len) qemu_img('create', '-f', iotests.imgfmt, '-o', 'backing_file=%s' % self.backing_img_abs, self.mid_img_abs) qemu_img('create', '-f', iotests.imgfmt, '-o', 'backing_file=%s' % self.mid_img_abs, self.test_img) qemu_img('rebase', '-u', '-b', self.backing_img, self.mid_img_abs) From 321fd7d2b88defe11528e4d5a9f686c89ebee1ee Mon Sep 17 00:00:00 2001 From: Max Reitz Date: Tue, 29 Oct 2013 19:18:54 +0100 Subject: [PATCH 19/30] qemu-iotests: Test case for backing file deletion Add a test case for trying to open an image file where it is impossible to open its backing file (in this case, because it was deleted). When doing this, qemu (or qemu-io in this case) should not crash but rather print an appropriate error message. Signed-off-by: Max Reitz Reviewed-by: Fam Zheng Signed-off-by: Kevin Wolf --- tests/qemu-iotests/069 | 59 ++++++++++++++++++++++++++++++++++++++ tests/qemu-iotests/069.out | 8 ++++++ tests/qemu-iotests/group | 1 + 3 files changed, 68 insertions(+) create mode 100755 tests/qemu-iotests/069 create mode 100644 tests/qemu-iotests/069.out diff --git a/tests/qemu-iotests/069 b/tests/qemu-iotests/069 new file mode 100755 index 0000000000..3042803a81 --- /dev/null +++ b/tests/qemu-iotests/069 @@ -0,0 +1,59 @@ +#!/bin/bash +# +# Test case for deleting a backing file +# +# Copyright (C) 2013 Red Hat, Inc. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +# + +# creator +owner=mreitz@redhat.com + +seq="$(basename $0)" +echo "QA output created by $seq" + +here="$PWD" +tmp=/tmp/$$ +status=1 # failure is the default! + +_cleanup() +{ + _cleanup_test_img +} +trap "_cleanup; exit \$status" 0 1 2 3 15 + +# get standard environment, filters and checks +. ./common.rc +. ./common.filter + +_supported_fmt cow qed qcow qcow2 vmdk +_supported_proto generic +_supported_os Linux + +IMG_SIZE=128K + +echo +echo "=== Creating an image with a backing file and deleting that file ===" +echo +TEST_IMG="$TEST_IMG.base" _make_test_img $IMG_SIZE +_make_test_img -b "$TEST_IMG.base" $IMG_SIZE +rm -f "$TEST_IMG.base" +# Just open the image and close it right again (this should print an error message) +$QEMU_IO -c quit "$TEST_IMG" 2>&1 | _filter_testdir | _filter_imgfmt + +# success, all done +echo "*** done" +rm -f $seq.full +status=0 diff --git a/tests/qemu-iotests/069.out b/tests/qemu-iotests/069.out new file mode 100644 index 0000000000..364881429c --- /dev/null +++ b/tests/qemu-iotests/069.out @@ -0,0 +1,8 @@ +QA output created by 069 + +=== Creating an image with a backing file and deleting that file === + +Formatting 'TEST_DIR/t.IMGFMT.base', fmt=IMGFMT size=131072 +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=131072 backing_file='TEST_DIR/t.IMGFMT.base' +qemu-io: can't open device TEST_DIR/t.IMGFMT: Could not open file: No such file or directory +*** done diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group index 3ca9cba2e4..c57ff35843 100644 --- a/tests/qemu-iotests/group +++ b/tests/qemu-iotests/group @@ -74,3 +74,4 @@ 066 rw auto 067 rw auto 068 rw auto +069 rw auto From 80731d9da560461bbdcda5ad4b05f4a8a846fccd Mon Sep 17 00:00:00 2001 From: MORITA Kazutaka Date: Thu, 24 Oct 2013 16:01:11 +0900 Subject: [PATCH 20/30] sheepdog: check return values of qemu_co_recv/send correctly If qemu_co_recv/send doesn't return the specified length, it means that an error happened. Signed-off-by: MORITA Kazutaka Tested-by: Liu Yuan Reviewed-by: Liu Yuan Signed-off-by: Kevin Wolf --- block/sheepdog.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/block/sheepdog.c b/block/sheepdog.c index 9f0757bd69..c6e57f0685 100644 --- a/block/sheepdog.c +++ b/block/sheepdog.c @@ -494,13 +494,13 @@ static coroutine_fn int send_co_req(int sockfd, SheepdogReq *hdr, void *data, int ret; ret = qemu_co_send(sockfd, hdr, sizeof(*hdr)); - if (ret < sizeof(*hdr)) { + if (ret != sizeof(*hdr)) { error_report("failed to send a req, %s", strerror(errno)); return ret; } ret = qemu_co_send(sockfd, data, *wlen); - if (ret < *wlen) { + if (ret != *wlen) { error_report("failed to send a req, %s", strerror(errno)); } @@ -546,7 +546,7 @@ static coroutine_fn void do_co_req(void *opaque) qemu_aio_set_fd_handler(sockfd, restart_co_req, NULL, co); ret = qemu_co_recv(sockfd, hdr, sizeof(*hdr)); - if (ret < sizeof(*hdr)) { + if (ret != sizeof(*hdr)) { error_report("failed to get a rsp, %s", strerror(errno)); ret = -errno; goto out; @@ -558,7 +558,7 @@ static coroutine_fn void do_co_req(void *opaque) if (*rlen) { ret = qemu_co_recv(sockfd, data, *rlen); - if (ret < *rlen) { + if (ret != *rlen) { error_report("failed to get the data, %s", strerror(errno)); ret = -errno; goto out; @@ -669,7 +669,7 @@ static void coroutine_fn aio_read_response(void *opaque) /* read a header */ ret = qemu_co_recv(fd, &rsp, sizeof(rsp)); - if (ret < 0) { + if (ret != sizeof(rsp)) { error_report("failed to get the header, %s", strerror(errno)); goto out; } @@ -720,7 +720,7 @@ static void coroutine_fn aio_read_response(void *opaque) case AIOCB_READ_UDATA: ret = qemu_co_recvv(fd, acb->qiov->iov, acb->qiov->niov, aio_req->iov_offset, rsp.data_length); - if (ret < 0) { + if (ret != rsp.data_length) { error_report("failed to get the data, %s", strerror(errno)); goto out; } @@ -1064,7 +1064,7 @@ static int coroutine_fn add_aio_request(BDRVSheepdogState *s, AIOReq *aio_req, /* send a header */ ret = qemu_co_send(s->fd, &hdr, sizeof(hdr)); - if (ret < 0) { + if (ret != sizeof(hdr)) { qemu_co_mutex_unlock(&s->lock); error_report("failed to send a req, %s", strerror(errno)); return -errno; @@ -1072,7 +1072,7 @@ static int coroutine_fn add_aio_request(BDRVSheepdogState *s, AIOReq *aio_req, if (wlen) { ret = qemu_co_sendv(s->fd, iov, niov, aio_req->iov_offset, wlen); - if (ret < 0) { + if (ret != wlen) { qemu_co_mutex_unlock(&s->lock); error_report("failed to send a data, %s", strerror(errno)); return -errno; From 2412aec745066495f0c91dfcde9258382d7850e9 Mon Sep 17 00:00:00 2001 From: MORITA Kazutaka Date: Thu, 24 Oct 2013 16:01:12 +0900 Subject: [PATCH 21/30] sheepdog: handle vdi objects in resend_aio_req The current resend_aio_req() doesn't work when the request is against vdi objects. This fixes the problem. Signed-off-by: MORITA Kazutaka Tested-by: Liu Yuan Reviewed-by: Liu Yuan Signed-off-by: Kevin Wolf --- block/sheepdog.c | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/block/sheepdog.c b/block/sheepdog.c index c6e57f0685..ddb8bfbf79 100644 --- a/block/sheepdog.c +++ b/block/sheepdog.c @@ -1197,11 +1197,15 @@ static int coroutine_fn resend_aioreq(BDRVSheepdogState *s, AIOReq *aio_req) return ret; } - aio_req->oid = vid_to_data_oid(s->inode.vdi_id, - data_oid_to_idx(aio_req->oid)); + if (is_data_obj(aio_req->oid)) { + aio_req->oid = vid_to_data_oid(s->inode.vdi_id, + data_oid_to_idx(aio_req->oid)); + } else { + aio_req->oid = vid_to_vdi_oid(s->inode.vdi_id); + } /* check whether this request becomes a CoW one */ - if (acb->aiocb_type == AIOCB_WRITE_UDATA) { + if (acb->aiocb_type == AIOCB_WRITE_UDATA && is_data_obj(aio_req->oid)) { int idx = data_oid_to_idx(aio_req->oid); AIOReq *areq; @@ -1229,8 +1233,15 @@ static int coroutine_fn resend_aioreq(BDRVSheepdogState *s, AIOReq *aio_req) create = true; } out: - return add_aio_request(s, aio_req, acb->qiov->iov, acb->qiov->niov, - create, acb->aiocb_type); + if (is_data_obj(aio_req->oid)) { + return add_aio_request(s, aio_req, acb->qiov->iov, acb->qiov->niov, + create, acb->aiocb_type); + } else { + struct iovec iov; + iov.iov_base = &s->inode; + iov.iov_len = sizeof(s->inode); + return add_aio_request(s, aio_req, &iov, 1, false, AIOCB_WRITE_UDATA); + } } /* TODO Convert to fine grained options */ From 72e0996c41d879473bb2aa85c8eeec129ae8ec9b Mon Sep 17 00:00:00 2001 From: MORITA Kazutaka Date: Thu, 24 Oct 2013 16:01:13 +0900 Subject: [PATCH 22/30] sheepdog: reload inode outside of resend_aioreq This prepares for using resend_aioreq() after reconnecting to the sheepdog server. Signed-off-by: MORITA Kazutaka Tested-by: Liu Yuan Reviewed-by: Liu Yuan Signed-off-by: Kevin Wolf --- block/sheepdog.c | 33 +++++++++++++++++++-------------- 1 file changed, 19 insertions(+), 14 deletions(-) diff --git a/block/sheepdog.c b/block/sheepdog.c index ddb8bfbf79..5311fb18a9 100644 --- a/block/sheepdog.c +++ b/block/sheepdog.c @@ -227,6 +227,11 @@ static inline uint64_t data_oid_to_idx(uint64_t oid) return oid & (MAX_DATA_OBJS - 1); } +static inline uint32_t oid_to_vid(uint64_t oid) +{ + return (oid & ~VDI_BIT) >> VDI_SPACE_SHIFT; +} + static inline uint64_t vid_to_vdi_oid(uint32_t vid) { return VDI_BIT | ((uint64_t)vid << VDI_SPACE_SHIFT); @@ -605,7 +610,7 @@ static int coroutine_fn add_aio_request(BDRVSheepdogState *s, AIOReq *aio_req, struct iovec *iov, int niov, bool create, enum AIOCBState aiocb_type); static int coroutine_fn resend_aioreq(BDRVSheepdogState *s, AIOReq *aio_req); - +static int reload_inode(BDRVSheepdogState *s, uint32_t snapid, const char *tag); static AIOReq *find_pending_req(BDRVSheepdogState *s, uint64_t oid) { @@ -753,6 +758,19 @@ static void coroutine_fn aio_read_response(void *opaque) case SD_RES_SUCCESS: break; case SD_RES_READONLY: + if (s->inode.vdi_id == oid_to_vid(aio_req->oid)) { + ret = reload_inode(s, 0, ""); + if (ret < 0) { + goto out; + } + } + + if (is_data_obj(aio_req->oid)) { + aio_req->oid = vid_to_data_oid(s->inode.vdi_id, + data_oid_to_idx(aio_req->oid)); + } else { + aio_req->oid = vid_to_vdi_oid(s->inode.vdi_id); + } ret = resend_aioreq(s, aio_req); if (ret == SD_RES_SUCCESS) { goto out; @@ -1190,19 +1208,6 @@ static int coroutine_fn resend_aioreq(BDRVSheepdogState *s, AIOReq *aio_req) { SheepdogAIOCB *acb = aio_req->aiocb; bool create = false; - int ret; - - ret = reload_inode(s, 0, ""); - if (ret < 0) { - return ret; - } - - if (is_data_obj(aio_req->oid)) { - aio_req->oid = vid_to_data_oid(s->inode.vdi_id, - data_oid_to_idx(aio_req->oid)); - } else { - aio_req->oid = vid_to_vdi_oid(s->inode.vdi_id); - } /* check whether this request becomes a CoW one */ if (acb->aiocb_type == AIOCB_WRITE_UDATA && is_data_obj(aio_req->oid)) { From 3ab7bd1917d210eac5d2a80593899ec70c4e998c Mon Sep 17 00:00:00 2001 From: MORITA Kazutaka Date: Thu, 24 Oct 2013 16:01:14 +0900 Subject: [PATCH 23/30] coroutine: add co_aio_sleep_ns() to allow sleep in block drivers This helper function behaves similarly to co_sleep_ns(), but the sleeping coroutine will be resumed when using qemu_aio_wait(). Signed-off-by: MORITA Kazutaka Tested-by: Liu Yuan Reviewed-by: Liu Yuan Signed-off-by: Kevin Wolf --- include/block/coroutine.h | 9 +++++++++ qemu-coroutine-sleep.c | 14 ++++++++++++++ 2 files changed, 23 insertions(+) diff --git a/include/block/coroutine.h b/include/block/coroutine.h index 4232569c53..4d5c0cfdd7 100644 --- a/include/block/coroutine.h +++ b/include/block/coroutine.h @@ -215,6 +215,15 @@ void qemu_co_rwlock_unlock(CoRwlock *lock); */ void coroutine_fn co_sleep_ns(QEMUClockType type, int64_t ns); +/** + * Yield the coroutine for a given duration + * + * Behaves similarly to co_sleep_ns(), but the sleeping coroutine will be + * resumed when using qemu_aio_wait(). + */ +void coroutine_fn co_aio_sleep_ns(AioContext *ctx, QEMUClockType type, + int64_t ns); + /** * Yield until a file descriptor becomes readable * diff --git a/qemu-coroutine-sleep.c b/qemu-coroutine-sleep.c index f6db978c1d..ad78fbaa2a 100644 --- a/qemu-coroutine-sleep.c +++ b/qemu-coroutine-sleep.c @@ -13,6 +13,7 @@ #include "block/coroutine.h" #include "qemu/timer.h" +#include "block/aio.h" typedef struct CoSleepCB { QEMUTimer *ts; @@ -37,3 +38,16 @@ void coroutine_fn co_sleep_ns(QEMUClockType type, int64_t ns) timer_del(sleep_cb.ts); timer_free(sleep_cb.ts); } + +void coroutine_fn co_aio_sleep_ns(AioContext *ctx, QEMUClockType type, + int64_t ns) +{ + CoSleepCB sleep_cb = { + .co = qemu_coroutine_self(), + }; + sleep_cb.ts = aio_timer_new(ctx, type, SCALE_NS, co_sleep_cb, &sleep_cb); + timer_mod(sleep_cb.ts, qemu_clock_get_ns(type) + ns); + qemu_coroutine_yield(); + timer_del(sleep_cb.ts); + timer_free(sleep_cb.ts); +} From 011603cacf1cae9212453efd82ec908cd42ce466 Mon Sep 17 00:00:00 2001 From: MORITA Kazutaka Date: Thu, 24 Oct 2013 16:01:15 +0900 Subject: [PATCH 24/30] sheepdog: try to reconnect to sheepdog after network error This introduces a failed request queue and links all the inflight requests to the list after network error happens. After QEMU reconnects to the sheepdog server successfully, the sheepdog block driver will retry all the requests in the failed queue. Signed-off-by: MORITA Kazutaka Tested-by: Liu Yuan Reviewed-by: Liu Yuan Signed-off-by: Kevin Wolf --- block/sheepdog.c | 80 +++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 66 insertions(+), 14 deletions(-) diff --git a/block/sheepdog.c b/block/sheepdog.c index 5311fb18a9..fd1447e906 100644 --- a/block/sheepdog.c +++ b/block/sheepdog.c @@ -304,6 +304,8 @@ struct SheepdogAIOCB { }; typedef struct BDRVSheepdogState { + BlockDriverState *bs; + SheepdogInode inode; uint32_t min_dirty_data_idx; @@ -323,8 +325,11 @@ typedef struct BDRVSheepdogState { Coroutine *co_recv; uint32_t aioreq_seq_num; + + /* Every aio request must be linked to either of these queues. */ QLIST_HEAD(inflight_aio_head, AIOReq) inflight_aio_head; QLIST_HEAD(pending_aio_head, AIOReq) pending_aio_head; + QLIST_HEAD(failed_aio_head, AIOReq) failed_aio_head; } BDRVSheepdogState; static const char * sd_strerror(int err) @@ -611,6 +616,8 @@ static int coroutine_fn add_aio_request(BDRVSheepdogState *s, AIOReq *aio_req, enum AIOCBState aiocb_type); static int coroutine_fn resend_aioreq(BDRVSheepdogState *s, AIOReq *aio_req); static int reload_inode(BDRVSheepdogState *s, uint32_t snapid, const char *tag); +static int get_sheep_fd(BDRVSheepdogState *s); +static void co_write_request(void *opaque); static AIOReq *find_pending_req(BDRVSheepdogState *s, uint64_t oid) { @@ -652,6 +659,51 @@ static void coroutine_fn send_pending_req(BDRVSheepdogState *s, uint64_t oid) } } +static coroutine_fn void reconnect_to_sdog(void *opaque) +{ + BDRVSheepdogState *s = opaque; + AIOReq *aio_req, *next; + + qemu_aio_set_fd_handler(s->fd, NULL, NULL, NULL); + close(s->fd); + s->fd = -1; + + /* Wait for outstanding write requests to be completed. */ + while (s->co_send != NULL) { + co_write_request(opaque); + } + + /* Try to reconnect the sheepdog server every one second. */ + while (s->fd < 0) { + s->fd = get_sheep_fd(s); + if (s->fd < 0) { + DPRINTF("Wait for connection to be established\n"); + co_aio_sleep_ns(bdrv_get_aio_context(s->bs), QEMU_CLOCK_REALTIME, + 1000000000ULL); + } + }; + + /* + * Now we have to resend all the request in the inflight queue. However, + * resend_aioreq() can yield and newly created requests can be added to the + * inflight queue before the coroutine is resumed. To avoid mixing them, we + * have to move all the inflight requests to the failed queue before + * resend_aioreq() is called. + */ + QLIST_FOREACH_SAFE(aio_req, &s->inflight_aio_head, aio_siblings, next) { + QLIST_REMOVE(aio_req, aio_siblings); + QLIST_INSERT_HEAD(&s->failed_aio_head, aio_req, aio_siblings); + } + + /* Resend all the failed aio requests. */ + while (!QLIST_EMPTY(&s->failed_aio_head)) { + aio_req = QLIST_FIRST(&s->failed_aio_head); + QLIST_REMOVE(aio_req, aio_siblings); + QLIST_INSERT_HEAD(&s->inflight_aio_head, aio_req, aio_siblings); + resend_aioreq(s, aio_req); + } +} + /* * Receive responses of the I/O requests. * @@ -668,15 +720,11 @@ static void coroutine_fn aio_read_response(void *opaque) SheepdogAIOCB *acb; uint64_t idx; - if (QLIST_EMPTY(&s->inflight_aio_head)) { - goto out; - } - /* read a header */ ret = qemu_co_recv(fd, &rsp, sizeof(rsp)); if (ret != sizeof(rsp)) { error_report("failed to get the header, %s", strerror(errno)); - goto out; + goto err; } /* find the right aio_req from the inflight aio list */ @@ -687,7 +735,7 @@ static void coroutine_fn aio_read_response(void *opaque) } if (!aio_req) { error_report("cannot find aio_req %x", rsp.id); - goto out; + goto err; } acb = aio_req->aiocb; @@ -727,7 +775,7 @@ static void coroutine_fn aio_read_response(void *opaque) aio_req->iov_offset, rsp.data_length); if (ret != rsp.data_length) { error_report("failed to get the data, %s", strerror(errno)); - goto out; + goto err; } break; case AIOCB_FLUSH_CACHE: @@ -761,10 +809,9 @@ static void coroutine_fn aio_read_response(void *opaque) if (s->inode.vdi_id == oid_to_vid(aio_req->oid)) { ret = reload_inode(s, 0, ""); if (ret < 0) { - goto out; + goto err; } } - if (is_data_obj(aio_req->oid)) { aio_req->oid = vid_to_data_oid(s->inode.vdi_id, data_oid_to_idx(aio_req->oid)); @@ -792,6 +839,10 @@ static void coroutine_fn aio_read_response(void *opaque) } out: s->co_recv = NULL; + return; +err: + s->co_recv = NULL; + reconnect_to_sdog(opaque); } static void co_read_response(void *opaque) @@ -1083,22 +1134,20 @@ static int coroutine_fn add_aio_request(BDRVSheepdogState *s, AIOReq *aio_req, /* send a header */ ret = qemu_co_send(s->fd, &hdr, sizeof(hdr)); if (ret != sizeof(hdr)) { - qemu_co_mutex_unlock(&s->lock); error_report("failed to send a req, %s", strerror(errno)); - return -errno; + goto out; } if (wlen) { ret = qemu_co_sendv(s->fd, iov, niov, aio_req->iov_offset, wlen); if (ret != wlen) { - qemu_co_mutex_unlock(&s->lock); error_report("failed to send a data, %s", strerror(errno)); - return -errno; } } - +out: socket_set_cork(s->fd, 0); qemu_aio_set_fd_handler(s->fd, co_read_response, NULL, s); + s->co_send = NULL; qemu_co_mutex_unlock(&s->lock); return 0; @@ -1276,6 +1325,8 @@ static int sd_open(BlockDriverState *bs, QDict *options, int flags, Error *local_err = NULL; const char *filename; + s->bs = bs; + opts = qemu_opts_create_nofail(&runtime_opts); qemu_opts_absorb_qdict(opts, options, &local_err); if (error_is_set(&local_err)) { @@ -1289,6 +1340,7 @@ static int sd_open(BlockDriverState *bs, QDict *options, int flags, QLIST_INIT(&s->inflight_aio_head); QLIST_INIT(&s->pending_aio_head); + QLIST_INIT(&s->failed_aio_head); s->fd = -1; memset(vdi, 0, sizeof(vdi)); From a37dcdf9aea8e19fcec6b1c5aa2c27c325fc4644 Mon Sep 17 00:00:00 2001 From: MORITA Kazutaka Date: Thu, 24 Oct 2013 16:01:16 +0900 Subject: [PATCH 25/30] sheepdog: make add_aio_request and send_aioreq void functions These functions no longer return errors. We can make them void functions and simplify the codes. Signed-off-by: MORITA Kazutaka Tested-by: Liu Yuan Reviewed-by: Liu Yuan Signed-off-by: Kevin Wolf --- block/sheepdog.c | 66 +++++++++++++----------------------------------- 1 file changed, 17 insertions(+), 49 deletions(-) diff --git a/block/sheepdog.c b/block/sheepdog.c index fd1447e906..8790494126 100644 --- a/block/sheepdog.c +++ b/block/sheepdog.c @@ -611,10 +611,10 @@ static int do_req(int sockfd, SheepdogReq *hdr, void *data, return srco.ret; } -static int coroutine_fn add_aio_request(BDRVSheepdogState *s, AIOReq *aio_req, +static void coroutine_fn add_aio_request(BDRVSheepdogState *s, AIOReq *aio_req, struct iovec *iov, int niov, bool create, enum AIOCBState aiocb_type); -static int coroutine_fn resend_aioreq(BDRVSheepdogState *s, AIOReq *aio_req); +static void coroutine_fn resend_aioreq(BDRVSheepdogState *s, AIOReq *aio_req); static int reload_inode(BDRVSheepdogState *s, uint32_t snapid, const char *tag); static int get_sheep_fd(BDRVSheepdogState *s); static void co_write_request(void *opaque); @@ -640,22 +640,14 @@ static void coroutine_fn send_pending_req(BDRVSheepdogState *s, uint64_t oid) { AIOReq *aio_req; SheepdogAIOCB *acb; - int ret; while ((aio_req = find_pending_req(s, oid)) != NULL) { acb = aio_req->aiocb; /* move aio_req from pending list to inflight one */ QLIST_REMOVE(aio_req, aio_siblings); QLIST_INSERT_HEAD(&s->inflight_aio_head, aio_req, aio_siblings); - ret = add_aio_request(s, aio_req, acb->qiov->iov, - acb->qiov->niov, false, acb->aiocb_type); - if (ret < 0) { - error_report("add_aio_request is failed"); - free_aio_req(s, aio_req); - if (!acb->nr_pending) { - sd_finish_aiocb(acb); - } - } + add_aio_request(s, aio_req, acb->qiov->iov, acb->qiov->niov, false, + acb->aiocb_type); } } @@ -818,11 +810,8 @@ static void coroutine_fn aio_read_response(void *opaque) } else { aio_req->oid = vid_to_vdi_oid(s->inode.vdi_id); } - ret = resend_aioreq(s, aio_req); - if (ret == SD_RES_SUCCESS) { - goto out; - } - /* fall through */ + resend_aioreq(s, aio_req); + goto out; default: acb->ret = -EIO; error_report("%s", sd_strerror(rsp.result)); @@ -1071,7 +1060,7 @@ out: return ret; } -static int coroutine_fn add_aio_request(BDRVSheepdogState *s, AIOReq *aio_req, +static void coroutine_fn add_aio_request(BDRVSheepdogState *s, AIOReq *aio_req, struct iovec *iov, int niov, bool create, enum AIOCBState aiocb_type) { @@ -1149,8 +1138,6 @@ out: qemu_aio_set_fd_handler(s->fd, co_read_response, NULL, s); s->co_send = NULL; qemu_co_mutex_unlock(&s->lock); - - return 0; } static int read_write_object(int fd, char *buf, uint64_t oid, uint8_t copies, @@ -1253,7 +1240,7 @@ out: return ret; } -static int coroutine_fn resend_aioreq(BDRVSheepdogState *s, AIOReq *aio_req) +static void coroutine_fn resend_aioreq(BDRVSheepdogState *s, AIOReq *aio_req) { SheepdogAIOCB *acb = aio_req->aiocb; bool create = false; @@ -1278,7 +1265,7 @@ static int coroutine_fn resend_aioreq(BDRVSheepdogState *s, AIOReq *aio_req) DPRINTF("simultaneous CoW to %" PRIx64 "\n", aio_req->oid); QLIST_REMOVE(aio_req, aio_siblings); QLIST_INSERT_HEAD(&s->pending_aio_head, aio_req, aio_siblings); - return SD_RES_SUCCESS; + return; } } @@ -1288,13 +1275,13 @@ static int coroutine_fn resend_aioreq(BDRVSheepdogState *s, AIOReq *aio_req) } out: if (is_data_obj(aio_req->oid)) { - return add_aio_request(s, aio_req, acb->qiov->iov, acb->qiov->niov, - create, acb->aiocb_type); + add_aio_request(s, aio_req, acb->qiov->iov, acb->qiov->niov, create, + acb->aiocb_type); } else { struct iovec iov; iov.iov_base = &s->inode; iov.iov_len = sizeof(s->inode); - return add_aio_request(s, aio_req, &iov, 1, false, AIOCB_WRITE_UDATA); + add_aio_request(s, aio_req, &iov, 1, false, AIOCB_WRITE_UDATA); } } @@ -1697,7 +1684,6 @@ static int sd_truncate(BlockDriverState *bs, int64_t offset) */ static void coroutine_fn sd_write_done(SheepdogAIOCB *acb) { - int ret; BDRVSheepdogState *s = acb->common.bs->opaque; struct iovec iov; AIOReq *aio_req; @@ -1719,18 +1705,13 @@ static void coroutine_fn sd_write_done(SheepdogAIOCB *acb) aio_req = alloc_aio_req(s, acb, vid_to_vdi_oid(s->inode.vdi_id), data_len, offset, 0, 0, offset); QLIST_INSERT_HEAD(&s->inflight_aio_head, aio_req, aio_siblings); - ret = add_aio_request(s, aio_req, &iov, 1, false, AIOCB_WRITE_UDATA); - if (ret) { - free_aio_req(s, aio_req); - acb->ret = -EIO; - goto out; - } + add_aio_request(s, aio_req, &iov, 1, false, AIOCB_WRITE_UDATA); acb->aio_done_func = sd_finish_aiocb; acb->aiocb_type = AIOCB_WRITE_UDATA; return; } -out: + sd_finish_aiocb(acb); } @@ -1937,14 +1918,8 @@ static int coroutine_fn sd_co_rw_vector(void *p) } QLIST_INSERT_HEAD(&s->inflight_aio_head, aio_req, aio_siblings); - ret = add_aio_request(s, aio_req, acb->qiov->iov, acb->qiov->niov, - create, acb->aiocb_type); - if (ret < 0) { - error_report("add_aio_request is failed"); - free_aio_req(s, aio_req); - acb->ret = -EIO; - goto out; - } + add_aio_request(s, aio_req, acb->qiov->iov, acb->qiov->niov, create, + acb->aiocb_type); done: offset = 0; idx++; @@ -2012,7 +1987,6 @@ static int coroutine_fn sd_co_flush_to_disk(BlockDriverState *bs) BDRVSheepdogState *s = bs->opaque; SheepdogAIOCB *acb; AIOReq *aio_req; - int ret; if (s->cache_flags != SD_FLAG_CMD_CACHE) { return 0; @@ -2025,13 +1999,7 @@ static int coroutine_fn sd_co_flush_to_disk(BlockDriverState *bs) aio_req = alloc_aio_req(s, acb, vid_to_vdi_oid(s->inode.vdi_id), 0, 0, 0, 0, 0); QLIST_INSERT_HEAD(&s->inflight_aio_head, aio_req, aio_siblings); - ret = add_aio_request(s, aio_req, NULL, 0, false, acb->aiocb_type); - if (ret < 0) { - error_report("add_aio_request is failed"); - free_aio_req(s, aio_req); - qemu_aio_release(acb); - return ret; - } + add_aio_request(s, aio_req, NULL, 0, false, acb->aiocb_type); qemu_coroutine_yield(); return acb->ret; From 35200687a1e04a79b0345be476185dc23d1604fb Mon Sep 17 00:00:00 2001 From: MORITA Kazutaka Date: Thu, 24 Oct 2013 16:01:17 +0900 Subject: [PATCH 26/30] sheepdog: cancel aio requests if possible This patch tries to cancel aio requests in pending queue and failed queue. When the sheepdog driver cannot cancel the requests, it waits for them to be completed. Signed-off-by: MORITA Kazutaka Tested-by: Liu Yuan Reviewed-by: Liu Yuan Signed-off-by: Kevin Wolf --- block/sheepdog.c | 70 ++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 59 insertions(+), 11 deletions(-) diff --git a/block/sheepdog.c b/block/sheepdog.c index 8790494126..eebb5feb31 100644 --- a/block/sheepdog.c +++ b/block/sheepdog.c @@ -299,7 +299,8 @@ struct SheepdogAIOCB { Coroutine *coroutine; void (*aio_done_func)(SheepdogAIOCB *); - bool canceled; + bool cancelable; + bool *finished; int nr_pending; }; @@ -418,6 +419,7 @@ static inline void free_aio_req(BDRVSheepdogState *s, AIOReq *aio_req) { SheepdogAIOCB *acb = aio_req->aiocb; + acb->cancelable = false; QLIST_REMOVE(aio_req, aio_siblings); g_free(aio_req); @@ -426,23 +428,68 @@ static inline void free_aio_req(BDRVSheepdogState *s, AIOReq *aio_req) static void coroutine_fn sd_finish_aiocb(SheepdogAIOCB *acb) { - if (!acb->canceled) { - qemu_coroutine_enter(acb->coroutine, NULL); + qemu_coroutine_enter(acb->coroutine, NULL); + if (acb->finished) { + *acb->finished = true; } qemu_aio_release(acb); } +/* + * Check whether the specified acb can be canceled + * + * We can cancel aio when any request belonging to the acb is: + * - Not processed by the sheepdog server. + * - Not linked to the inflight queue. + */ +static bool sd_acb_cancelable(const SheepdogAIOCB *acb) +{ + BDRVSheepdogState *s = acb->common.bs->opaque; + AIOReq *aioreq; + + if (!acb->cancelable) { + return false; + } + + QLIST_FOREACH(aioreq, &s->inflight_aio_head, aio_siblings) { + if (aioreq->aiocb == acb) { + return false; + } + } + + return true; +} + static void sd_aio_cancel(BlockDriverAIOCB *blockacb) { SheepdogAIOCB *acb = (SheepdogAIOCB *)blockacb; + BDRVSheepdogState *s = acb->common.bs->opaque; + AIOReq *aioreq, *next; + bool finished = false; - /* - * Sheepdog cannot cancel the requests which are already sent to - * the servers, so we just complete the request with -EIO here. - */ - acb->ret = -EIO; - qemu_coroutine_enter(acb->coroutine, NULL); - acb->canceled = true; + acb->finished = &finished; + while (!finished) { + if (sd_acb_cancelable(acb)) { + /* Remove outstanding requests from pending and failed queues. */ + QLIST_FOREACH_SAFE(aioreq, &s->pending_aio_head, aio_siblings, + next) { + if (aioreq->aiocb == acb) { + free_aio_req(s, aioreq); + } + } + QLIST_FOREACH_SAFE(aioreq, &s->failed_aio_head, aio_siblings, + next) { + if (aioreq->aiocb == acb) { + free_aio_req(s, aioreq); + } + } + + assert(acb->nr_pending == 0); + sd_finish_aiocb(acb); + return; + } + qemu_aio_wait(); + } } static const AIOCBInfo sd_aiocb_info = { @@ -463,7 +510,8 @@ static SheepdogAIOCB *sd_aio_setup(BlockDriverState *bs, QEMUIOVector *qiov, acb->nb_sectors = nb_sectors; acb->aio_done_func = NULL; - acb->canceled = false; + acb->cancelable = true; + acb->finished = NULL; acb->coroutine = qemu_coroutine_self(); acb->ret = 0; acb->nr_pending = 0; From 80308d33ec70834a80351a79eba106049b44a366 Mon Sep 17 00:00:00 2001 From: MORITA Kazutaka Date: Thu, 24 Oct 2013 16:01:18 +0900 Subject: [PATCH 27/30] sheepdog: check simultaneous create in resend_aioreq After reconnection happens, all the inflight requests are moved to the failed request list. As a result, sd_co_rw_vector() can send another create request before resend_aioreq() resends a create request from the failed list. This patch adds a helper function check_simultaneous_create() and checks simultaneous create requests more strictly in resend_aioreq(). Signed-off-by: MORITA Kazutaka Tested-by: Liu Yuan Reviewed-by: Liu Yuan Signed-off-by: Kevin Wolf --- block/sheepdog.c | 64 ++++++++++++++++++++++++------------------------ 1 file changed, 32 insertions(+), 32 deletions(-) diff --git a/block/sheepdog.c b/block/sheepdog.c index eebb5feb31..ef387de71f 100644 --- a/block/sheepdog.c +++ b/block/sheepdog.c @@ -1288,6 +1288,29 @@ out: return ret; } +/* Return true if the specified request is linked to the pending list. */ +static bool check_simultaneous_create(BDRVSheepdogState *s, AIOReq *aio_req) +{ + AIOReq *areq; + QLIST_FOREACH(areq, &s->inflight_aio_head, aio_siblings) { + if (areq != aio_req && areq->oid == aio_req->oid) { + /* + * Sheepdog cannot handle simultaneous create requests to the same + * object, so we cannot send the request until the previous request + * finishes. + */ + DPRINTF("simultaneous create to %" PRIx64 "\n", aio_req->oid); + aio_req->flags = 0; + aio_req->base_oid = 0; + QLIST_REMOVE(aio_req, aio_siblings); + QLIST_INSERT_HEAD(&s->pending_aio_head, aio_req, aio_siblings); + return true; + } + } + + return false; +} + static void coroutine_fn resend_aioreq(BDRVSheepdogState *s, AIOReq *aio_req) { SheepdogAIOCB *acb = aio_req->aiocb; @@ -1296,29 +1319,19 @@ static void coroutine_fn resend_aioreq(BDRVSheepdogState *s, AIOReq *aio_req) /* check whether this request becomes a CoW one */ if (acb->aiocb_type == AIOCB_WRITE_UDATA && is_data_obj(aio_req->oid)) { int idx = data_oid_to_idx(aio_req->oid); - AIOReq *areq; - if (s->inode.data_vdi_id[idx] == 0) { - create = true; - goto out; - } if (is_data_obj_writable(&s->inode, idx)) { goto out; } - /* link to the pending list if there is another CoW request to - * the same object */ - QLIST_FOREACH(areq, &s->inflight_aio_head, aio_siblings) { - if (areq != aio_req && areq->oid == aio_req->oid) { - DPRINTF("simultaneous CoW to %" PRIx64 "\n", aio_req->oid); - QLIST_REMOVE(aio_req, aio_siblings); - QLIST_INSERT_HEAD(&s->pending_aio_head, aio_req, aio_siblings); - return; - } + if (check_simultaneous_create(s, aio_req)) { + return; } - aio_req->base_oid = vid_to_data_oid(s->inode.data_vdi_id[idx], idx); - aio_req->flags |= SD_FLAG_CMD_COW; + if (s->inode.data_vdi_id[idx]) { + aio_req->base_oid = vid_to_data_oid(s->inode.data_vdi_id[idx], idx); + aio_req->flags |= SD_FLAG_CMD_COW; + } create = true; } out: @@ -1945,27 +1958,14 @@ static int coroutine_fn sd_co_rw_vector(void *p) } aio_req = alloc_aio_req(s, acb, oid, len, offset, flags, old_oid, done); + QLIST_INSERT_HEAD(&s->inflight_aio_head, aio_req, aio_siblings); if (create) { - AIOReq *areq; - QLIST_FOREACH(areq, &s->inflight_aio_head, aio_siblings) { - if (areq->oid == oid) { - /* - * Sheepdog cannot handle simultaneous create - * requests to the same object. So we cannot send - * the request until the previous request - * finishes. - */ - aio_req->flags = 0; - aio_req->base_oid = 0; - QLIST_INSERT_HEAD(&s->pending_aio_head, aio_req, - aio_siblings); - goto done; - } + if (check_simultaneous_create(s, aio_req)) { + goto done; } } - QLIST_INSERT_HEAD(&s->inflight_aio_head, aio_req, aio_siblings); add_aio_request(s, aio_req, acb->qiov->iov, acb->qiov->niov, create, acb->aiocb_type); done: From 7890111b642e8e03430c3bf8bd6cedee26cec4fe Mon Sep 17 00:00:00 2001 From: Fam Zheng Date: Wed, 30 Oct 2013 17:42:28 +0800 Subject: [PATCH 28/30] qemu-iotests: prefill some data to test image Case 030 occasionally fails because of block job compltes too fast to be captured by script, and 'unexpected qmp event' of job completion causes the test failure. Simply fill in some data to the test image to make this false alarm less likely to happen. (For other benefits to prefill data to test image, see also commit ab68cdfaa). Signed-off-by: Fam Zheng Signed-off-by: Kevin Wolf --- tests/qemu-iotests/030 | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tests/qemu-iotests/030 b/tests/qemu-iotests/030 index ae56f3b808..d0f96ea0e1 100755 --- a/tests/qemu-iotests/030 +++ b/tests/qemu-iotests/030 @@ -388,7 +388,9 @@ class TestStreamStop(iotests.QMPTestCase): def setUp(self): qemu_img('create', backing_img, str(TestStreamStop.image_len)) + qemu_io('-c', 'write -P 0x1 0 32M', backing_img) qemu_img('create', '-f', iotests.imgfmt, '-o', 'backing_file=%s' % backing_img, test_img) + qemu_io('-c', 'write -P 0x1 32M 32M', test_img) self.vm = iotests.VM().add_drive(test_img) self.vm.launch() @@ -414,7 +416,9 @@ class TestSetSpeed(iotests.QMPTestCase): def setUp(self): qemu_img('create', backing_img, str(TestSetSpeed.image_len)) + qemu_io('-c', 'write -P 0x1 0 32M', backing_img) qemu_img('create', '-f', iotests.imgfmt, '-o', 'backing_file=%s' % backing_img, test_img) + qemu_io('-c', 'write -P 0x1 32M 32M', test_img) self.vm = iotests.VM().add_drive(test_img) self.vm.launch() From cbe82d7fb32e5d8e76434671d50853df5f50d560 Mon Sep 17 00:00:00 2001 From: Fam Zheng Date: Fri, 18 Oct 2013 11:12:44 +0800 Subject: [PATCH 29/30] qapi: Add optional field 'compressed' to ImageInfo Signed-off-by: Fam Zheng Reviewed-by: Eric Blake Signed-off-by: Kevin Wolf --- qapi-schema.json | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/qapi-schema.json b/qapi-schema.json index 60f3fd1db6..add97e296d 100644 --- a/qapi-schema.json +++ b/qapi-schema.json @@ -256,6 +256,8 @@ # # @encrypted: #optional true if the image is encrypted # +# @compressed: #optional true if the image is compressed (Since 1.7) +# # @backing-filename: #optional name of the backing file # # @full-backing-filename: #optional full path of the backing file @@ -276,7 +278,7 @@ { 'type': 'ImageInfo', 'data': {'filename': 'str', 'format': 'str', '*dirty-flag': 'bool', '*actual-size': 'int', 'virtual-size': 'int', - '*cluster-size': 'int', '*encrypted': 'bool', + '*cluster-size': 'int', '*encrypted': 'bool', '*compressed': 'bool', '*backing-filename': 'str', '*full-backing-filename': 'str', '*backing-filename-format': 'str', '*snapshots': ['SnapshotInfo'], '*backing-image': 'ImageInfo', From f4c129a38a5430b7342a7a23f53a22831154612f Mon Sep 17 00:00:00 2001 From: Fam Zheng Date: Thu, 31 Oct 2013 10:06:23 +0800 Subject: [PATCH 30/30] vmdk: Implment bdrv_get_specific_info Implement .bdrv_get_specific_info to return the extent information. Signed-off-by: Fam Zheng Signed-off-by: Kevin Wolf --- block/vmdk.c | 68 +++++++++++++++++++++++++++++++++++++- qapi-schema.json | 24 +++++++++++++- tests/qemu-iotests/059 | 2 +- tests/qemu-iotests/059.out | 5 ++- 4 files changed, 93 insertions(+), 6 deletions(-) diff --git a/block/vmdk.c b/block/vmdk.c index 32ec8b7766..a7ebd0f125 100644 --- a/block/vmdk.c +++ b/block/vmdk.c @@ -106,6 +106,7 @@ typedef struct VmdkExtent { uint32_t l2_cache_counts[L2_CACHE_SIZE]; int64_t cluster_sectors; + char *type; } VmdkExtent; typedef struct BDRVVmdkState { @@ -113,11 +114,13 @@ typedef struct BDRVVmdkState { uint64_t desc_offset; bool cid_updated; bool cid_checked; + uint32_t cid; uint32_t parent_cid; int num_extents; /* Extent array with num_extents entries, ascend ordered by address */ VmdkExtent *extents; Error *migration_blocker; + char *create_type; } BDRVVmdkState; typedef struct VmdkMetaData { @@ -214,6 +217,7 @@ static void vmdk_free_extents(BlockDriverState *bs) g_free(e->l1_table); g_free(e->l2_cache); g_free(e->l1_backup_table); + g_free(e->type); if (e->file != bs->file) { bdrv_unref(e->file); } @@ -534,6 +538,7 @@ static int vmdk_open_vmdk4(BlockDriverState *bs, uint32_t l1_size, l1_entry_sectors; VMDK4Header header; VmdkExtent *extent; + BDRVVmdkState *s = bs->opaque; int64_t l1_backup_offset = 0; ret = bdrv_pread(file, sizeof(magic), &header, sizeof(header)); @@ -549,6 +554,10 @@ static int vmdk_open_vmdk4(BlockDriverState *bs, } } + if (!s->create_type) { + s->create_type = g_strdup("monolithicSparse"); + } + if (le64_to_cpu(header.gd_offset) == VMDK4_GD_AT_END) { /* * The footer takes precedence over the header, so read it in. The @@ -709,6 +718,8 @@ static int vmdk_parse_extents(const char *desc, BlockDriverState *bs, int64_t flat_offset; char extent_path[PATH_MAX]; BlockDriverState *extent_file; + BDRVVmdkState *s = bs->opaque; + VmdkExtent *extent; while (*p) { /* parse extent line: @@ -751,7 +762,6 @@ static int vmdk_parse_extents(const char *desc, BlockDriverState *bs, /* save to extents array */ if (!strcmp(type, "FLAT") || !strcmp(type, "VMFS")) { /* FLAT extent */ - VmdkExtent *extent; ret = vmdk_add_extent(bs, extent_file, true, sectors, 0, 0, 0, 0, 0, &extent, errp); @@ -766,10 +776,12 @@ static int vmdk_parse_extents(const char *desc, BlockDriverState *bs, bdrv_unref(extent_file); return ret; } + extent = &s->extents[s->num_extents - 1]; } else { error_setg(errp, "Unsupported extent type '%s'", type); return -ENOTSUP; } + extent->type = g_strdup(type); next_line: /* move to next line */ while (*p) { @@ -817,6 +829,7 @@ static int vmdk_open_desc_file(BlockDriverState *bs, int flags, ret = -ENOTSUP; goto exit; } + s->create_type = g_strdup(ct); s->desc_offset = 0; ret = vmdk_parse_extents(buf, bs, bs->file->filename, errp); exit: @@ -843,6 +856,7 @@ static int vmdk_open(BlockDriverState *bs, QDict *options, int flags, if (ret) { goto fail; } + s->cid = vmdk_read_cid(bs, 0); s->parent_cid = vmdk_read_cid(bs, 1); qemu_co_mutex_init(&s->lock); @@ -855,6 +869,8 @@ static int vmdk_open(BlockDriverState *bs, QDict *options, int flags, return 0; fail: + g_free(s->create_type); + s->create_type = NULL; vmdk_free_extents(bs); return ret; } @@ -1766,6 +1782,7 @@ static void vmdk_close(BlockDriverState *bs) BDRVVmdkState *s = bs->opaque; vmdk_free_extents(bs); + g_free(s->create_type); migrate_del_blocker(s->migration_blocker); error_free(s->migration_blocker); @@ -1827,6 +1844,54 @@ static int vmdk_has_zero_init(BlockDriverState *bs) return 1; } +static ImageInfo *vmdk_get_extent_info(VmdkExtent *extent) +{ + ImageInfo *info = g_new0(ImageInfo, 1); + + *info = (ImageInfo){ + .filename = g_strdup(extent->file->filename), + .format = g_strdup(extent->type), + .virtual_size = extent->sectors * BDRV_SECTOR_SIZE, + .compressed = extent->compressed, + .has_compressed = extent->compressed, + .cluster_size = extent->cluster_sectors * BDRV_SECTOR_SIZE, + .has_cluster_size = !extent->flat, + }; + + return info; +} + +static ImageInfoSpecific *vmdk_get_specific_info(BlockDriverState *bs) +{ + int i; + BDRVVmdkState *s = bs->opaque; + ImageInfoSpecific *spec_info = g_new0(ImageInfoSpecific, 1); + ImageInfoList **next; + + *spec_info = (ImageInfoSpecific){ + .kind = IMAGE_INFO_SPECIFIC_KIND_VMDK, + { + .vmdk = g_new0(ImageInfoSpecificVmdk, 1), + }, + }; + + *spec_info->vmdk = (ImageInfoSpecificVmdk) { + .create_type = g_strdup(s->create_type), + .cid = s->cid, + .parent_cid = s->parent_cid, + }; + + next = &spec_info->vmdk->extents; + for (i = 0; i < s->num_extents; i++) { + *next = g_new0(ImageInfoList, 1); + (*next)->value = vmdk_get_extent_info(&s->extents[i]); + (*next)->next = NULL; + next = &(*next)->next; + } + + return spec_info; +} + static QEMUOptionParameter vmdk_create_options[] = { { .name = BLOCK_OPT_SIZE, @@ -1879,6 +1944,7 @@ static BlockDriver bdrv_vmdk = { .bdrv_co_get_block_status = vmdk_co_get_block_status, .bdrv_get_allocated_file_size = vmdk_get_allocated_file_size, .bdrv_has_zero_init = vmdk_has_zero_init, + .bdrv_get_specific_info = vmdk_get_specific_info, .create_options = vmdk_create_options, }; diff --git a/qapi-schema.json b/qapi-schema.json index add97e296d..d607258122 100644 --- a/qapi-schema.json +++ b/qapi-schema.json @@ -224,6 +224,27 @@ '*lazy-refcounts': 'bool' } } +## +# @ImageInfoSpecificVmdk: +# +# @create_type: The create type of VMDK image +# +# @cid: Content id of image +# +# @parent-cid: Parent VMDK image's cid +# +# @extents: List of extent files +# +# Since: 1.7 +## +{ 'type': 'ImageInfoSpecificVmdk', + 'data': { + 'create-type': 'str', + 'cid': 'int', + 'parent-cid': 'int', + 'extents': ['ImageInfo'] + } } + ## # @ImageInfoSpecific: # @@ -234,7 +255,8 @@ { 'union': 'ImageInfoSpecific', 'data': { - 'qcow2': 'ImageInfoSpecificQCow2' + 'qcow2': 'ImageInfoSpecificQCow2', + 'vmdk': 'ImageInfoSpecificVmdk' } } ## diff --git a/tests/qemu-iotests/059 b/tests/qemu-iotests/059 index b81c575d94..6a27ac978b 100755 --- a/tests/qemu-iotests/059 +++ b/tests/qemu-iotests/059 @@ -69,7 +69,7 @@ poke_file "$TEST_IMG" "$grain_table_size_offset" "\x01\x00\x00\x00" echo echo "=== Testing monolithicFlat creation and opening ===" IMGOPTS="subformat=monolithicFlat" _make_test_img 2G -$QEMU_IMG info $TEST_IMG | _filter_testdir +_img_info echo echo "=== Testing monolithicFlat with zeroed_grain ===" diff --git a/tests/qemu-iotests/059.out b/tests/qemu-iotests/059.out index 9b12efb466..2ded8a950a 100644 --- a/tests/qemu-iotests/059.out +++ b/tests/qemu-iotests/059.out @@ -18,10 +18,9 @@ no file open, try 'help open' === Testing monolithicFlat creation and opening === Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=2147483648 -image: TEST_DIR/t.vmdk -file format: vmdk +image: TEST_DIR/t.IMGFMT +file format: IMGFMT virtual size: 2.0G (2147483648 bytes) -disk size: 4.0K === Testing monolithicFlat with zeroed_grain === qemu-img: TEST_DIR/t.IMGFMT: Flat image can't enable zeroed grain