diff --git a/block/Makefile.objs b/block/Makefile.objs index 04b0e43eb1..010afad71a 100644 --- a/block/Makefile.objs +++ b/block/Makefile.objs @@ -20,6 +20,7 @@ block-obj-$(CONFIG_GLUSTERFS) += gluster.o block-obj-$(CONFIG_ARCHIPELAGO) += archipelago.o block-obj-$(CONFIG_LIBSSH2) += ssh.o block-obj-y += accounting.o +block-obj-y += write-threshold.o common-obj-y += stream.o common-obj-y += commit.o diff --git a/block/qapi.c b/block/qapi.c index d1a891796c..1808e67336 100644 --- a/block/qapi.c +++ b/block/qapi.c @@ -24,6 +24,7 @@ #include "block/qapi.h" #include "block/block_int.h" +#include "block/write-threshold.h" #include "qmp-commands.h" #include "qapi-visit.h" #include "qapi/qmp-output-visitor.h" @@ -89,6 +90,8 @@ BlockDeviceInfo *bdrv_block_device_info(BlockDriverState *bs) info->iops_size = cfg.op_size; } + info->write_threshold = bdrv_write_threshold_get(bs); + return info; } diff --git a/block/write-threshold.c b/block/write-threshold.c new file mode 100644 index 0000000000..c2cd517716 --- /dev/null +++ b/block/write-threshold.c @@ -0,0 +1,125 @@ +/* + * QEMU System Emulator block write threshold notification + * + * Copyright Red Hat, Inc. 2014 + * + * Authors: + * Francesco Romani + * + * This work is licensed under the terms of the GNU LGPL, version 2 or later. + * See the COPYING.LIB file in the top-level directory. + */ + +#include "block/block_int.h" +#include "block/coroutine.h" +#include "block/write-threshold.h" +#include "qemu/notify.h" +#include "qapi-event.h" +#include "qmp-commands.h" + + +uint64_t bdrv_write_threshold_get(const BlockDriverState *bs) +{ + return bs->write_threshold_offset; +} + +bool bdrv_write_threshold_is_set(const BlockDriverState *bs) +{ + return bs->write_threshold_offset > 0; +} + +static void write_threshold_disable(BlockDriverState *bs) +{ + if (bdrv_write_threshold_is_set(bs)) { + notifier_with_return_remove(&bs->write_threshold_notifier); + bs->write_threshold_offset = 0; + } +} + +uint64_t bdrv_write_threshold_exceeded(const BlockDriverState *bs, + const BdrvTrackedRequest *req) +{ + if (bdrv_write_threshold_is_set(bs)) { + if (req->offset > bs->write_threshold_offset) { + return (req->offset - bs->write_threshold_offset) + req->bytes; + } + if ((req->offset + req->bytes) > bs->write_threshold_offset) { + return (req->offset + req->bytes) - bs->write_threshold_offset; + } + } + return 0; +} + +static int coroutine_fn before_write_notify(NotifierWithReturn *notifier, + void *opaque) +{ + BdrvTrackedRequest *req = opaque; + BlockDriverState *bs = req->bs; + uint64_t amount = 0; + + amount = bdrv_write_threshold_exceeded(bs, req); + if (amount > 0) { + qapi_event_send_block_write_threshold( + bs->node_name, + amount, + bs->write_threshold_offset, + &error_abort); + + /* autodisable to avoid flooding the monitor */ + write_threshold_disable(bs); + } + + return 0; /* should always let other notifiers run */ +} + +static void write_threshold_register_notifier(BlockDriverState *bs) +{ + bs->write_threshold_notifier.notify = before_write_notify; + notifier_with_return_list_add(&bs->before_write_notifiers, + &bs->write_threshold_notifier); +} + +static void write_threshold_update(BlockDriverState *bs, + int64_t threshold_bytes) +{ + bs->write_threshold_offset = threshold_bytes; +} + +void bdrv_write_threshold_set(BlockDriverState *bs, uint64_t threshold_bytes) +{ + if (bdrv_write_threshold_is_set(bs)) { + if (threshold_bytes > 0) { + write_threshold_update(bs, threshold_bytes); + } else { + write_threshold_disable(bs); + } + } else { + if (threshold_bytes > 0) { + /* avoid multiple registration */ + write_threshold_register_notifier(bs); + write_threshold_update(bs, threshold_bytes); + } + /* discard bogus disable request */ + } +} + +void qmp_block_set_write_threshold(const char *node_name, + uint64_t threshold_bytes, + Error **errp) +{ + BlockDriverState *bs; + AioContext *aio_context; + + bs = bdrv_find_node(node_name); + if (!bs) { + error_set(errp, QERR_DEVICE_NOT_FOUND, node_name); + return; + } + + aio_context = bdrv_get_aio_context(bs); + aio_context_acquire(aio_context); + + bdrv_write_threshold_set(bs, threshold_bytes); + + aio_context_release(aio_context); +} diff --git a/include/block/block_int.h b/include/block/block_int.h index e264be97b2..7ad19503df 100644 --- a/include/block/block_int.h +++ b/include/block/block_int.h @@ -412,6 +412,10 @@ struct BlockDriverState { /* The error object in use for blocking operations on backing_hd */ Error *backing_blocker; + + /* threshold limit for writes, in bytes. "High water mark". */ + uint64_t write_threshold_offset; + NotifierWithReturn write_threshold_notifier; }; diff --git a/include/block/write-threshold.h b/include/block/write-threshold.h new file mode 100644 index 0000000000..f1b899cd5f --- /dev/null +++ b/include/block/write-threshold.h @@ -0,0 +1,64 @@ +/* + * QEMU System Emulator block write threshold notification + * + * Copyright Red Hat, Inc. 2014 + * + * Authors: + * Francesco Romani + * + * This work is licensed under the terms of the GNU LGPL, version 2 or later. + * See the COPYING.LIB file in the top-level directory. + */ +#ifndef BLOCK_WRITE_THRESHOLD_H +#define BLOCK_WRITE_THRESHOLD_H + +#include + +#include "qemu/typedefs.h" +#include "qemu-common.h" + +/* + * bdrv_write_threshold_set: + * + * Set the write threshold for block devices, in bytes. + * Notify when a write exceeds the threshold, meaning the device + * is becoming full, so it can be transparently resized. + * To be used with thin-provisioned block devices. + * + * Use threshold_bytes == 0 to disable. + */ +void bdrv_write_threshold_set(BlockDriverState *bs, uint64_t threshold_bytes); + +/* + * bdrv_write_threshold_get + * + * Get the configured write threshold, in bytes. + * Zero means no threshold configured. + */ +uint64_t bdrv_write_threshold_get(const BlockDriverState *bs); + +/* + * bdrv_write_threshold_is_set + * + * Tell if a write threshold is set for a given BDS. + */ +bool bdrv_write_threshold_is_set(const BlockDriverState *bs); + +/* + * bdrv_write_threshold_exceeded + * + * Return the extent of a write request that exceeded the threshold, + * or zero if the request is below the threshold. + * Return zero also if the threshold was not set. + * + * NOTE: here we assume the following holds for each request this code + * deals with: + * + * assert((req->offset + req->bytes) <= UINT64_MAX) + * + * Please not there is *not* an actual C assert(). + */ +uint64_t bdrv_write_threshold_exceeded(const BlockDriverState *bs, + const BdrvTrackedRequest *req); + +#endif diff --git a/qapi/block-core.json b/qapi/block-core.json index b7d9772444..a3fdaf02ba 100644 --- a/qapi/block-core.json +++ b/qapi/block-core.json @@ -257,6 +257,9 @@ # # @cache: the cache mode used for the block device (since: 2.3) # +# @write_threshold: configured write threshold for the device. +# 0 if disabled. (Since 2.3) +# # Since: 0.14.0 # ## @@ -271,7 +274,8 @@ '*bps_max': 'int', '*bps_rd_max': 'int', '*bps_wr_max': 'int', '*iops_max': 'int', '*iops_rd_max': 'int', '*iops_wr_max': 'int', - '*iops_size': 'int', 'cache': 'BlockdevCacheInfo' } } + '*iops_size': 'int', 'cache': 'BlockdevCacheInfo', + 'write_threshold': 'int' } } ## # @BlockDeviceIoStatus: @@ -1917,3 +1921,48 @@ ## { 'enum': 'PreallocMode', 'data': [ 'off', 'metadata', 'falloc', 'full' ] } + +## +# @BLOCK_WRITE_THRESHOLD +# +# Emitted when writes on block device reaches or exceeds the +# configured write threshold. For thin-provisioned devices, this +# means the device should be extended to avoid pausing for +# disk exhaustion. +# The event is one shot. Once triggered, it needs to be +# re-registered with another block-set-threshold command. +# +# @node-name: graph node name on which the threshold was exceeded. +# +# @amount-exceeded: amount of data which exceeded the threshold, in bytes. +# +# @write-threshold: last configured threshold, in bytes. +# +# Since: 2.3 +## +{ 'event': 'BLOCK_WRITE_THRESHOLD', + 'data': { 'node-name': 'str', + 'amount-exceeded': 'uint64', + 'write-threshold': 'uint64' } } + +## +# @block-set-write-threshold +# +# Change the write threshold for a block drive. An event will be delivered +# if a write to this block drive crosses the configured threshold. +# This is useful to transparently resize thin-provisioned drives without +# the guest OS noticing. +# +# @node-name: graph node name on which the threshold must be set. +# +# @write-threshold: configured threshold for the block device, bytes. +# Use 0 to disable the threshold. +# +# Returns: Nothing on success +# If @node name is not found on the block device graph, +# DeviceNotFound +# +# Since: 2.3 +## +{ 'command': 'block-set-write-threshold', + 'data': { 'node-name': 'str', 'write-threshold': 'uint64' } } diff --git a/qmp-commands.hx b/qmp-commands.hx index af3fd19935..a85d8479e3 100644 --- a/qmp-commands.hx +++ b/qmp-commands.hx @@ -2146,6 +2146,8 @@ Each json-object contain the following: - "iops_size": I/O size when limiting by iops (json-int) - "detect_zeroes": detect and optimize zero writing (json-string) - Possible values: "off", "on", "unmap" + - "write_threshold": write offset threshold in bytes, a event will be + emitted if crossed. Zero if disabled (json-int) - "image": the detail of the image, it is a json-object containing the following: - "filename": image file name (json-string) @@ -2223,6 +2225,7 @@ Example: "iops_wr_max": 0, "iops_size": 0, "detect_zeroes": "on", + "write_threshold": 0, "image":{ "filename":"disks/test.qcow2", "format":"qcow2", @@ -3685,6 +3688,7 @@ Example: "iops_rd_max": 0, "iops_wr_max": 0, "iops_size": 0, + "write_threshold": 0, "image":{ "filename":"disks/test.qcow2", "format":"qcow2", @@ -3920,4 +3924,32 @@ Move mouse pointer to absolute coordinates (20000, 400). { "type": "abs", "data" : { "axis": "Y", "value" : 400 } } ] } } <- { "return": {} } +EQMP + + { + .name = "block-set-write-threshold", + .args_type = "node-name:s,write-threshold:l", + .mhandler.cmd_new = qmp_marshal_input_block_set_write_threshold, + }, + +SQMP +block-set-write-threshold +------------ + +Change the write threshold for a block drive. The threshold is an offset, +thus must be non-negative. Default is no write threshold. +Setting the threshold to zero disables it. + +Arguments: + +- "node-name": the node name in the block driver state graph (json-string) +- "write-threshold": the write threshold in bytes (json-int) + +Example: + +-> { "execute": "block-set-write-threshold", + "arguments": { "node-name": "mydev", + "write-threshold": 17179869184 } } +<- { "return": {} } + EQMP diff --git a/tests/Makefile b/tests/Makefile index 5caccf765a..d5df16882d 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -68,6 +68,8 @@ check-unit-y += tests/check-qom-interface$(EXESUF) gcov-files-check-qom-interface-y = qom/object.c check-unit-y += tests/test-qemu-opts$(EXESUF) gcov-files-test-qemu-opts-y = qom/test-qemu-opts.c +check-unit-y += tests/test-write-threshold$(EXESUF) +gcov-files-test-write-threshold-y = block/write-threshold.c check-block-$(CONFIG_POSIX) += tests/qemu-iotests-quick.sh @@ -360,6 +362,7 @@ tests/usb-hcd-xhci-test$(EXESUF): tests/usb-hcd-xhci-test.o $(libqos-usb-obj-y) tests/vhost-user-test$(EXESUF): tests/vhost-user-test.o qemu-char.o qemu-timer.o $(qtest-obj-y) tests/qemu-iotests/socket_scm_helper$(EXESUF): tests/qemu-iotests/socket_scm_helper.o tests/test-qemu-opts$(EXESUF): tests/test-qemu-opts.o libqemuutil.a libqemustub.a +tests/test-write-threshold$(EXESUF): tests/test-write-threshold.o $(block-obj-y) libqemuutil.a libqemustub.a ifeq ($(CONFIG_POSIX),y) LIBS += -lutil diff --git a/tests/qemu-iotests/067.out b/tests/qemu-iotests/067.out index 13ff3cd7aa..00b3eaefc7 100644 --- a/tests/qemu-iotests/067.out +++ b/tests/qemu-iotests/067.out @@ -43,6 +43,7 @@ Testing: -drive file=TEST_DIR/t.qcow2,format=qcow2,if=none,id=disk -device virti "drv": "qcow2", "iops": 0, "bps_wr": 0, + "write_threshold": 0, "encrypted": false, "bps": 0, "bps_rd": 0, @@ -218,6 +219,7 @@ Testing: -drive file=TEST_DIR/t.qcow2,format=qcow2,if=none,id=disk "drv": "qcow2", "iops": 0, "bps_wr": 0, + "write_threshold": 0, "encrypted": false, "bps": 0, "bps_rd": 0, @@ -423,6 +425,7 @@ Testing: "drv": "qcow2", "iops": 0, "bps_wr": 0, + "write_threshold": 0, "encrypted": false, "bps": 0, "bps_rd": 0, @@ -607,6 +610,7 @@ Testing: "drv": "qcow2", "iops": 0, "bps_wr": 0, + "write_threshold": 0, "encrypted": false, "bps": 0, "bps_rd": 0, @@ -717,6 +721,7 @@ Testing: "drv": "qcow2", "iops": 0, "bps_wr": 0, + "write_threshold": 0, "encrypted": false, "bps": 0, "bps_rd": 0, diff --git a/tests/test-write-threshold.c b/tests/test-write-threshold.c new file mode 100644 index 0000000000..faffa7b855 --- /dev/null +++ b/tests/test-write-threshold.c @@ -0,0 +1,119 @@ +/* + * Test block device write threshold + * + * This work is licensed under the terms of the GNU LGPL, version 2 or later. + * See the COPYING.LIB file in the top-level directory. + * + */ + +#include +#include +#include "block/block_int.h" +#include "block/write-threshold.h" + + +static void test_threshold_not_set_on_init(void) +{ + uint64_t res; + BlockDriverState bs; + memset(&bs, 0, sizeof(bs)); + + g_assert(!bdrv_write_threshold_is_set(&bs)); + + res = bdrv_write_threshold_get(&bs); + g_assert_cmpint(res, ==, 0); +} + +static void test_threshold_set_get(void) +{ + uint64_t threshold = 4 * 1024 * 1024; + uint64_t res; + BlockDriverState bs; + memset(&bs, 0, sizeof(bs)); + + bdrv_write_threshold_set(&bs, threshold); + + g_assert(bdrv_write_threshold_is_set(&bs)); + + res = bdrv_write_threshold_get(&bs); + g_assert_cmpint(res, ==, threshold); +} + +static void test_threshold_multi_set_get(void) +{ + uint64_t threshold1 = 4 * 1024 * 1024; + uint64_t threshold2 = 15 * 1024 * 1024; + uint64_t res; + BlockDriverState bs; + memset(&bs, 0, sizeof(bs)); + + bdrv_write_threshold_set(&bs, threshold1); + bdrv_write_threshold_set(&bs, threshold2); + res = bdrv_write_threshold_get(&bs); + g_assert_cmpint(res, ==, threshold2); +} + +static void test_threshold_not_trigger(void) +{ + uint64_t amount = 0; + uint64_t threshold = 4 * 1024 * 1024; + BlockDriverState bs; + BdrvTrackedRequest req; + + memset(&bs, 0, sizeof(bs)); + memset(&req, 0, sizeof(req)); + req.offset = 1024; + req.bytes = 1024; + + bdrv_write_threshold_set(&bs, threshold); + amount = bdrv_write_threshold_exceeded(&bs, &req); + g_assert_cmpuint(amount, ==, 0); +} + + +static void test_threshold_trigger(void) +{ + uint64_t amount = 0; + uint64_t threshold = 4 * 1024 * 1024; + BlockDriverState bs; + BdrvTrackedRequest req; + + memset(&bs, 0, sizeof(bs)); + memset(&req, 0, sizeof(req)); + req.offset = (4 * 1024 * 1024) - 1024; + req.bytes = 2 * 1024; + + bdrv_write_threshold_set(&bs, threshold); + amount = bdrv_write_threshold_exceeded(&bs, &req); + g_assert_cmpuint(amount, >=, 1024); +} + +typedef struct TestStruct { + const char *name; + void (*func)(void); +} TestStruct; + + +int main(int argc, char **argv) +{ + size_t i; + TestStruct tests[] = { + { "/write-threshold/not-set-on-init", + test_threshold_not_set_on_init }, + { "/write-threshold/set-get", + test_threshold_set_get }, + { "/write-threshold/multi-set-get", + test_threshold_multi_set_get }, + { "/write-threshold/not-trigger", + test_threshold_not_trigger }, + { "/write-threshold/trigger", + test_threshold_trigger }, + { NULL, NULL } + }; + + g_test_init(&argc, &argv, NULL); + for (i = 0; tests[i].name != NULL; i++) { + g_test_add_func(tests[i].name, tests[i].func); + } + return g_test_run(); +}