diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c index 39cda7f907..2e76de027c 100644 --- a/block/qcow2-cluster.c +++ b/block/qcow2-cluster.c @@ -1925,6 +1925,10 @@ static int discard_in_l2_slice(BlockDriverState *bs, uint64_t offset, uint64_t new_l2_bitmap = old_l2_bitmap; QCow2ClusterType cluster_type = qcow2_get_cluster_type(bs, old_l2_entry); + bool keep_reference = (cluster_type != QCOW2_CLUSTER_COMPRESSED) && + !full_discard && + (s->discard_no_unref && + type == QCOW2_DISCARD_REQUEST); /* * If full_discard is true, the cluster should not read back as zeroes, @@ -1943,10 +1947,22 @@ static int discard_in_l2_slice(BlockDriverState *bs, uint64_t offset, new_l2_entry = new_l2_bitmap = 0; } else if (bs->backing || qcow2_cluster_is_allocated(cluster_type)) { if (has_subclusters(s)) { - new_l2_entry = 0; + if (keep_reference) { + new_l2_entry = old_l2_entry; + } else { + new_l2_entry = 0; + } new_l2_bitmap = QCOW_L2_BITMAP_ALL_ZEROES; } else { - new_l2_entry = s->qcow_version >= 3 ? 
QCOW_OFLAG_ZERO : 0; + if (s->qcow_version >= 3) { + if (keep_reference) { + new_l2_entry |= QCOW_OFLAG_ZERO; + } else { + new_l2_entry = QCOW_OFLAG_ZERO; + } + } else { + new_l2_entry = 0; + } } } @@ -1960,8 +1976,16 @@ static int discard_in_l2_slice(BlockDriverState *bs, uint64_t offset, if (has_subclusters(s)) { set_l2_bitmap(s, l2_slice, l2_index + i, new_l2_bitmap); } - /* Then decrease the refcount */ - qcow2_free_any_cluster(bs, old_l2_entry, type); + if (!keep_reference) { + /* Then decrease the refcount */ + qcow2_free_any_cluster(bs, old_l2_entry, type); + } else if (s->discard_passthrough[type] && + (cluster_type == QCOW2_CLUSTER_NORMAL || + cluster_type == QCOW2_CLUSTER_ZERO_ALLOC)) { + /* If we keep the reference, pass on the discard still */ + bdrv_pdiscard(s->data_file, old_l2_entry & L2E_OFFSET_MASK, + s->cluster_size); + } } qcow2_cache_put(s->l2_table_cache, (void **) &l2_slice); diff --git a/block/qcow2.c b/block/qcow2.c index 7f3948360d..e23edd48c2 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -682,6 +682,7 @@ static const char *const mutable_opts[] = { QCOW2_OPT_DISCARD_REQUEST, QCOW2_OPT_DISCARD_SNAPSHOT, QCOW2_OPT_DISCARD_OTHER, + QCOW2_OPT_DISCARD_NO_UNREF, QCOW2_OPT_OVERLAP, QCOW2_OPT_OVERLAP_TEMPLATE, QCOW2_OPT_OVERLAP_MAIN_HEADER, @@ -726,6 +727,11 @@ static QemuOptsList qcow2_runtime_opts = { .type = QEMU_OPT_BOOL, .help = "Generate discard requests when other clusters are freed", }, + { + .name = QCOW2_OPT_DISCARD_NO_UNREF, + .type = QEMU_OPT_BOOL, + .help = "Do not unreference discarded clusters", + }, { .name = QCOW2_OPT_OVERLAP, .type = QEMU_OPT_STRING, @@ -969,6 +975,7 @@ typedef struct Qcow2ReopenState { bool use_lazy_refcounts; int overlap_check; bool discard_passthrough[QCOW2_DISCARD_MAX]; + bool discard_no_unref; uint64_t cache_clean_interval; QCryptoBlockOpenOptions *crypto_opts; /* Disk encryption runtime options */ } Qcow2ReopenState; @@ -1140,6 +1147,15 @@ static int qcow2_update_options_prepare(BlockDriverState *bs, 
r->discard_passthrough[QCOW2_DISCARD_OTHER] = qemu_opt_get_bool(opts, QCOW2_OPT_DISCARD_OTHER, false); + r->discard_no_unref = qemu_opt_get_bool(opts, QCOW2_OPT_DISCARD_NO_UNREF, + false); + if (r->discard_no_unref && s->qcow_version < 3) { + error_setg(errp, + "discard-no-unref is only supported since qcow2 version 3"); + ret = -EINVAL; + goto fail; + } + switch (s->crypt_method_header) { case QCOW_CRYPT_NONE: if (encryptfmt) { @@ -1220,6 +1236,8 @@ static void qcow2_update_options_commit(BlockDriverState *bs, s->discard_passthrough[i] = r->discard_passthrough[i]; } + s->discard_no_unref = r->discard_no_unref; + if (s->cache_clean_interval != r->cache_clean_interval) { cache_clean_timer_del(bs); s->cache_clean_interval = r->cache_clean_interval; diff --git a/block/qcow2.h b/block/qcow2.h index 4f67eb912a..ea9adb5706 100644 --- a/block/qcow2.h +++ b/block/qcow2.h @@ -133,6 +133,7 @@ #define QCOW2_OPT_DISCARD_REQUEST "pass-discard-request" #define QCOW2_OPT_DISCARD_SNAPSHOT "pass-discard-snapshot" #define QCOW2_OPT_DISCARD_OTHER "pass-discard-other" +#define QCOW2_OPT_DISCARD_NO_UNREF "discard-no-unref" #define QCOW2_OPT_OVERLAP "overlap-check" #define QCOW2_OPT_OVERLAP_TEMPLATE "overlap-check.template" #define QCOW2_OPT_OVERLAP_MAIN_HEADER "overlap-check.main-header" @@ -385,6 +386,8 @@ typedef struct BDRVQcow2State { bool discard_passthrough[QCOW2_DISCARD_MAX]; + bool discard_no_unref; + int overlap_check; /* bitmask of Qcow2MetadataOverlap values */ bool signaled_corruption; diff --git a/qapi/block-core.json b/qapi/block-core.json index 4bf89171c6..5dd5f7e4b0 100644 --- a/qapi/block-core.json +++ b/qapi/block-core.json @@ -3478,6 +3478,17 @@ # @pass-discard-other: whether discard requests for the data source # should be issued on other occasions where a cluster gets freed # +# @discard-no-unref: when enabled, discards from the guest will not cause +# cluster allocations to be relinquished. 
This prevents qcow2 fragmentation +# that would be caused by such discards. Besides potential +# performance degradation, such fragmentation can lead to increased +# allocation of clusters past the end of the image file, +# resulting in image files whose file length can grow much larger +# than their guest disk size would suggest. +# If image file length is of concern (e.g. when storing qcow2 +# images directly on block devices), you should consider enabling +# this option. (since 8.1) +# # @overlap-check: which overlap checks to perform for writes to the # image, defaults to 'cached' (since 2.2) # @@ -3516,6 +3527,7 @@ '*pass-discard-request': 'bool', '*pass-discard-snapshot': 'bool', '*pass-discard-other': 'bool', + '*discard-no-unref': 'bool', '*overlap-check': 'Qcow2OverlapChecks', '*cache-size': 'int', '*l2-cache-size': 'int', diff --git a/qemu-options.hx b/qemu-options.hx index b37eb9662b..b57489d7ca 100644 --- a/qemu-options.hx +++ b/qemu-options.hx @@ -1431,6 +1431,18 @@ SRST issued on other occasions where a cluster gets freed (on/off; default: off) + ``discard-no-unref`` + When enabled, discards from the guest will not cause cluster + allocations to be relinquished. This prevents qcow2 fragmentation + that would be caused by such discards. Besides potential + performance degradation, such fragmentation can lead to increased + allocation of clusters past the end of the image file, + resulting in image files whose file length can grow much larger + than their guest disk size would suggest. + If image file length is of concern (e.g. when storing qcow2 + images directly on block devices), you should consider enabling + this option. (on/off; default: off) + ``overlap-check`` Which overlap checks to perform for writes to the image (none/constant/cached/all; default: cached). For details or