multifd: Copy pages before compressing them with zlib
zlib_send_prepare() compresses pages of a running VM. zlib does not
make any thread-safety guarantees with respect to changing deflate()
input concurrently with deflate() [1].

One can observe problems due to this with the IBM zEnterprise Data
Compression accelerator capable zlib [2]. When the hardware
acceleration is enabled, migration/multifd/tcp/plain/zlib test fails
intermittently [3] due to sliding window corruption. The accelerator's
architecture explicitly discourages concurrent accesses [4]:

    Page 26-57, "Other Conditions":

    As observed by this CPU, other CPUs, and channel programs,
    references to the parameter block, first, second, and third
    operands may be multiple-access references, accesses to these
    storage locations are not necessarily block-concurrent, and the
    sequence of these accesses or references is undefined.

Mark Adler pointed out that vanilla zlib performs double fetches under
certain circumstances as well [5], therefore we need to copy data
before passing it to deflate().

[1] https://zlib.net/manual.html
[2] https://github.com/madler/zlib/pull/410
[3] https://lists.nongnu.org/archive/html/qemu-devel/2022-03/msg03988.html
[4] http://publibfp.dhe.ibm.com/epubs/pdf/a227832c.pdf
[5] https://lists.gnu.org/archive/html/qemu-devel/2022-07/msg00889.html

Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com>
Message-Id: <20220705203559.2960949-1-iii@linux.ibm.com>
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
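For illustration only (not part of the commit): a minimal sketch of the fix
described above, where a page that the guest may still be writing is first
copied into a private buffer so that deflate() never reads memory that can
change underneath it. The names compress_page_copy, live_page, scratch and
PAGE_SIZE are hypothetical; only the zlib calls (deflateInit, deflate,
deflateEnd) and the z_stream fields are the real API.

#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <zlib.h>

#define PAGE_SIZE 4096

/*
 * Compress one page.  The page is snapshotted into 'scratch' first: after
 * the memcpy, concurrent writes to 'live_page' no longer matter, because
 * deflate() only ever reads the private copy.
 * Returns the number of compressed bytes produced, or -1 on error.
 */
static long compress_page_copy(z_stream *zs, const uint8_t *live_page,
                               uint8_t *scratch, uint8_t *out, size_t out_len)
{
    memcpy(scratch, live_page, PAGE_SIZE);

    zs->next_in = scratch;
    zs->avail_in = PAGE_SIZE;
    zs->next_out = out;
    zs->avail_out = out_len;

    if (deflate(zs, Z_SYNC_FLUSH) != Z_OK) {
        return -1;
    }
    return (long)(out_len - zs->avail_out);
}

int main(void)
{
    static uint8_t page[PAGE_SIZE], scratch[PAGE_SIZE], out[2 * PAGE_SIZE];
    z_stream zs = { .zalloc = Z_NULL, .zfree = Z_NULL, .opaque = Z_NULL };

    if (deflateInit(&zs, Z_DEFAULT_COMPRESSION) != Z_OK) {
        return 1;
    }
    long n = compress_page_copy(&zs, page, scratch, out, sizeof(out));
    printf("compressed %ld bytes\n", n);
    deflateEnd(&zs);
    return n < 0;
}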
commit 007e179ef0
parent 8aff6f501d

1 changed file with 30 additions and 8 deletions: migration/multifd-zlib.c
@@ -27,6 +27,8 @@ struct zlib_data {
     uint8_t *zbuff;
     /* size of compressed buffer */
     uint32_t zbuff_len;
+    /* uncompressed buffer of size qemu_target_page_size() */
+    uint8_t *buf;
 };
 
 /* Multifd zlib compression */
@@ -45,26 +47,38 @@ static int zlib_send_setup(MultiFDSendParams *p, Error **errp)
 {
     struct zlib_data *z = g_new0(struct zlib_data, 1);
     z_stream *zs = &z->zs;
+    const char *err_msg;
 
     zs->zalloc = Z_NULL;
     zs->zfree = Z_NULL;
     zs->opaque = Z_NULL;
     if (deflateInit(zs, migrate_multifd_zlib_level()) != Z_OK) {
-        g_free(z);
-        error_setg(errp, "multifd %u: deflate init failed", p->id);
-        return -1;
+        err_msg = "deflate init failed";
+        goto err_free_z;
     }
     /* This is the maxium size of the compressed buffer */
     z->zbuff_len = compressBound(MULTIFD_PACKET_SIZE);
     z->zbuff = g_try_malloc(z->zbuff_len);
     if (!z->zbuff) {
-        deflateEnd(&z->zs);
-        g_free(z);
-        error_setg(errp, "multifd %u: out of memory for zbuff", p->id);
-        return -1;
+        err_msg = "out of memory for zbuff";
+        goto err_deflate_end;
     }
+    z->buf = g_try_malloc(qemu_target_page_size());
+    if (!z->buf) {
+        err_msg = "out of memory for buf";
+        goto err_free_zbuff;
+    }
     p->data = z;
     return 0;
+
+err_free_zbuff:
+    g_free(z->zbuff);
+err_deflate_end:
+    deflateEnd(&z->zs);
+err_free_z:
+    g_free(z);
+    error_setg(errp, "multifd %u: %s", p->id, err_msg);
+    return -1;
 }
 
 /**
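For illustration only (not part of the commit): the reworked
zlib_send_setup() above replaces per-branch cleanup with a single unwind
sequence reached through goto labels, freeing resources in reverse order of
allocation. A minimal standalone sketch of the same idiom, with hypothetical
resource names a and b:

#include <stdio.h>
#include <stdlib.h>

/*
 * Labeled-cleanup idiom: each error branch records a message and jumps to
 * the right point in one unwind chain, so resources are always released in
 * reverse order of acquisition.
 */
static int setup_example(void)
{
    const char *err_msg;
    char *a, *b;

    a = malloc(16);
    if (!a) {
        err_msg = "out of memory for a";
        goto err;
    }
    b = malloc(32);
    if (!b) {
        err_msg = "out of memory for b";
        goto err_free_a;
    }

    /* Success path: a real setup function would hand a and b to the caller. */
    free(b);
    free(a);
    return 0;

err_free_a:
    free(a);
err:
    fprintf(stderr, "setup failed: %s\n", err_msg);
    return -1;
}

int main(void)
{
    return setup_example() ? 1 : 0;
}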
@@ -82,6 +96,8 @@ static void zlib_send_cleanup(MultiFDSendParams *p, Error **errp)
     deflateEnd(&z->zs);
     g_free(z->zbuff);
     z->zbuff = NULL;
+    g_free(z->buf);
+    z->buf = NULL;
     g_free(p->data);
     p->data = NULL;
 }
@@ -114,8 +130,14 @@ static int zlib_send_prepare(MultiFDSendParams *p, Error **errp)
             flush = Z_SYNC_FLUSH;
         }
 
+        /*
+         * Since the VM might be running, the page may be changing concurrently
+         * with compression. zlib does not guarantee that this is safe,
+         * therefore copy the page before calling deflate().
+         */
+        memcpy(z->buf, p->pages->block->host + p->normal[i], page_size);
         zs->avail_in = page_size;
-        zs->next_in = p->pages->block->host + p->normal[i];
+        zs->next_in = z->buf;
 
         zs->avail_out = available;
         zs->next_out = z->zbuff + out_size;