Blame SOURCES/kvm-migration-detect-compression-and-decompression-error.patch

ae23c9
From aa3254bca93fb1702f0aa236b70d705ee8bf121c Mon Sep 17 00:00:00 2001
ae23c9
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
ae23c9
Date: Wed, 1 Aug 2018 13:55:08 +0100
ae23c9
Subject: [PATCH 04/21] migration: detect compression and decompression errors
ae23c9
ae23c9
RH-Author: Dr. David Alan Gilbert <dgilbert@redhat.com>
ae23c9
Message-id: <20180801135522.11658-5-dgilbert@redhat.com>
ae23c9
Patchwork-id: 81583
ae23c9
O-Subject: [qemu-kvm RHEL8/virt212 PATCH 04/18] migration: detect compression and decompression errors
ae23c9
Bugzilla: 1594384
ae23c9
RH-Acked-by: Peter Xu <peterx@redhat.com>
ae23c9
RH-Acked-by: John Snow <jsnow@redhat.com>
ae23c9
RH-Acked-by: Juan Quintela <quintela@redhat.com>
ae23c9
ae23c9
From: Xiao Guangrong <xiaoguangrong@tencent.com>
ae23c9
ae23c9
Currently the page being compressed is allowed to be updated by
ae23c9
the VM on the source QEMU, correspondingly the destination QEMU
ae23c9
just ignores the decompression error. However, we completely miss
ae23c9
the chance to catch real errors, and the VM is then silently corrupted.
ae23c9
ae23c9
To make the migration more robust, we copy the page to a buffer
ae23c9
first to avoid it being written by VM, then detect and handle the
ae23c9
errors of both compression and decompression properly.
ae23c9
ae23c9
Reviewed-by: Peter Xu <peterx@redhat.com>
ae23c9
Signed-off-by: Xiao Guangrong <xiaoguangrong@tencent.com>
ae23c9
Message-Id: <20180330075128.26919-5-xiaoguangrong@tencent.com>
ae23c9
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
ae23c9
(cherry picked from commit 34ab9e9743aeaf265929d930747f101fa5c76fea)
ae23c9
Signed-off-by: Danilo C. L. de Paula <ddepaula@redhat.com>
ae23c9
---
ae23c9
 migration/qemu-file.c |  4 ++--
ae23c9
 migration/ram.c       | 56 +++++++++++++++++++++++++++++++++++----------------
ae23c9
 2 files changed, 41 insertions(+), 19 deletions(-)
ae23c9
ae23c9
diff --git a/migration/qemu-file.c b/migration/qemu-file.c
ae23c9
index bafe3a0..0463f4c 100644
ae23c9
--- a/migration/qemu-file.c
ae23c9
+++ b/migration/qemu-file.c
ae23c9
@@ -710,9 +710,9 @@ ssize_t qemu_put_compression_data(QEMUFile *f, z_stream *stream,
ae23c9
     blen = qemu_compress_data(stream, f->buf + f->buf_index + sizeof(int32_t),
ae23c9
                               blen, p, size);
ae23c9
     if (blen < 0) {
ae23c9
-        error_report("Compress Failed!");
ae23c9
-        return 0;
ae23c9
+        return -1;
ae23c9
     }
ae23c9
+
ae23c9
     qemu_put_be32(f, blen);
ae23c9
     if (f->ops->writev_buffer) {
ae23c9
         add_to_iovec(f, f->buf + f->buf_index, blen, false);
ae23c9
diff --git a/migration/ram.c b/migration/ram.c
ae23c9
index be89cd8..cd6d98a 100644
ae23c9
--- a/migration/ram.c
ae23c9
+++ b/migration/ram.c
ae23c9
@@ -269,7 +269,10 @@ struct CompressParam {
ae23c9
     QemuCond cond;
ae23c9
     RAMBlock *block;
ae23c9
     ram_addr_t offset;
ae23c9
+
ae23c9
+    /* internally used fields */
ae23c9
     z_stream stream;
ae23c9
+    uint8_t *originbuf;
ae23c9
 };
ae23c9
 typedef struct CompressParam CompressParam;
ae23c9
 
ae23c9
@@ -296,13 +299,14 @@ static QemuCond comp_done_cond;
ae23c9
 /* The empty QEMUFileOps will be used by file in CompressParam */
ae23c9
 static const QEMUFileOps empty_ops = { };
ae23c9
 
ae23c9
+static QEMUFile *decomp_file;
ae23c9
 static DecompressParam *decomp_param;
ae23c9
 static QemuThread *decompress_threads;
ae23c9
 static QemuMutex decomp_done_lock;
ae23c9
 static QemuCond decomp_done_cond;
ae23c9
 
ae23c9
 static int do_compress_ram_page(QEMUFile *f, z_stream *stream, RAMBlock *block,
ae23c9
-                                ram_addr_t offset);
ae23c9
+                                ram_addr_t offset, uint8_t *source_buf);
ae23c9
 
ae23c9
 static void *do_data_compress(void *opaque)
ae23c9
 {
ae23c9
@@ -318,7 +322,8 @@ static void *do_data_compress(void *opaque)
ae23c9
             param->block = NULL;
ae23c9
             qemu_mutex_unlock(&param->mutex);
ae23c9
 
ae23c9
-            do_compress_ram_page(param->file, &param->stream, block, offset);
ae23c9
+            do_compress_ram_page(param->file, &param->stream, block, offset,
ae23c9
+                                 param->originbuf);
ae23c9
 
ae23c9
             qemu_mutex_lock(&comp_done_lock);
ae23c9
             param->done = true;
ae23c9
@@ -370,6 +375,7 @@ static void compress_threads_save_cleanup(void)
ae23c9
         qemu_mutex_destroy(&comp_param[i].mutex);
ae23c9
         qemu_cond_destroy(&comp_param[i].cond);
ae23c9
         deflateEnd(&comp_param[i].stream);
ae23c9
+        g_free(comp_param[i].originbuf);
ae23c9
         qemu_fclose(comp_param[i].file);
ae23c9
         comp_param[i].file = NULL;
ae23c9
     }
ae23c9
@@ -394,8 +400,14 @@ static int compress_threads_save_setup(void)
ae23c9
     qemu_cond_init(&comp_done_cond);
ae23c9
     qemu_mutex_init(&comp_done_lock);
ae23c9
     for (i = 0; i < thread_count; i++) {
ae23c9
+        comp_param[i].originbuf = g_try_malloc(TARGET_PAGE_SIZE);
ae23c9
+        if (!comp_param[i].originbuf) {
ae23c9
+            goto exit;
ae23c9
+        }
ae23c9
+
ae23c9
         if (deflateInit(&comp_param[i].stream,
ae23c9
                         migrate_compress_level()) != Z_OK) {
ae23c9
+            g_free(comp_param[i].originbuf);
ae23c9
             goto exit;
ae23c9
         }
ae23c9
 
ae23c9
@@ -1054,7 +1066,7 @@ static int ram_save_page(RAMState *rs, PageSearchStatus *pss, bool last_stage)
ae23c9
 }
ae23c9
 
ae23c9
 static int do_compress_ram_page(QEMUFile *f, z_stream *stream, RAMBlock *block,
ae23c9
-                                ram_addr_t offset)
ae23c9
+                                ram_addr_t offset, uint8_t *source_buf)
ae23c9
 {
ae23c9
     RAMState *rs = ram_state;
ae23c9
     int bytes_sent, blen;
ae23c9
@@ -1062,7 +1074,14 @@ static int do_compress_ram_page(QEMUFile *f, z_stream *stream, RAMBlock *block,
ae23c9
 
ae23c9
     bytes_sent = save_page_header(rs, f, block, offset |
ae23c9
                                   RAM_SAVE_FLAG_COMPRESS_PAGE);
ae23c9
-    blen = qemu_put_compression_data(f, stream, p, TARGET_PAGE_SIZE);
ae23c9
+
ae23c9
+    /*
ae23c9
+     * copy it to a internal buffer to avoid it being modified by VM
ae23c9
+     * so that we can catch up the error during compression and
ae23c9
+     * decompression
ae23c9
+     */
ae23c9
+    memcpy(source_buf, p, TARGET_PAGE_SIZE);
ae23c9
+    blen = qemu_put_compression_data(f, stream, source_buf, TARGET_PAGE_SIZE);
ae23c9
     if (blen < 0) {
ae23c9
         bytes_sent = 0;
ae23c9
         qemu_file_set_error(migrate_get_current()->to_dst_file, blen);
ae23c9
@@ -2556,7 +2575,7 @@ static void *do_data_decompress(void *opaque)
ae23c9
     DecompressParam *param = opaque;
ae23c9
     unsigned long pagesize;
ae23c9
     uint8_t *des;
ae23c9
-    int len;
ae23c9
+    int len, ret;
ae23c9
 
ae23c9
     qemu_mutex_lock(&param->mutex);
ae23c9
     while (!param->quit) {
ae23c9
@@ -2567,13 +2586,13 @@ static void *do_data_decompress(void *opaque)
ae23c9
             qemu_mutex_unlock(&param->mutex);
ae23c9
 
ae23c9
             pagesize = TARGET_PAGE_SIZE;
ae23c9
-            /* qemu_uncompress_data() will return failed in some case,
ae23c9
-             * especially when the page is dirtied when doing the compression,
ae23c9
-             * it's not a problem because the dirty page will be retransferred
ae23c9
-             * and uncompress() won't break the data in other pages.
ae23c9
-             */
ae23c9
-            qemu_uncompress_data(&param->stream, des, pagesize, param->compbuf,
ae23c9
-                                 len);
ae23c9
+
ae23c9
+            ret = qemu_uncompress_data(&param->stream, des, pagesize,
ae23c9
+                                       param->compbuf, len);
ae23c9
+            if (ret < 0) {
ae23c9
+                error_report("decompress data failed");
ae23c9
+                qemu_file_set_error(decomp_file, ret);
ae23c9
+            }
ae23c9
 
ae23c9
             qemu_mutex_lock(&decomp_done_lock);
ae23c9
             param->done = true;
ae23c9
@@ -2590,12 +2609,12 @@ static void *do_data_decompress(void *opaque)
ae23c9
     return NULL;
ae23c9
 }
ae23c9
 
ae23c9
-static void wait_for_decompress_done(void)
ae23c9
+static int wait_for_decompress_done(void)
ae23c9
 {
ae23c9
     int idx, thread_count;
ae23c9
 
ae23c9
     if (!migrate_use_compression()) {
ae23c9
-        return;
ae23c9
+        return 0;
ae23c9
     }
ae23c9
 
ae23c9
     thread_count = migrate_decompress_threads();
ae23c9
@@ -2606,6 +2625,7 @@ static void wait_for_decompress_done(void)
ae23c9
         }
ae23c9
     }
ae23c9
     qemu_mutex_unlock(&decomp_done_lock);
ae23c9
+    return qemu_file_get_error(decomp_file);
ae23c9
 }
ae23c9
 
ae23c9
 static void compress_threads_load_cleanup(void)
ae23c9
@@ -2646,9 +2666,10 @@ static void compress_threads_load_cleanup(void)
ae23c9
     g_free(decomp_param);
ae23c9
     decompress_threads = NULL;
ae23c9
     decomp_param = NULL;
ae23c9
+    decomp_file = NULL;
ae23c9
 }
ae23c9
 
ae23c9
-static int compress_threads_load_setup(void)
ae23c9
+static int compress_threads_load_setup(QEMUFile *f)
ae23c9
 {
ae23c9
     int i, thread_count;
ae23c9
 
ae23c9
@@ -2661,6 +2682,7 @@ static int compress_threads_load_setup(void)
ae23c9
     decomp_param = g_new0(DecompressParam, thread_count);
ae23c9
     qemu_mutex_init(&decomp_done_lock);
ae23c9
     qemu_cond_init(&decomp_done_cond);
ae23c9
+    decomp_file = f;
ae23c9
     for (i = 0; i < thread_count; i++) {
ae23c9
         if (inflateInit(&decomp_param[i].stream) != Z_OK) {
ae23c9
             goto exit;
ae23c9
@@ -2720,7 +2742,7 @@ static void decompress_data_with_multi_threads(QEMUFile *f,
ae23c9
  */
ae23c9
 static int ram_load_setup(QEMUFile *f, void *opaque)
ae23c9
 {
ae23c9
-    if (compress_threads_load_setup()) {
ae23c9
+    if (compress_threads_load_setup(f)) {
ae23c9
         return -1;
ae23c9
     }
ae23c9
 
ae23c9
@@ -3075,7 +3097,7 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id)
ae23c9
         }
ae23c9
     }
ae23c9
 
ae23c9
-    wait_for_decompress_done();
ae23c9
+    ret |= wait_for_decompress_done();
ae23c9
     rcu_read_unlock();
ae23c9
     trace_ram_load_complete(ret, seq_iter);
ae23c9
     return ret;
ae23c9
-- 
ae23c9
1.8.3.1
ae23c9