Blame SOURCES/kvm-migration-detect-compression-and-decompression-error.patch

357786
From 231178d9b06a3d2bba1e7695071957671d7c08a1 Mon Sep 17 00:00:00 2001
357786
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
357786
Date: Fri, 22 Jun 2018 18:59:51 +0200
357786
Subject: [PATCH 12/57] migration: detect compression and decompression errors
357786
357786
RH-Author: Dr. David Alan Gilbert <dgilbert@redhat.com>
357786
Message-id: <20180622190005.21297-5-dgilbert@redhat.com>
357786
Patchwork-id: 80996
357786
O-Subject: [RHEL7.6 qemu-kvm-rhev PATCH 04/18] migration: detect compression and decompression errors
357786
Bugzilla: 1584139
357786
RH-Acked-by: Peter Xu <peterx@redhat.com>
357786
RH-Acked-by: Juan Quintela <quintela@redhat.com>
357786
RH-Acked-by: Laurent Vivier <lvivier@redhat.com>
357786
357786
From: Xiao Guangrong <xiaoguangrong@tencent.com>
357786
357786
Currently the page being compressed is allowed to be updated by
357786
the VM on the source QEMU; correspondingly, the destination QEMU
357786
just ignores the decompression error. However, we completely miss
357786
the chance to catch real errors, and the VM is then silently corrupted.
357786
357786
To make the migration more robust, we copy the page to a buffer
357786
first to avoid it being written by the VM, then detect and handle the
357786
errors of both compression and decompression properly.
357786
357786
Reviewed-by: Peter Xu <peterx@redhat.com>
357786
Signed-off-by: Xiao Guangrong <xiaoguangrong@tencent.com>
357786
Message-Id: <20180330075128.26919-5-xiaoguangrong@tencent.com>
357786
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
357786
(cherry picked from commit 34ab9e9743aeaf265929d930747f101fa5c76fea)
357786
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
357786
---
357786
 migration/qemu-file.c |  4 ++--
357786
 migration/ram.c       | 56 +++++++++++++++++++++++++++++++++++----------------
357786
 2 files changed, 41 insertions(+), 19 deletions(-)
357786
357786
diff --git a/migration/qemu-file.c b/migration/qemu-file.c
357786
index bafe3a0..0463f4c 100644
357786
--- a/migration/qemu-file.c
357786
+++ b/migration/qemu-file.c
357786
@@ -710,9 +710,9 @@ ssize_t qemu_put_compression_data(QEMUFile *f, z_stream *stream,
357786
     blen = qemu_compress_data(stream, f->buf + f->buf_index + sizeof(int32_t),
357786
                               blen, p, size);
357786
     if (blen < 0) {
357786
-        error_report("Compress Failed!");
357786
-        return 0;
357786
+        return -1;
357786
     }
357786
+
357786
     qemu_put_be32(f, blen);
357786
     if (f->ops->writev_buffer) {
357786
         add_to_iovec(f, f->buf + f->buf_index, blen, false);
357786
diff --git a/migration/ram.c b/migration/ram.c
357786
index be89cd8..cd6d98a 100644
357786
--- a/migration/ram.c
357786
+++ b/migration/ram.c
357786
@@ -269,7 +269,10 @@ struct CompressParam {
357786
     QemuCond cond;
357786
     RAMBlock *block;
357786
     ram_addr_t offset;
357786
+
357786
+    /* internally used fields */
357786
     z_stream stream;
357786
+    uint8_t *originbuf;
357786
 };
357786
 typedef struct CompressParam CompressParam;
357786
 
357786
@@ -296,13 +299,14 @@ static QemuCond comp_done_cond;
357786
 /* The empty QEMUFileOps will be used by file in CompressParam */
357786
 static const QEMUFileOps empty_ops = { };
357786
 
357786
+static QEMUFile *decomp_file;
357786
 static DecompressParam *decomp_param;
357786
 static QemuThread *decompress_threads;
357786
 static QemuMutex decomp_done_lock;
357786
 static QemuCond decomp_done_cond;
357786
 
357786
 static int do_compress_ram_page(QEMUFile *f, z_stream *stream, RAMBlock *block,
357786
-                                ram_addr_t offset);
357786
+                                ram_addr_t offset, uint8_t *source_buf);
357786
 
357786
 static void *do_data_compress(void *opaque)
357786
 {
357786
@@ -318,7 +322,8 @@ static void *do_data_compress(void *opaque)
357786
             param->block = NULL;
357786
             qemu_mutex_unlock(&param->mutex);
357786
 
357786
-            do_compress_ram_page(param->file, &param->stream, block, offset);
357786
+            do_compress_ram_page(param->file, &param->stream, block, offset,
357786
+                                 param->originbuf);
357786
 
357786
             qemu_mutex_lock(&comp_done_lock);
357786
             param->done = true;
357786
@@ -370,6 +375,7 @@ static void compress_threads_save_cleanup(void)
357786
         qemu_mutex_destroy(&comp_param[i].mutex);
357786
         qemu_cond_destroy(&comp_param[i].cond);
357786
         deflateEnd(&comp_param[i].stream);
357786
+        g_free(comp_param[i].originbuf);
357786
         qemu_fclose(comp_param[i].file);
357786
         comp_param[i].file = NULL;
357786
     }
357786
@@ -394,8 +400,14 @@ static int compress_threads_save_setup(void)
357786
     qemu_cond_init(&comp_done_cond);
357786
     qemu_mutex_init(&comp_done_lock);
357786
     for (i = 0; i < thread_count; i++) {
357786
+        comp_param[i].originbuf = g_try_malloc(TARGET_PAGE_SIZE);
357786
+        if (!comp_param[i].originbuf) {
357786
+            goto exit;
357786
+        }
357786
+
357786
         if (deflateInit(&comp_param[i].stream,
357786
                         migrate_compress_level()) != Z_OK) {
357786
+            g_free(comp_param[i].originbuf);
357786
             goto exit;
357786
         }
357786
 
357786
@@ -1054,7 +1066,7 @@ static int ram_save_page(RAMState *rs, PageSearchStatus *pss, bool last_stage)
357786
 }
357786
 
357786
 static int do_compress_ram_page(QEMUFile *f, z_stream *stream, RAMBlock *block,
357786
-                                ram_addr_t offset)
357786
+                                ram_addr_t offset, uint8_t *source_buf)
357786
 {
357786
     RAMState *rs = ram_state;
357786
     int bytes_sent, blen;
357786
@@ -1062,7 +1074,14 @@ static int do_compress_ram_page(QEMUFile *f, z_stream *stream, RAMBlock *block,
357786
 
357786
     bytes_sent = save_page_header(rs, f, block, offset |
357786
                                   RAM_SAVE_FLAG_COMPRESS_PAGE);
357786
-    blen = qemu_put_compression_data(f, stream, p, TARGET_PAGE_SIZE);
357786
+
357786
+    /*
357786
+     * copy it to a internal buffer to avoid it being modified by VM
357786
+     * so that we can catch up the error during compression and
357786
+     * decompression
357786
+     */
357786
+    memcpy(source_buf, p, TARGET_PAGE_SIZE);
357786
+    blen = qemu_put_compression_data(f, stream, source_buf, TARGET_PAGE_SIZE);
357786
     if (blen < 0) {
357786
         bytes_sent = 0;
357786
         qemu_file_set_error(migrate_get_current()->to_dst_file, blen);
357786
@@ -2556,7 +2575,7 @@ static void *do_data_decompress(void *opaque)
357786
     DecompressParam *param = opaque;
357786
     unsigned long pagesize;
357786
     uint8_t *des;
357786
-    int len;
357786
+    int len, ret;
357786
 
357786
     qemu_mutex_lock(&param->mutex);
357786
     while (!param->quit) {
357786
@@ -2567,13 +2586,13 @@ static void *do_data_decompress(void *opaque)
357786
             qemu_mutex_unlock(&param->mutex);
357786
 
357786
             pagesize = TARGET_PAGE_SIZE;
357786
-            /* qemu_uncompress_data() will return failed in some case,
357786
-             * especially when the page is dirtied when doing the compression,
357786
-             * it's not a problem because the dirty page will be retransferred
357786
-             * and uncompress() won't break the data in other pages.
357786
-             */
357786
-            qemu_uncompress_data(&param->stream, des, pagesize, param->compbuf,
357786
-                                 len);
357786
+
357786
+            ret = qemu_uncompress_data(&param->stream, des, pagesize,
357786
+                                       param->compbuf, len);
357786
+            if (ret < 0) {
357786
+                error_report("decompress data failed");
357786
+                qemu_file_set_error(decomp_file, ret);
357786
+            }
357786
 
357786
             qemu_mutex_lock(&decomp_done_lock);
357786
             param->done = true;
357786
@@ -2590,12 +2609,12 @@ static void *do_data_decompress(void *opaque)
357786
     return NULL;
357786
 }
357786
 
357786
-static void wait_for_decompress_done(void)
357786
+static int wait_for_decompress_done(void)
357786
 {
357786
     int idx, thread_count;
357786
 
357786
     if (!migrate_use_compression()) {
357786
-        return;
357786
+        return 0;
357786
     }
357786
 
357786
     thread_count = migrate_decompress_threads();
357786
@@ -2606,6 +2625,7 @@ static void wait_for_decompress_done(void)
357786
         }
357786
     }
357786
     qemu_mutex_unlock(&decomp_done_lock);
357786
+    return qemu_file_get_error(decomp_file);
357786
 }
357786
 
357786
 static void compress_threads_load_cleanup(void)
357786
@@ -2646,9 +2666,10 @@ static void compress_threads_load_cleanup(void)
357786
     g_free(decomp_param);
357786
     decompress_threads = NULL;
357786
     decomp_param = NULL;
357786
+    decomp_file = NULL;
357786
 }
357786
 
357786
-static int compress_threads_load_setup(void)
357786
+static int compress_threads_load_setup(QEMUFile *f)
357786
 {
357786
     int i, thread_count;
357786
 
357786
@@ -2661,6 +2682,7 @@ static int compress_threads_load_setup(void)
357786
     decomp_param = g_new0(DecompressParam, thread_count);
357786
     qemu_mutex_init(&decomp_done_lock);
357786
     qemu_cond_init(&decomp_done_cond);
357786
+    decomp_file = f;
357786
     for (i = 0; i < thread_count; i++) {
357786
         if (inflateInit(&decomp_param[i].stream) != Z_OK) {
357786
             goto exit;
357786
@@ -2720,7 +2742,7 @@ static void decompress_data_with_multi_threads(QEMUFile *f,
357786
  */
357786
 static int ram_load_setup(QEMUFile *f, void *opaque)
357786
 {
357786
-    if (compress_threads_load_setup()) {
357786
+    if (compress_threads_load_setup(f)) {
357786
         return -1;
357786
     }
357786
 
357786
@@ -3075,7 +3097,7 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id)
357786
         }
357786
     }
357786
 
357786
-    wait_for_decompress_done();
357786
+    ret |= wait_for_decompress_done();
357786
     rcu_read_unlock();
357786
     trace_ram_load_complete(ret, seq_iter);
357786
     return ret;
357786
-- 
357786
1.8.3.1
357786