Blame SOURCES/kvm-migration-detect-compression-and-decompression-error.patch

1bdc94
From 231178d9b06a3d2bba1e7695071957671d7c08a1 Mon Sep 17 00:00:00 2001
1bdc94
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
1bdc94
Date: Fri, 22 Jun 2018 18:59:51 +0200
1bdc94
Subject: [PATCH 12/57] migration: detect compression and decompression errors
1bdc94
1bdc94
RH-Author: Dr. David Alan Gilbert <dgilbert@redhat.com>
1bdc94
Message-id: <20180622190005.21297-5-dgilbert@redhat.com>
1bdc94
Patchwork-id: 80996
1bdc94
O-Subject: [RHEL7.6 qemu-kvm-rhev PATCH 04/18] migration: detect compression and decompression errors
1bdc94
Bugzilla: 1584139
1bdc94
RH-Acked-by: Peter Xu <peterx@redhat.com>
1bdc94
RH-Acked-by: Juan Quintela <quintela@redhat.com>
1bdc94
RH-Acked-by: Laurent Vivier <lvivier@redhat.com>
1bdc94
1bdc94
From: Xiao Guangrong <xiaoguangrong@tencent.com>
1bdc94
1bdc94
Currently the page being compressed is allowed to be updated by
1bdc94
the VM on the source QEMU; correspondingly, the destination QEMU
1bdc94
just ignores decompression errors. However, this means we completely
1bdc94
miss the chance to catch real errors, and the VM is silently corrupted.
1bdc94
1bdc94
To make the migration more robust, we copy the page to a buffer
1bdc94
first so that it cannot be written by the VM, then detect and properly
1bdc94
handle both compression and decompression errors.
1bdc94
1bdc94
Reviewed-by: Peter Xu <peterx@redhat.com>
1bdc94
Signed-off-by: Xiao Guangrong <xiaoguangrong@tencent.com>
1bdc94
Message-Id: <20180330075128.26919-5-xiaoguangrong@tencent.com>
1bdc94
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
1bdc94
(cherry picked from commit 34ab9e9743aeaf265929d930747f101fa5c76fea)
1bdc94
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
1bdc94
---
1bdc94
 migration/qemu-file.c |  4 ++--
1bdc94
 migration/ram.c       | 56 +++++++++++++++++++++++++++++++++++----------------
1bdc94
 2 files changed, 41 insertions(+), 19 deletions(-)
1bdc94
1bdc94
diff --git a/migration/qemu-file.c b/migration/qemu-file.c
1bdc94
index bafe3a0..0463f4c 100644
1bdc94
--- a/migration/qemu-file.c
1bdc94
+++ b/migration/qemu-file.c
1bdc94
@@ -710,9 +710,9 @@ ssize_t qemu_put_compression_data(QEMUFile *f, z_stream *stream,
1bdc94
     blen = qemu_compress_data(stream, f->buf + f->buf_index + sizeof(int32_t),
1bdc94
                               blen, p, size);
1bdc94
     if (blen < 0) {
1bdc94
-        error_report("Compress Failed!");
1bdc94
-        return 0;
1bdc94
+        return -1;
1bdc94
     }
1bdc94
+
1bdc94
     qemu_put_be32(f, blen);
1bdc94
     if (f->ops->writev_buffer) {
1bdc94
         add_to_iovec(f, f->buf + f->buf_index, blen, false);
1bdc94
diff --git a/migration/ram.c b/migration/ram.c
1bdc94
index be89cd8..cd6d98a 100644
1bdc94
--- a/migration/ram.c
1bdc94
+++ b/migration/ram.c
1bdc94
@@ -269,7 +269,10 @@ struct CompressParam {
1bdc94
     QemuCond cond;
1bdc94
     RAMBlock *block;
1bdc94
     ram_addr_t offset;
1bdc94
+
1bdc94
+    /* internally used fields */
1bdc94
     z_stream stream;
1bdc94
+    uint8_t *originbuf;
1bdc94
 };
1bdc94
 typedef struct CompressParam CompressParam;
1bdc94
 
1bdc94
@@ -296,13 +299,14 @@ static QemuCond comp_done_cond;
1bdc94
 /* The empty QEMUFileOps will be used by file in CompressParam */
1bdc94
 static const QEMUFileOps empty_ops = { };
1bdc94
 
1bdc94
+static QEMUFile *decomp_file;
1bdc94
 static DecompressParam *decomp_param;
1bdc94
 static QemuThread *decompress_threads;
1bdc94
 static QemuMutex decomp_done_lock;
1bdc94
 static QemuCond decomp_done_cond;
1bdc94
 
1bdc94
 static int do_compress_ram_page(QEMUFile *f, z_stream *stream, RAMBlock *block,
1bdc94
-                                ram_addr_t offset);
1bdc94
+                                ram_addr_t offset, uint8_t *source_buf);
1bdc94
 
1bdc94
 static void *do_data_compress(void *opaque)
1bdc94
 {
1bdc94
@@ -318,7 +322,8 @@ static void *do_data_compress(void *opaque)
1bdc94
             param->block = NULL;
1bdc94
             qemu_mutex_unlock(&param->mutex);
1bdc94
 
1bdc94
-            do_compress_ram_page(param->file, &param->stream, block, offset);
1bdc94
+            do_compress_ram_page(param->file, &param->stream, block, offset,
1bdc94
+                                 param->originbuf);
1bdc94
 
1bdc94
             qemu_mutex_lock(&comp_done_lock);
1bdc94
             param->done = true;
1bdc94
@@ -370,6 +375,7 @@ static void compress_threads_save_cleanup(void)
1bdc94
         qemu_mutex_destroy(&comp_param[i].mutex);
1bdc94
         qemu_cond_destroy(&comp_param[i].cond);
1bdc94
         deflateEnd(&comp_param[i].stream);
1bdc94
+        g_free(comp_param[i].originbuf);
1bdc94
         qemu_fclose(comp_param[i].file);
1bdc94
         comp_param[i].file = NULL;
1bdc94
     }
1bdc94
@@ -394,8 +400,14 @@ static int compress_threads_save_setup(void)
1bdc94
     qemu_cond_init(&comp_done_cond);
1bdc94
     qemu_mutex_init(&comp_done_lock);
1bdc94
     for (i = 0; i < thread_count; i++) {
1bdc94
+        comp_param[i].originbuf = g_try_malloc(TARGET_PAGE_SIZE);
1bdc94
+        if (!comp_param[i].originbuf) {
1bdc94
+            goto exit;
1bdc94
+        }
1bdc94
+
1bdc94
         if (deflateInit(&comp_param[i].stream,
1bdc94
                         migrate_compress_level()) != Z_OK) {
1bdc94
+            g_free(comp_param[i].originbuf);
1bdc94
             goto exit;
1bdc94
         }
1bdc94
 
1bdc94
@@ -1054,7 +1066,7 @@ static int ram_save_page(RAMState *rs, PageSearchStatus *pss, bool last_stage)
1bdc94
 }
1bdc94
 
1bdc94
 static int do_compress_ram_page(QEMUFile *f, z_stream *stream, RAMBlock *block,
1bdc94
-                                ram_addr_t offset)
1bdc94
+                                ram_addr_t offset, uint8_t *source_buf)
1bdc94
 {
1bdc94
     RAMState *rs = ram_state;
1bdc94
     int bytes_sent, blen;
1bdc94
@@ -1062,7 +1074,14 @@ static int do_compress_ram_page(QEMUFile *f, z_stream *stream, RAMBlock *block,
1bdc94
 
1bdc94
     bytes_sent = save_page_header(rs, f, block, offset |
1bdc94
                                   RAM_SAVE_FLAG_COMPRESS_PAGE);
1bdc94
-    blen = qemu_put_compression_data(f, stream, p, TARGET_PAGE_SIZE);
1bdc94
+
1bdc94
+    /*
1bdc94
+     * copy it to a internal buffer to avoid it being modified by VM
1bdc94
+     * so that we can catch up the error during compression and
1bdc94
+     * decompression
1bdc94
+     */
1bdc94
+    memcpy(source_buf, p, TARGET_PAGE_SIZE);
1bdc94
+    blen = qemu_put_compression_data(f, stream, source_buf, TARGET_PAGE_SIZE);
1bdc94
     if (blen < 0) {
1bdc94
         bytes_sent = 0;
1bdc94
         qemu_file_set_error(migrate_get_current()->to_dst_file, blen);
1bdc94
@@ -2556,7 +2575,7 @@ static void *do_data_decompress(void *opaque)
1bdc94
     DecompressParam *param = opaque;
1bdc94
     unsigned long pagesize;
1bdc94
     uint8_t *des;
1bdc94
-    int len;
1bdc94
+    int len, ret;
1bdc94
 
1bdc94
     qemu_mutex_lock(&param->mutex);
1bdc94
     while (!param->quit) {
1bdc94
@@ -2567,13 +2586,13 @@ static void *do_data_decompress(void *opaque)
1bdc94
             qemu_mutex_unlock(&param->mutex);
1bdc94
 
1bdc94
             pagesize = TARGET_PAGE_SIZE;
1bdc94
-            /* qemu_uncompress_data() will return failed in some case,
1bdc94
-             * especially when the page is dirtied when doing the compression,
1bdc94
-             * it's not a problem because the dirty page will be retransferred
1bdc94
-             * and uncompress() won't break the data in other pages.
1bdc94
-             */
1bdc94
-            qemu_uncompress_data(&param->stream, des, pagesize, param->compbuf,
1bdc94
-                                 len);
1bdc94
+
1bdc94
+            ret = qemu_uncompress_data(&param->stream, des, pagesize,
1bdc94
+                                       param->compbuf, len);
1bdc94
+            if (ret < 0) {
1bdc94
+                error_report("decompress data failed");
1bdc94
+                qemu_file_set_error(decomp_file, ret);
1bdc94
+            }
1bdc94
 
1bdc94
             qemu_mutex_lock(&decomp_done_lock);
1bdc94
             param->done = true;
1bdc94
@@ -2590,12 +2609,12 @@ static void *do_data_decompress(void *opaque)
1bdc94
     return NULL;
1bdc94
 }
1bdc94
 
1bdc94
-static void wait_for_decompress_done(void)
1bdc94
+static int wait_for_decompress_done(void)
1bdc94
 {
1bdc94
     int idx, thread_count;
1bdc94
 
1bdc94
     if (!migrate_use_compression()) {
1bdc94
-        return;
1bdc94
+        return 0;
1bdc94
     }
1bdc94
 
1bdc94
     thread_count = migrate_decompress_threads();
1bdc94
@@ -2606,6 +2625,7 @@ static void wait_for_decompress_done(void)
1bdc94
         }
1bdc94
     }
1bdc94
     qemu_mutex_unlock(&decomp_done_lock);
1bdc94
+    return qemu_file_get_error(decomp_file);
1bdc94
 }
1bdc94
 
1bdc94
 static void compress_threads_load_cleanup(void)
1bdc94
@@ -2646,9 +2666,10 @@ static void compress_threads_load_cleanup(void)
1bdc94
     g_free(decomp_param);
1bdc94
     decompress_threads = NULL;
1bdc94
     decomp_param = NULL;
1bdc94
+    decomp_file = NULL;
1bdc94
 }
1bdc94
 
1bdc94
-static int compress_threads_load_setup(void)
1bdc94
+static int compress_threads_load_setup(QEMUFile *f)
1bdc94
 {
1bdc94
     int i, thread_count;
1bdc94
 
1bdc94
@@ -2661,6 +2682,7 @@ static int compress_threads_load_setup(void)
1bdc94
     decomp_param = g_new0(DecompressParam, thread_count);
1bdc94
     qemu_mutex_init(&decomp_done_lock);
1bdc94
     qemu_cond_init(&decomp_done_cond);
1bdc94
+    decomp_file = f;
1bdc94
     for (i = 0; i < thread_count; i++) {
1bdc94
         if (inflateInit(&decomp_param[i].stream) != Z_OK) {
1bdc94
             goto exit;
1bdc94
@@ -2720,7 +2742,7 @@ static void decompress_data_with_multi_threads(QEMUFile *f,
1bdc94
  */
1bdc94
 static int ram_load_setup(QEMUFile *f, void *opaque)
1bdc94
 {
1bdc94
-    if (compress_threads_load_setup()) {
1bdc94
+    if (compress_threads_load_setup(f)) {
1bdc94
         return -1;
1bdc94
     }
1bdc94
 
1bdc94
@@ -3075,7 +3097,7 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id)
1bdc94
         }
1bdc94
     }
1bdc94
 
1bdc94
-    wait_for_decompress_done();
1bdc94
+    ret |= wait_for_decompress_done();
1bdc94
     rcu_read_unlock();
1bdc94
     trace_ram_load_complete(ret, seq_iter);
1bdc94
     return ret;
1bdc94
-- 
1bdc94
1.8.3.1
1bdc94