Blame SOURCES/kvm-migration-discard-non-migratable-RAMBlocks.patch

383d26
From 0319afed69c1b3206fce51fd286c3351c6fd6958 Mon Sep 17 00:00:00 2001
383d26
From: "plai@redhat.com" <plai@redhat.com>
383d26
Date: Fri, 31 Aug 2018 16:25:51 +0200
383d26
Subject: [PATCH 09/29] migration: discard non-migratable RAMBlocks
383d26
MIME-Version: 1.0
383d26
Content-Type: text/plain; charset=UTF-8
383d26
Content-Transfer-Encoding: 8bit
383d26
383d26
RH-Author: plai@redhat.com
383d26
Message-id: <1535732759-22481-2-git-send-email-plai@redhat.com>
383d26
Patchwork-id: 82012
383d26
O-Subject: [RHEL7.6 PATCH BZ 1539280 1/9] migration: discard non-migratable RAMBlocks
383d26
Bugzilla: 1539280
383d26
RH-Acked-by: Michael S. Tsirkin <mst@redhat.com>
383d26
RH-Acked-by: Pankaj Gupta <pagupta@redhat.com>
383d26
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
383d26
383d26
From: Cédric Le Goater <clg@kaod.org>
383d26
383d26
On the POWER9 processor, the XIVE interrupt controller can control
383d26
interrupt sources using MMIO to trigger events, to EOI or to turn off
383d26
the sources. Priority management and interrupt acknowledgment are also
383d26
controlled by MMIO in the presenter sub-engine.
383d26
383d26
These MMIO regions are exposed to guests in QEMU with a set of 'ram
383d26
device' memory mappings, similarly to VFIO, and the VMAs are populated
383d26
dynamically with the appropriate pages using a fault handler.
383d26
383d26
But, these regions are an issue for migration. We need to discard the
383d26
associated RAMBlocks from the RAM state on the source VM and let the
383d26
destination VM rebuild the memory mappings on the new host in the
383d26
post_load() operation just before resuming the system.
383d26
383d26
To achieve this goal, the following introduces a new RAMBlock flag
383d26
RAM_MIGRATABLE which is updated in the vmstate_register_ram() and
383d26
vmstate_unregister_ram() routines. This flag is then used by the
383d26
migration to identify RAMBlocks to discard on the source. Some checks
383d26
are also performed on the destination to make sure nothing invalid was
383d26
sent.
383d26
383d26
This change impacts the boston, malta and jazz mips boards for which
383d26
migration compatibility is broken.
383d26
383d26
Signed-off-by: Cédric Le Goater <clg@kaod.org>
383d26
Reviewed-by: Juan Quintela <quintela@redhat.com>
383d26
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
383d26
Signed-off-by: Juan Quintela <quintela@redhat.com>
383d26
(cherry picked from commit b895de502717b83b4e5f089df617cb23530c4d2d)
383d26
Signed-off-by: Paul Lai <plai@redhat.com>
383d26
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
383d26
---
383d26
 exec.c                    | 38 ++++++++++++++++++++++++++++++++++++++
383d26
 include/exec/cpu-common.h |  4 ++++
383d26
 migration/postcopy-ram.c  | 12 ++++++------
383d26
 migration/ram.c           | 46 ++++++++++++++++++++++++++++++++++------------
383d26
 migration/savevm.c        |  2 ++
383d26
 5 files changed, 84 insertions(+), 18 deletions(-)
383d26
383d26
diff --git a/exec.c b/exec.c
383d26
index 02b1efe..7323d39 100644
383d26
--- a/exec.c
383d26
+++ b/exec.c
383d26
@@ -104,6 +104,9 @@ static MemoryRegion io_mem_unassigned;
383d26
  * (Set during postcopy)
383d26
  */
383d26
 #define RAM_UF_ZEROPAGE (1 << 3)
383d26
+
383d26
+/* RAM can be migrated */
383d26
+#define RAM_MIGRATABLE (1 << 4)
383d26
 #endif
383d26
 
383d26
 #ifdef TARGET_PAGE_BITS_VARY
383d26
@@ -1807,6 +1810,21 @@ void qemu_ram_set_uf_zeroable(RAMBlock *rb)
383d26
     rb->flags |= RAM_UF_ZEROPAGE;
383d26
 }
383d26
 
383d26
+bool qemu_ram_is_migratable(RAMBlock *rb)
383d26
+{
383d26
+    return rb->flags & RAM_MIGRATABLE;
383d26
+}
383d26
+
383d26
+void qemu_ram_set_migratable(RAMBlock *rb)
383d26
+{
383d26
+    rb->flags |= RAM_MIGRATABLE;
383d26
+}
383d26
+
383d26
+void qemu_ram_unset_migratable(RAMBlock *rb)
383d26
+{
383d26
+    rb->flags &= ~RAM_MIGRATABLE;
383d26
+}
383d26
+
383d26
 /* Called with iothread lock held.  */
383d26
 void qemu_ram_set_idstr(RAMBlock *new_block, const char *name, DeviceState *dev)
383d26
 {
383d26
@@ -3750,6 +3768,26 @@ int qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque)
383d26
     return ret;
383d26
 }
383d26
 
383d26
+int qemu_ram_foreach_migratable_block(RAMBlockIterFunc func, void *opaque)
383d26
+{
383d26
+    RAMBlock *block;
383d26
+    int ret = 0;
383d26
+
383d26
+    rcu_read_lock();
383d26
+    RAMBLOCK_FOREACH(block) {
383d26
+        if (!qemu_ram_is_migratable(block)) {
383d26
+            continue;
383d26
+        }
383d26
+        ret = func(block->idstr, block->host, block->offset,
383d26
+                   block->used_length, opaque);
383d26
+        if (ret) {
383d26
+            break;
383d26
+        }
383d26
+    }
383d26
+    rcu_read_unlock();
383d26
+    return ret;
383d26
+}
383d26
+
383d26
 /*
383d26
  * Unmap pages of memory from start to start+length such that
383d26
  * they a) read as 0, b) Trigger whatever fault mechanism
383d26
diff --git a/include/exec/cpu-common.h b/include/exec/cpu-common.h
383d26
index 24d335f..0b58e26 100644
383d26
--- a/include/exec/cpu-common.h
383d26
+++ b/include/exec/cpu-common.h
383d26
@@ -75,6 +75,9 @@ const char *qemu_ram_get_idstr(RAMBlock *rb);
383d26
 bool qemu_ram_is_shared(RAMBlock *rb);
383d26
 bool qemu_ram_is_uf_zeroable(RAMBlock *rb);
383d26
 void qemu_ram_set_uf_zeroable(RAMBlock *rb);
383d26
+bool qemu_ram_is_migratable(RAMBlock *rb);
383d26
+void qemu_ram_set_migratable(RAMBlock *rb);
383d26
+void qemu_ram_unset_migratable(RAMBlock *rb);
383d26
 
383d26
 size_t qemu_ram_pagesize(RAMBlock *block);
383d26
 size_t qemu_ram_pagesize_largest(void);
383d26
@@ -119,6 +122,7 @@ typedef int (RAMBlockIterFunc)(const char *block_name, void *host_addr,
383d26
     ram_addr_t offset, ram_addr_t length, void *opaque);
383d26
 
383d26
 int qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque);
383d26
+int qemu_ram_foreach_migratable_block(RAMBlockIterFunc func, void *opaque);
383d26
 int ram_block_discard_range(RAMBlock *rb, uint64_t start, size_t length);
383d26
 
383d26
 #endif
383d26
diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c
383d26
index 4a0b33b..001b041 100644
383d26
--- a/migration/postcopy-ram.c
383d26
+++ b/migration/postcopy-ram.c
383d26
@@ -264,7 +264,7 @@ bool postcopy_ram_supported_by_host(MigrationIncomingState *mis)
383d26
     }
383d26
 
383d26
     /* We don't support postcopy with shared RAM yet */
383d26
-    if (qemu_ram_foreach_block(test_ramblock_postcopiable, NULL)) {
383d26
+    if (qemu_ram_foreach_migratable_block(test_ramblock_postcopiable, NULL)) {
383d26
         goto out;
383d26
     }
383d26
 
383d26
@@ -392,7 +392,7 @@ static int cleanup_range(const char *block_name, void *host_addr,
383d26
  */
383d26
 int postcopy_ram_incoming_init(MigrationIncomingState *mis, size_t ram_pages)
383d26
 {
383d26
-    if (qemu_ram_foreach_block(init_range, NULL)) {
383d26
+    if (qemu_ram_foreach_migratable_block(init_range, NULL)) {
383d26
         return -1;
383d26
     }
383d26
 
383d26
@@ -414,7 +414,7 @@ int postcopy_ram_incoming_cleanup(MigrationIncomingState *mis)
383d26
             return -1;
383d26
         }
383d26
 
383d26
-        if (qemu_ram_foreach_block(cleanup_range, mis)) {
383d26
+        if (qemu_ram_foreach_migratable_block(cleanup_range, mis)) {
383d26
             return -1;
383d26
         }
383d26
         /* Let the fault thread quit */
383d26
@@ -480,7 +480,7 @@ static int nhp_range(const char *block_name, void *host_addr,
383d26
  */
383d26
 int postcopy_ram_prepare_discard(MigrationIncomingState *mis)
383d26
 {
383d26
-    if (qemu_ram_foreach_block(nhp_range, mis)) {
383d26
+    if (qemu_ram_foreach_migratable_block(nhp_range, mis)) {
383d26
         return -1;
383d26
     }
383d26
 
383d26
@@ -491,7 +491,7 @@ int postcopy_ram_prepare_discard(MigrationIncomingState *mis)
383d26
 
383d26
 /*
383d26
  * Mark the given area of RAM as requiring notification to unwritten areas
383d26
- * Used as a  callback on qemu_ram_foreach_block.
383d26
+ * Used as a  callback on qemu_ram_foreach_migratable_block.
383d26
  *   host_addr: Base of area to mark
383d26
  *   offset: Offset in the whole ram arena
383d26
  *   length: Length of the section
383d26
@@ -793,7 +793,7 @@ int postcopy_ram_enable_notify(MigrationIncomingState *mis)
383d26
     mis->have_fault_thread = true;
383d26
 
383d26
     /* Mark so that we get notified of accesses to unwritten areas */
383d26
-    if (qemu_ram_foreach_block(ram_block_enable_notify, mis)) {
383d26
+    if (qemu_ram_foreach_migratable_block(ram_block_enable_notify, mis)) {
383d26
         return -1;
383d26
     }
383d26
 
383d26
diff --git a/migration/ram.c b/migration/ram.c
383d26
index bd563b5..04b5df5 100644
383d26
--- a/migration/ram.c
383d26
+++ b/migration/ram.c
383d26
@@ -153,11 +153,16 @@ out:
383d26
     return ret;
383d26
 }
383d26
 
383d26
+/* Should be holding either ram_list.mutex, or the RCU lock. */
383d26
+#define RAMBLOCK_FOREACH_MIGRATABLE(block)             \
383d26
+    RAMBLOCK_FOREACH(block)                            \
383d26
+        if (!qemu_ram_is_migratable(block)) {} else
383d26
+
383d26
 static void ramblock_recv_map_init(void)
383d26
 {
383d26
     RAMBlock *rb;
383d26
 
383d26
-    RAMBLOCK_FOREACH(rb) {
383d26
+    RAMBLOCK_FOREACH_MIGRATABLE(rb) {
383d26
         assert(!rb->receivedmap);
383d26
         rb->receivedmap = bitmap_new(rb->max_length >> qemu_target_page_bits());
383d26
     }
383d26
@@ -813,6 +818,10 @@ unsigned long migration_bitmap_find_dirty(RAMState *rs, RAMBlock *rb,
383d26
     unsigned long *bitmap = rb->bmap;
383d26
     unsigned long next;
383d26
 
383d26
+    if (!qemu_ram_is_migratable(rb)) {
383d26
+        return size;
383d26
+    }
383d26
+
383d26
     if (rs->ram_bulk_stage && start > 0) {
383d26
         next = start + 1;
383d26
     } else {
383d26
@@ -858,7 +867,7 @@ uint64_t ram_pagesize_summary(void)
383d26
     RAMBlock *block;
383d26
     uint64_t summary = 0;
383d26
 
383d26
-    RAMBLOCK_FOREACH(block) {
383d26
+    RAMBLOCK_FOREACH_MIGRATABLE(block) {
383d26
         summary |= block->page_size;
383d26
     }
383d26
 
383d26
@@ -882,7 +891,7 @@ static void migration_bitmap_sync(RAMState *rs)
383d26
 
383d26
     qemu_mutex_lock(&rs->bitmap_mutex);
383d26
     rcu_read_lock();
383d26
-    RAMBLOCK_FOREACH(block) {
383d26
+    RAMBLOCK_FOREACH_MIGRATABLE(block) {
383d26
         migration_bitmap_sync_range(rs, block, 0, block->used_length);
383d26
     }
383d26
     ram_counters.remaining = ram_bytes_remaining();
383d26
@@ -1522,6 +1531,11 @@ static int ram_save_host_page(RAMState *rs, PageSearchStatus *pss,
383d26
     size_t pagesize_bits =
383d26
         qemu_ram_pagesize(pss->block) >> TARGET_PAGE_BITS;
383d26
 
383d26
+    if (!qemu_ram_is_migratable(pss->block)) {
383d26
+        error_report("block %s should not be migrated !", pss->block->idstr);
383d26
+        return 0;
383d26
+    }
383d26
+
383d26
     do {
383d26
         /* Check the pages is dirty and if it is send it */
383d26
         if (!migration_bitmap_clear_dirty(rs, pss->block, pss->page)) {
383d26
@@ -1620,7 +1634,7 @@ uint64_t ram_bytes_total(void)
383d26
     uint64_t total = 0;
383d26
 
383d26
     rcu_read_lock();
383d26
-    RAMBLOCK_FOREACH(block) {
383d26
+    RAMBLOCK_FOREACH_MIGRATABLE(block) {
383d26
         total += block->used_length;
383d26
     }
383d26
     rcu_read_unlock();
383d26
@@ -1675,7 +1689,7 @@ static void ram_save_cleanup(void *opaque)
383d26
      */
383d26
     memory_global_dirty_log_stop();
383d26
 
383d26
-    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
383d26
+    RAMBLOCK_FOREACH_MIGRATABLE(block) {
383d26
         g_free(block->bmap);
383d26
         block->bmap = NULL;
383d26
         g_free(block->unsentmap);
383d26
@@ -1738,7 +1752,7 @@ void ram_postcopy_migrated_memory_release(MigrationState *ms)
383d26
 {
383d26
     struct RAMBlock *block;
383d26
 
383d26
-    RAMBLOCK_FOREACH(block) {
383d26
+    RAMBLOCK_FOREACH_MIGRATABLE(block) {
383d26
         unsigned long *bitmap = block->bmap;
383d26
         unsigned long range = block->used_length >> TARGET_PAGE_BITS;
383d26
         unsigned long run_start = find_next_zero_bit(bitmap, range, 0);
383d26
@@ -1816,7 +1830,7 @@ static int postcopy_each_ram_send_discard(MigrationState *ms)
383d26
     struct RAMBlock *block;
383d26
     int ret;
383d26
 
383d26
-    RAMBLOCK_FOREACH(block) {
383d26
+    RAMBLOCK_FOREACH_MIGRATABLE(block) {
383d26
         PostcopyDiscardState *pds =
383d26
             postcopy_discard_send_init(ms, block->idstr);
383d26
 
383d26
@@ -2024,7 +2038,7 @@ int ram_postcopy_send_discard_bitmap(MigrationState *ms)
383d26
     rs->last_sent_block = NULL;
383d26
     rs->last_page = 0;
383d26
 
383d26
-    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
383d26
+    RAMBLOCK_FOREACH_MIGRATABLE(block) {
383d26
         unsigned long pages = block->used_length >> TARGET_PAGE_BITS;
383d26
         unsigned long *bitmap = block->bmap;
383d26
         unsigned long *unsentmap = block->unsentmap;
383d26
@@ -2183,7 +2197,7 @@ static void ram_list_init_bitmaps(void)
383d26
 
383d26
     /* Skip setting bitmap if there is no RAM */
383d26
     if (ram_bytes_total()) {
383d26
-        QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
383d26
+        RAMBLOCK_FOREACH_MIGRATABLE(block) {
383d26
             pages = block->max_length >> TARGET_PAGE_BITS;
383d26
             block->bmap = bitmap_new(pages);
383d26
             bitmap_set(block->bmap, 0, pages);
383d26
@@ -2264,7 +2278,7 @@ static int ram_save_setup(QEMUFile *f, void *opaque)
383d26
 
383d26
     qemu_put_be64(f, ram_bytes_total() | RAM_SAVE_FLAG_MEM_SIZE);
383d26
 
383d26
-    RAMBLOCK_FOREACH(block) {
383d26
+    RAMBLOCK_FOREACH_MIGRATABLE(block) {
383d26
         qemu_put_byte(f, strlen(block->idstr));
383d26
         qemu_put_buffer(f, (uint8_t *)block->idstr, strlen(block->idstr));
383d26
         qemu_put_be64(f, block->used_length);
383d26
@@ -2508,6 +2522,11 @@ static inline RAMBlock *ram_block_from_stream(QEMUFile *f, int flags)
383d26
         return NULL;
383d26
     }
383d26
 
383d26
+    if (!qemu_ram_is_migratable(block)) {
383d26
+        error_report("block %s should not be migrated !", id);
383d26
+        return NULL;
383d26
+    }
383d26
+
383d26
     return block;
383d26
 }
383d26
 
383d26
@@ -2750,7 +2769,7 @@ static int ram_load_cleanup(void *opaque)
383d26
     xbzrle_load_cleanup();
383d26
     compress_threads_load_cleanup();
383d26
 
383d26
-    RAMBLOCK_FOREACH(rb) {
383d26
+    RAMBLOCK_FOREACH_MIGRATABLE(rb) {
383d26
         g_free(rb->receivedmap);
383d26
         rb->receivedmap = NULL;
383d26
     }
383d26
@@ -3012,7 +3031,10 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id)
383d26
                 length = qemu_get_be64(f);
383d26
 
383d26
                 block = qemu_ram_block_by_name(id);
383d26
-                if (block) {
383d26
+                if (block && !qemu_ram_is_migratable(block)) {
383d26
+                    error_report("block %s should not be migrated !", id);
383d26
+                    ret = -EINVAL;
383d26
+                } else if (block) {
383d26
                     if (length != block->used_length) {
383d26
                         Error *local_err = NULL;
383d26
 
383d26
diff --git a/migration/savevm.c b/migration/savevm.c
383d26
index 56c9feb..b975d3a 100644
383d26
--- a/migration/savevm.c
383d26
+++ b/migration/savevm.c
383d26
@@ -2510,11 +2510,13 @@ void vmstate_register_ram(MemoryRegion *mr, DeviceState *dev)
383d26
 {
383d26
     qemu_ram_set_idstr(mr->ram_block,
383d26
                        memory_region_name(mr), dev);
383d26
+    qemu_ram_set_migratable(mr->ram_block);
383d26
 }
383d26
 
383d26
 void vmstate_unregister_ram(MemoryRegion *mr, DeviceState *dev)
383d26
 {
383d26
     qemu_ram_unset_idstr(mr->ram_block);
383d26
+    qemu_ram_unset_migratable(mr->ram_block);
383d26
 }
383d26
 
383d26
 void vmstate_register_ram_global(MemoryRegion *mr)
383d26
-- 
383d26
1.8.3.1
383d26