Blame SOURCES/kvm-migration-discard-non-migratable-RAMBlocks.patch

1bdc94
From 0319afed69c1b3206fce51fd286c3351c6fd6958 Mon Sep 17 00:00:00 2001
1bdc94
From: "plai@redhat.com" <plai@redhat.com>
1bdc94
Date: Fri, 31 Aug 2018 16:25:51 +0200
1bdc94
Subject: [PATCH 09/29] migration: discard non-migratable RAMBlocks
1bdc94
MIME-Version: 1.0
1bdc94
Content-Type: text/plain; charset=UTF-8
1bdc94
Content-Transfer-Encoding: 8bit
1bdc94
1bdc94
RH-Author: plai@redhat.com
1bdc94
Message-id: <1535732759-22481-2-git-send-email-plai@redhat.com>
1bdc94
Patchwork-id: 82012
1bdc94
O-Subject: [RHEL7.6 PATCH BZ 1539280 1/9] migration: discard non-migratable RAMBlocks
1bdc94
Bugzilla: 1539280
1bdc94
RH-Acked-by: Michael S. Tsirkin <mst@redhat.com>
1bdc94
RH-Acked-by: Pankaj Gupta <pagupta@redhat.com>
1bdc94
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
1bdc94
1bdc94
From: Cédric Le Goater <clg@kaod.org>
1bdc94
1bdc94
On the POWER9 processor, the XIVE interrupt controller can control
1bdc94
interrupt sources using MMIO to trigger events, to EOI or to turn off
1bdc94
the sources. Priority management and interrupt acknowledgment are also
1bdc94
controlled by MMIO in the presenter sub-engine.
1bdc94
1bdc94
These MMIO regions are exposed to guests in QEMU with a set of 'ram
1bdc94
device' memory mappings, similarly to VFIO, and the VMAs are populated
1bdc94
dynamically with the appropriate pages using a fault handler.
1bdc94
1bdc94
However, these regions are an issue for migration. We need to discard the
1bdc94
associated RAMBlocks from the RAM state on the source VM and let the
1bdc94
destination VM rebuild the memory mappings on the new host in the
1bdc94
post_load() operation just before resuming the system.
1bdc94
1bdc94
To achieve this goal, the following introduces a new RAMBlock flag
1bdc94
RAM_MIGRATABLE which is updated in the vmstate_register_ram() and
1bdc94
vmstate_unregister_ram() routines. This flag is then used by the
1bdc94
migration to identify RAMBlocks to discard on the source. Some checks
1bdc94
are also performed on the destination to make sure nothing invalid was
1bdc94
sent.
1bdc94
1bdc94
This change impacts the boston, malta and jazz mips boards for which
1bdc94
migration compatibility is broken.
1bdc94
1bdc94
Signed-off-by: Cédric Le Goater <clg@kaod.org>
1bdc94
Reviewed-by: Juan Quintela <quintela@redhat.com>
1bdc94
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
1bdc94
Signed-off-by: Juan Quintela <quintela@redhat.com>
1bdc94
(cherry picked from commit b895de502717b83b4e5f089df617cb23530c4d2d)
1bdc94
Signed-off-by: Paul Lai <plai@redhat.com>
1bdc94
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
1bdc94
---
1bdc94
 exec.c                    | 38 ++++++++++++++++++++++++++++++++++++++
1bdc94
 include/exec/cpu-common.h |  4 ++++
1bdc94
 migration/postcopy-ram.c  | 12 ++++++------
1bdc94
 migration/ram.c           | 46 ++++++++++++++++++++++++++++++++++------------
1bdc94
 migration/savevm.c        |  2 ++
1bdc94
 5 files changed, 84 insertions(+), 18 deletions(-)
1bdc94
1bdc94
diff --git a/exec.c b/exec.c
1bdc94
index 02b1efe..7323d39 100644
1bdc94
--- a/exec.c
1bdc94
+++ b/exec.c
1bdc94
@@ -104,6 +104,9 @@ static MemoryRegion io_mem_unassigned;
1bdc94
  * (Set during postcopy)
1bdc94
  */
1bdc94
 #define RAM_UF_ZEROPAGE (1 << 3)
1bdc94
+
1bdc94
+/* RAM can be migrated */
1bdc94
+#define RAM_MIGRATABLE (1 << 4)
1bdc94
 #endif
1bdc94
 
1bdc94
 #ifdef TARGET_PAGE_BITS_VARY
1bdc94
@@ -1807,6 +1810,21 @@ void qemu_ram_set_uf_zeroable(RAMBlock *rb)
1bdc94
     rb->flags |= RAM_UF_ZEROPAGE;
1bdc94
 }
1bdc94
 
1bdc94
+bool qemu_ram_is_migratable(RAMBlock *rb)
1bdc94
+{
1bdc94
+    return rb->flags & RAM_MIGRATABLE;
1bdc94
+}
1bdc94
+
1bdc94
+void qemu_ram_set_migratable(RAMBlock *rb)
1bdc94
+{
1bdc94
+    rb->flags |= RAM_MIGRATABLE;
1bdc94
+}
1bdc94
+
1bdc94
+void qemu_ram_unset_migratable(RAMBlock *rb)
1bdc94
+{
1bdc94
+    rb->flags &= ~RAM_MIGRATABLE;
1bdc94
+}
1bdc94
+
1bdc94
 /* Called with iothread lock held.  */
1bdc94
 void qemu_ram_set_idstr(RAMBlock *new_block, const char *name, DeviceState *dev)
1bdc94
 {
1bdc94
@@ -3750,6 +3768,26 @@ int qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque)
1bdc94
     return ret;
1bdc94
 }
1bdc94
 
1bdc94
+int qemu_ram_foreach_migratable_block(RAMBlockIterFunc func, void *opaque)
1bdc94
+{
1bdc94
+    RAMBlock *block;
1bdc94
+    int ret = 0;
1bdc94
+
1bdc94
+    rcu_read_lock();
1bdc94
+    RAMBLOCK_FOREACH(block) {
1bdc94
+        if (!qemu_ram_is_migratable(block)) {
1bdc94
+            continue;
1bdc94
+        }
1bdc94
+        ret = func(block->idstr, block->host, block->offset,
1bdc94
+                   block->used_length, opaque);
1bdc94
+        if (ret) {
1bdc94
+            break;
1bdc94
+        }
1bdc94
+    }
1bdc94
+    rcu_read_unlock();
1bdc94
+    return ret;
1bdc94
+}
1bdc94
+
1bdc94
 /*
1bdc94
  * Unmap pages of memory from start to start+length such that
1bdc94
  * they a) read as 0, b) Trigger whatever fault mechanism
1bdc94
diff --git a/include/exec/cpu-common.h b/include/exec/cpu-common.h
1bdc94
index 24d335f..0b58e26 100644
1bdc94
--- a/include/exec/cpu-common.h
1bdc94
+++ b/include/exec/cpu-common.h
1bdc94
@@ -75,6 +75,9 @@ const char *qemu_ram_get_idstr(RAMBlock *rb);
1bdc94
 bool qemu_ram_is_shared(RAMBlock *rb);
1bdc94
 bool qemu_ram_is_uf_zeroable(RAMBlock *rb);
1bdc94
 void qemu_ram_set_uf_zeroable(RAMBlock *rb);
1bdc94
+bool qemu_ram_is_migratable(RAMBlock *rb);
1bdc94
+void qemu_ram_set_migratable(RAMBlock *rb);
1bdc94
+void qemu_ram_unset_migratable(RAMBlock *rb);
1bdc94
 
1bdc94
 size_t qemu_ram_pagesize(RAMBlock *block);
1bdc94
 size_t qemu_ram_pagesize_largest(void);
1bdc94
@@ -119,6 +122,7 @@ typedef int (RAMBlockIterFunc)(const char *block_name, void *host_addr,
1bdc94
     ram_addr_t offset, ram_addr_t length, void *opaque);
1bdc94
 
1bdc94
 int qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque);
1bdc94
+int qemu_ram_foreach_migratable_block(RAMBlockIterFunc func, void *opaque);
1bdc94
 int ram_block_discard_range(RAMBlock *rb, uint64_t start, size_t length);
1bdc94
 
1bdc94
 #endif
1bdc94
diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c
1bdc94
index 4a0b33b..001b041 100644
1bdc94
--- a/migration/postcopy-ram.c
1bdc94
+++ b/migration/postcopy-ram.c
1bdc94
@@ -264,7 +264,7 @@ bool postcopy_ram_supported_by_host(MigrationIncomingState *mis)
1bdc94
     }
1bdc94
 
1bdc94
     /* We don't support postcopy with shared RAM yet */
1bdc94
-    if (qemu_ram_foreach_block(test_ramblock_postcopiable, NULL)) {
1bdc94
+    if (qemu_ram_foreach_migratable_block(test_ramblock_postcopiable, NULL)) {
1bdc94
         goto out;
1bdc94
     }
1bdc94
 
1bdc94
@@ -392,7 +392,7 @@ static int cleanup_range(const char *block_name, void *host_addr,
1bdc94
  */
1bdc94
 int postcopy_ram_incoming_init(MigrationIncomingState *mis, size_t ram_pages)
1bdc94
 {
1bdc94
-    if (qemu_ram_foreach_block(init_range, NULL)) {
1bdc94
+    if (qemu_ram_foreach_migratable_block(init_range, NULL)) {
1bdc94
         return -1;
1bdc94
     }
1bdc94
 
1bdc94
@@ -414,7 +414,7 @@ int postcopy_ram_incoming_cleanup(MigrationIncomingState *mis)
1bdc94
             return -1;
1bdc94
         }
1bdc94
 
1bdc94
-        if (qemu_ram_foreach_block(cleanup_range, mis)) {
1bdc94
+        if (qemu_ram_foreach_migratable_block(cleanup_range, mis)) {
1bdc94
             return -1;
1bdc94
         }
1bdc94
         /* Let the fault thread quit */
1bdc94
@@ -480,7 +480,7 @@ static int nhp_range(const char *block_name, void *host_addr,
1bdc94
  */
1bdc94
 int postcopy_ram_prepare_discard(MigrationIncomingState *mis)
1bdc94
 {
1bdc94
-    if (qemu_ram_foreach_block(nhp_range, mis)) {
1bdc94
+    if (qemu_ram_foreach_migratable_block(nhp_range, mis)) {
1bdc94
         return -1;
1bdc94
     }
1bdc94
 
1bdc94
@@ -491,7 +491,7 @@ int postcopy_ram_prepare_discard(MigrationIncomingState *mis)
1bdc94
 
1bdc94
 /*
1bdc94
  * Mark the given area of RAM as requiring notification to unwritten areas
1bdc94
- * Used as a  callback on qemu_ram_foreach_block.
1bdc94
+ * Used as a  callback on qemu_ram_foreach_migratable_block.
1bdc94
  *   host_addr: Base of area to mark
1bdc94
  *   offset: Offset in the whole ram arena
1bdc94
  *   length: Length of the section
1bdc94
@@ -793,7 +793,7 @@ int postcopy_ram_enable_notify(MigrationIncomingState *mis)
1bdc94
     mis->have_fault_thread = true;
1bdc94
 
1bdc94
     /* Mark so that we get notified of accesses to unwritten areas */
1bdc94
-    if (qemu_ram_foreach_block(ram_block_enable_notify, mis)) {
1bdc94
+    if (qemu_ram_foreach_migratable_block(ram_block_enable_notify, mis)) {
1bdc94
         return -1;
1bdc94
     }
1bdc94
 
1bdc94
diff --git a/migration/ram.c b/migration/ram.c
1bdc94
index bd563b5..04b5df5 100644
1bdc94
--- a/migration/ram.c
1bdc94
+++ b/migration/ram.c
1bdc94
@@ -153,11 +153,16 @@ out:
1bdc94
     return ret;
1bdc94
 }
1bdc94
 
1bdc94
+/* Should be holding either ram_list.mutex, or the RCU lock. */
1bdc94
+#define RAMBLOCK_FOREACH_MIGRATABLE(block)             \
1bdc94
+    RAMBLOCK_FOREACH(block)                            \
1bdc94
+        if (!qemu_ram_is_migratable(block)) {} else
1bdc94
+
1bdc94
 static void ramblock_recv_map_init(void)
1bdc94
 {
1bdc94
     RAMBlock *rb;
1bdc94
 
1bdc94
-    RAMBLOCK_FOREACH(rb) {
1bdc94
+    RAMBLOCK_FOREACH_MIGRATABLE(rb) {
1bdc94
         assert(!rb->receivedmap);
1bdc94
         rb->receivedmap = bitmap_new(rb->max_length >> qemu_target_page_bits());
1bdc94
     }
1bdc94
@@ -813,6 +818,10 @@ unsigned long migration_bitmap_find_dirty(RAMState *rs, RAMBlock *rb,
1bdc94
     unsigned long *bitmap = rb->bmap;
1bdc94
     unsigned long next;
1bdc94
 
1bdc94
+    if (!qemu_ram_is_migratable(rb)) {
1bdc94
+        return size;
1bdc94
+    }
1bdc94
+
1bdc94
     if (rs->ram_bulk_stage && start > 0) {
1bdc94
         next = start + 1;
1bdc94
     } else {
1bdc94
@@ -858,7 +867,7 @@ uint64_t ram_pagesize_summary(void)
1bdc94
     RAMBlock *block;
1bdc94
     uint64_t summary = 0;
1bdc94
 
1bdc94
-    RAMBLOCK_FOREACH(block) {
1bdc94
+    RAMBLOCK_FOREACH_MIGRATABLE(block) {
1bdc94
         summary |= block->page_size;
1bdc94
     }
1bdc94
 
1bdc94
@@ -882,7 +891,7 @@ static void migration_bitmap_sync(RAMState *rs)
1bdc94
 
1bdc94
     qemu_mutex_lock(&rs->bitmap_mutex);
1bdc94
     rcu_read_lock();
1bdc94
-    RAMBLOCK_FOREACH(block) {
1bdc94
+    RAMBLOCK_FOREACH_MIGRATABLE(block) {
1bdc94
         migration_bitmap_sync_range(rs, block, 0, block->used_length);
1bdc94
     }
1bdc94
     ram_counters.remaining = ram_bytes_remaining();
1bdc94
@@ -1522,6 +1531,11 @@ static int ram_save_host_page(RAMState *rs, PageSearchStatus *pss,
1bdc94
     size_t pagesize_bits =
1bdc94
         qemu_ram_pagesize(pss->block) >> TARGET_PAGE_BITS;
1bdc94
 
1bdc94
+    if (!qemu_ram_is_migratable(pss->block)) {
1bdc94
+        error_report("block %s should not be migrated !", pss->block->idstr);
1bdc94
+        return 0;
1bdc94
+    }
1bdc94
+
1bdc94
     do {
1bdc94
         /* Check the pages is dirty and if it is send it */
1bdc94
         if (!migration_bitmap_clear_dirty(rs, pss->block, pss->page)) {
1bdc94
@@ -1620,7 +1634,7 @@ uint64_t ram_bytes_total(void)
1bdc94
     uint64_t total = 0;
1bdc94
 
1bdc94
     rcu_read_lock();
1bdc94
-    RAMBLOCK_FOREACH(block) {
1bdc94
+    RAMBLOCK_FOREACH_MIGRATABLE(block) {
1bdc94
         total += block->used_length;
1bdc94
     }
1bdc94
     rcu_read_unlock();
1bdc94
@@ -1675,7 +1689,7 @@ static void ram_save_cleanup(void *opaque)
1bdc94
      */
1bdc94
     memory_global_dirty_log_stop();
1bdc94
 
1bdc94
-    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1bdc94
+    RAMBLOCK_FOREACH_MIGRATABLE(block) {
1bdc94
         g_free(block->bmap);
1bdc94
         block->bmap = NULL;
1bdc94
         g_free(block->unsentmap);
1bdc94
@@ -1738,7 +1752,7 @@ void ram_postcopy_migrated_memory_release(MigrationState *ms)
1bdc94
 {
1bdc94
     struct RAMBlock *block;
1bdc94
 
1bdc94
-    RAMBLOCK_FOREACH(block) {
1bdc94
+    RAMBLOCK_FOREACH_MIGRATABLE(block) {
1bdc94
         unsigned long *bitmap = block->bmap;
1bdc94
         unsigned long range = block->used_length >> TARGET_PAGE_BITS;
1bdc94
         unsigned long run_start = find_next_zero_bit(bitmap, range, 0);
1bdc94
@@ -1816,7 +1830,7 @@ static int postcopy_each_ram_send_discard(MigrationState *ms)
1bdc94
     struct RAMBlock *block;
1bdc94
     int ret;
1bdc94
 
1bdc94
-    RAMBLOCK_FOREACH(block) {
1bdc94
+    RAMBLOCK_FOREACH_MIGRATABLE(block) {
1bdc94
         PostcopyDiscardState *pds =
1bdc94
             postcopy_discard_send_init(ms, block->idstr);
1bdc94
 
1bdc94
@@ -2024,7 +2038,7 @@ int ram_postcopy_send_discard_bitmap(MigrationState *ms)
1bdc94
     rs->last_sent_block = NULL;
1bdc94
     rs->last_page = 0;
1bdc94
 
1bdc94
-    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1bdc94
+    RAMBLOCK_FOREACH_MIGRATABLE(block) {
1bdc94
         unsigned long pages = block->used_length >> TARGET_PAGE_BITS;
1bdc94
         unsigned long *bitmap = block->bmap;
1bdc94
         unsigned long *unsentmap = block->unsentmap;
1bdc94
@@ -2183,7 +2197,7 @@ static void ram_list_init_bitmaps(void)
1bdc94
 
1bdc94
     /* Skip setting bitmap if there is no RAM */
1bdc94
     if (ram_bytes_total()) {
1bdc94
-        QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1bdc94
+        RAMBLOCK_FOREACH_MIGRATABLE(block) {
1bdc94
             pages = block->max_length >> TARGET_PAGE_BITS;
1bdc94
             block->bmap = bitmap_new(pages);
1bdc94
             bitmap_set(block->bmap, 0, pages);
1bdc94
@@ -2264,7 +2278,7 @@ static int ram_save_setup(QEMUFile *f, void *opaque)
1bdc94
 
1bdc94
     qemu_put_be64(f, ram_bytes_total() | RAM_SAVE_FLAG_MEM_SIZE);
1bdc94
 
1bdc94
-    RAMBLOCK_FOREACH(block) {
1bdc94
+    RAMBLOCK_FOREACH_MIGRATABLE(block) {
1bdc94
         qemu_put_byte(f, strlen(block->idstr));
1bdc94
         qemu_put_buffer(f, (uint8_t *)block->idstr, strlen(block->idstr));
1bdc94
         qemu_put_be64(f, block->used_length);
1bdc94
@@ -2508,6 +2522,11 @@ static inline RAMBlock *ram_block_from_stream(QEMUFile *f, int flags)
1bdc94
         return NULL;
1bdc94
     }
1bdc94
 
1bdc94
+    if (!qemu_ram_is_migratable(block)) {
1bdc94
+        error_report("block %s should not be migrated !", id);
1bdc94
+        return NULL;
1bdc94
+    }
1bdc94
+
1bdc94
     return block;
1bdc94
 }
1bdc94
 
1bdc94
@@ -2750,7 +2769,7 @@ static int ram_load_cleanup(void *opaque)
1bdc94
     xbzrle_load_cleanup();
1bdc94
     compress_threads_load_cleanup();
1bdc94
 
1bdc94
-    RAMBLOCK_FOREACH(rb) {
1bdc94
+    RAMBLOCK_FOREACH_MIGRATABLE(rb) {
1bdc94
         g_free(rb->receivedmap);
1bdc94
         rb->receivedmap = NULL;
1bdc94
     }
1bdc94
@@ -3012,7 +3031,10 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id)
1bdc94
                 length = qemu_get_be64(f);
1bdc94
 
1bdc94
                 block = qemu_ram_block_by_name(id);
1bdc94
-                if (block) {
1bdc94
+                if (block && !qemu_ram_is_migratable(block)) {
1bdc94
+                    error_report("block %s should not be migrated !", id);
1bdc94
+                    ret = -EINVAL;
1bdc94
+                } else if (block) {
1bdc94
                     if (length != block->used_length) {
1bdc94
                         Error *local_err = NULL;
1bdc94
 
1bdc94
diff --git a/migration/savevm.c b/migration/savevm.c
1bdc94
index 56c9feb..b975d3a 100644
1bdc94
--- a/migration/savevm.c
1bdc94
+++ b/migration/savevm.c
1bdc94
@@ -2510,11 +2510,13 @@ void vmstate_register_ram(MemoryRegion *mr, DeviceState *dev)
1bdc94
 {
1bdc94
     qemu_ram_set_idstr(mr->ram_block,
1bdc94
                        memory_region_name(mr), dev);
1bdc94
+    qemu_ram_set_migratable(mr->ram_block);
1bdc94
 }
1bdc94
 
1bdc94
 void vmstate_unregister_ram(MemoryRegion *mr, DeviceState *dev)
1bdc94
 {
1bdc94
     qemu_ram_unset_idstr(mr->ram_block);
1bdc94
+    qemu_ram_unset_migratable(mr->ram_block);
1bdc94
 }
1bdc94
 
1bdc94
 void vmstate_register_ram_global(MemoryRegion *mr)
1bdc94
-- 
1bdc94
1.8.3.1
1bdc94