ae23c9
From c38aaf5a09a4f06096f9a66ca3a7c22c7e657a4f Mon Sep 17 00:00:00 2001
ae23c9
From: "plai@redhat.com" <plai@redhat.com>
ae23c9
Date: Mon, 7 Jan 2019 17:02:14 +0000
ae23c9
Subject: [PATCH 13/22] migration: discard non-migratable RAMBlocks
ae23c9
MIME-Version: 1.0
ae23c9
Content-Type: text/plain; charset=UTF-8
ae23c9
Content-Transfer-Encoding: 8bit
ae23c9
ae23c9
RH-Author: plai@redhat.com
ae23c9
Message-id: <1546880543-24860-2-git-send-email-plai@redhat.com>
ae23c9
Patchwork-id: 83886
ae23c9
O-Subject: [RHEL8.0 qemu-kvm PATCH v7 01/10] migration: discard non-migratable RAMBlocks
ae23c9
Bugzilla: 1539285
ae23c9
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
ae23c9
RH-Acked-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
ae23c9
RH-Acked-by: Eduardo Habkost <ehabkost@redhat.com>
ae23c9
ae23c9
From: Cédric Le Goater <clg@kaod.org>
ae23c9
ae23c9
On the POWER9 processor, the XIVE interrupt controller can control
ae23c9
interrupt sources using MMIO to trigger events, to EOI or to turn off
ae23c9
the sources. Priority management and interrupt acknowledgment are also
ae23c9
controlled by MMIO in the presenter sub-engine.
ae23c9
ae23c9
These MMIO regions are exposed to guests in QEMU with a set of 'ram
ae23c9
device' memory mappings, similarly to VFIO, and the VMAs are populated
ae23c9
dynamically with the appropriate pages using a fault handler.
ae23c9
ae23c9
However, these regions are an issue for migration. We need to discard the
ae23c9
associated RAMBlocks from the RAM state on the source VM and let the
ae23c9
destination VM rebuild the memory mappings on the new host in the
ae23c9
post_load() operation just before resuming the system.
ae23c9
ae23c9
To achieve this goal, the following introduces a new RAMBlock flag
ae23c9
RAM_MIGRATABLE which is updated in the vmstate_register_ram() and
ae23c9
vmstate_unregister_ram() routines. This flag is then used by the
ae23c9
migration to identify RAMBlocks to discard on the source. Some checks
ae23c9
are also performed on the destination to make sure nothing invalid was
ae23c9
sent.
ae23c9
ae23c9
This change impacts the boston, malta and jazz mips boards for which
ae23c9
migration compatibility is broken.
ae23c9
ae23c9
Signed-off-by: Cédric Le Goater <clg@kaod.org>
ae23c9
Reviewed-by: Juan Quintela <quintela@redhat.com>
ae23c9
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
ae23c9
Signed-off-by: Juan Quintela <quintela@redhat.com>
ae23c9
(cherry picked from commit b895de502717b83b4e5f089df617cb23530c4d2d)
ae23c9
Signed-off-by: Paul Lai <plai@redhat.com>
ae23c9
Signed-off-by: Danilo C. L. de Paula <ddepaula@redhat.com>
ae23c9
---
ae23c9
 exec.c                    | 38 ++++++++++++++++++++++++++++++++++++++
ae23c9
 include/exec/cpu-common.h |  4 ++++
ae23c9
 migration/postcopy-ram.c  | 12 ++++++------
ae23c9
 migration/ram.c           | 46 ++++++++++++++++++++++++++++++++++------------
ae23c9
 migration/savevm.c        |  2 ++
ae23c9
 5 files changed, 84 insertions(+), 18 deletions(-)
ae23c9
ae23c9
diff --git a/exec.c b/exec.c
ae23c9
index 22cc7ef..fff49ba 100644
ae23c9
--- a/exec.c
ae23c9
+++ b/exec.c
ae23c9
@@ -104,6 +104,9 @@ static MemoryRegion io_mem_unassigned;
ae23c9
  * (Set during postcopy)
ae23c9
  */
ae23c9
 #define RAM_UF_ZEROPAGE (1 << 3)
ae23c9
+
ae23c9
+/* RAM can be migrated */
ae23c9
+#define RAM_MIGRATABLE (1 << 4)
ae23c9
 #endif
ae23c9
 
ae23c9
 #ifdef TARGET_PAGE_BITS_VARY
ae23c9
@@ -1811,6 +1814,21 @@ void qemu_ram_set_uf_zeroable(RAMBlock *rb)
ae23c9
     rb->flags |= RAM_UF_ZEROPAGE;
ae23c9
 }
ae23c9
 
ae23c9
+bool qemu_ram_is_migratable(RAMBlock *rb)
ae23c9
+{
ae23c9
+    return rb->flags & RAM_MIGRATABLE;
ae23c9
+}
ae23c9
+
ae23c9
+void qemu_ram_set_migratable(RAMBlock *rb)
ae23c9
+{
ae23c9
+    rb->flags |= RAM_MIGRATABLE;
ae23c9
+}
ae23c9
+
ae23c9
+void qemu_ram_unset_migratable(RAMBlock *rb)
ae23c9
+{
ae23c9
+    rb->flags &= ~RAM_MIGRATABLE;
ae23c9
+}
ae23c9
+
ae23c9
 /* Called with iothread lock held.  */
ae23c9
 void qemu_ram_set_idstr(RAMBlock *new_block, const char *name, DeviceState *dev)
ae23c9
 {
ae23c9
@@ -3754,6 +3772,26 @@ int qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque)
ae23c9
     return ret;
ae23c9
 }
ae23c9
 
ae23c9
+int qemu_ram_foreach_migratable_block(RAMBlockIterFunc func, void *opaque)
ae23c9
+{
ae23c9
+    RAMBlock *block;
ae23c9
+    int ret = 0;
ae23c9
+
ae23c9
+    rcu_read_lock();
ae23c9
+    RAMBLOCK_FOREACH(block) {
ae23c9
+        if (!qemu_ram_is_migratable(block)) {
ae23c9
+            continue;
ae23c9
+        }
ae23c9
+        ret = func(block->idstr, block->host, block->offset,
ae23c9
+                   block->used_length, opaque);
ae23c9
+        if (ret) {
ae23c9
+            break;
ae23c9
+        }
ae23c9
+    }
ae23c9
+    rcu_read_unlock();
ae23c9
+    return ret;
ae23c9
+}
ae23c9
+
ae23c9
 /*
ae23c9
  * Unmap pages of memory from start to start+length such that
ae23c9
  * they a) read as 0, b) Trigger whatever fault mechanism
ae23c9
diff --git a/include/exec/cpu-common.h b/include/exec/cpu-common.h
ae23c9
index 24d335f..0b58e26 100644
ae23c9
--- a/include/exec/cpu-common.h
ae23c9
+++ b/include/exec/cpu-common.h
ae23c9
@@ -75,6 +75,9 @@ const char *qemu_ram_get_idstr(RAMBlock *rb);
ae23c9
 bool qemu_ram_is_shared(RAMBlock *rb);
ae23c9
 bool qemu_ram_is_uf_zeroable(RAMBlock *rb);
ae23c9
 void qemu_ram_set_uf_zeroable(RAMBlock *rb);
ae23c9
+bool qemu_ram_is_migratable(RAMBlock *rb);
ae23c9
+void qemu_ram_set_migratable(RAMBlock *rb);
ae23c9
+void qemu_ram_unset_migratable(RAMBlock *rb);
ae23c9
 
ae23c9
 size_t qemu_ram_pagesize(RAMBlock *block);
ae23c9
 size_t qemu_ram_pagesize_largest(void);
ae23c9
@@ -119,6 +122,7 @@ typedef int (RAMBlockIterFunc)(const char *block_name, void *host_addr,
ae23c9
     ram_addr_t offset, ram_addr_t length, void *opaque);
ae23c9
 
ae23c9
 int qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque);
ae23c9
+int qemu_ram_foreach_migratable_block(RAMBlockIterFunc func, void *opaque);
ae23c9
 int ram_block_discard_range(RAMBlock *rb, uint64_t start, size_t length);
ae23c9
 
ae23c9
 #endif
ae23c9
diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c
ae23c9
index b04e903..4b65ff9 100644
ae23c9
--- a/migration/postcopy-ram.c
ae23c9
+++ b/migration/postcopy-ram.c
ae23c9
@@ -264,7 +264,7 @@ bool postcopy_ram_supported_by_host(MigrationIncomingState *mis)
ae23c9
     }
ae23c9
 
ae23c9
     /* We don't support postcopy with shared RAM yet */
ae23c9
-    if (qemu_ram_foreach_block(test_ramblock_postcopiable, NULL)) {
ae23c9
+    if (qemu_ram_foreach_migratable_block(test_ramblock_postcopiable, NULL)) {
ae23c9
         goto out;
ae23c9
     }
ae23c9
 
ae23c9
@@ -392,7 +392,7 @@ static int cleanup_range(const char *block_name, void *host_addr,
ae23c9
  */
ae23c9
 int postcopy_ram_incoming_init(MigrationIncomingState *mis, size_t ram_pages)
ae23c9
 {
ae23c9
-    if (qemu_ram_foreach_block(init_range, NULL)) {
ae23c9
+    if (qemu_ram_foreach_migratable_block(init_range, NULL)) {
ae23c9
         return -1;
ae23c9
     }
ae23c9
 
ae23c9
@@ -428,7 +428,7 @@ int postcopy_ram_incoming_cleanup(MigrationIncomingState *mis)
ae23c9
             return -1;
ae23c9
         }
ae23c9
 
ae23c9
-        if (qemu_ram_foreach_block(cleanup_range, mis)) {
ae23c9
+        if (qemu_ram_foreach_migratable_block(cleanup_range, mis)) {
ae23c9
             return -1;
ae23c9
         }
ae23c9
         /* Let the fault thread quit */
ae23c9
@@ -494,7 +494,7 @@ static int nhp_range(const char *block_name, void *host_addr,
ae23c9
  */
ae23c9
 int postcopy_ram_prepare_discard(MigrationIncomingState *mis)
ae23c9
 {
ae23c9
-    if (qemu_ram_foreach_block(nhp_range, mis)) {
ae23c9
+    if (qemu_ram_foreach_migratable_block(nhp_range, mis)) {
ae23c9
         return -1;
ae23c9
     }
ae23c9
 
ae23c9
@@ -505,7 +505,7 @@ int postcopy_ram_prepare_discard(MigrationIncomingState *mis)
ae23c9
 
ae23c9
 /*
ae23c9
  * Mark the given area of RAM as requiring notification to unwritten areas
ae23c9
- * Used as a  callback on qemu_ram_foreach_block.
ae23c9
+ * Used as a  callback on qemu_ram_foreach_migratable_block.
ae23c9
  *   host_addr: Base of area to mark
ae23c9
  *   offset: Offset in the whole ram arena
ae23c9
  *   length: Length of the section
ae23c9
@@ -807,7 +807,7 @@ int postcopy_ram_enable_notify(MigrationIncomingState *mis)
ae23c9
     mis->have_fault_thread = true;
ae23c9
 
ae23c9
     /* Mark so that we get notified of accesses to unwritten areas */
ae23c9
-    if (qemu_ram_foreach_block(ram_block_enable_notify, mis)) {
ae23c9
+    if (qemu_ram_foreach_migratable_block(ram_block_enable_notify, mis)) {
ae23c9
         return -1;
ae23c9
     }
ae23c9
 
ae23c9
diff --git a/migration/ram.c b/migration/ram.c
ae23c9
index bd563b5..04b5df5 100644
ae23c9
--- a/migration/ram.c
ae23c9
+++ b/migration/ram.c
ae23c9
@@ -153,11 +153,16 @@ out:
ae23c9
     return ret;
ae23c9
 }
ae23c9
 
ae23c9
+/* Should be holding either ram_list.mutex, or the RCU lock. */
ae23c9
+#define RAMBLOCK_FOREACH_MIGRATABLE(block)             \
ae23c9
+    RAMBLOCK_FOREACH(block)                            \
ae23c9
+        if (!qemu_ram_is_migratable(block)) {} else
ae23c9
+
ae23c9
 static void ramblock_recv_map_init(void)
ae23c9
 {
ae23c9
     RAMBlock *rb;
ae23c9
 
ae23c9
-    RAMBLOCK_FOREACH(rb) {
ae23c9
+    RAMBLOCK_FOREACH_MIGRATABLE(rb) {
ae23c9
         assert(!rb->receivedmap);
ae23c9
         rb->receivedmap = bitmap_new(rb->max_length >> qemu_target_page_bits());
ae23c9
     }
ae23c9
@@ -813,6 +818,10 @@ unsigned long migration_bitmap_find_dirty(RAMState *rs, RAMBlock *rb,
ae23c9
     unsigned long *bitmap = rb->bmap;
ae23c9
     unsigned long next;
ae23c9
 
ae23c9
+    if (!qemu_ram_is_migratable(rb)) {
ae23c9
+        return size;
ae23c9
+    }
ae23c9
+
ae23c9
     if (rs->ram_bulk_stage && start > 0) {
ae23c9
         next = start + 1;
ae23c9
     } else {
ae23c9
@@ -858,7 +867,7 @@ uint64_t ram_pagesize_summary(void)
ae23c9
     RAMBlock *block;
ae23c9
     uint64_t summary = 0;
ae23c9
 
ae23c9
-    RAMBLOCK_FOREACH(block) {
ae23c9
+    RAMBLOCK_FOREACH_MIGRATABLE(block) {
ae23c9
         summary |= block->page_size;
ae23c9
     }
ae23c9
 
ae23c9
@@ -882,7 +891,7 @@ static void migration_bitmap_sync(RAMState *rs)
ae23c9
 
ae23c9
     qemu_mutex_lock(&rs->bitmap_mutex);
ae23c9
     rcu_read_lock();
ae23c9
-    RAMBLOCK_FOREACH(block) {
ae23c9
+    RAMBLOCK_FOREACH_MIGRATABLE(block) {
ae23c9
         migration_bitmap_sync_range(rs, block, 0, block->used_length);
ae23c9
     }
ae23c9
     ram_counters.remaining = ram_bytes_remaining();
ae23c9
@@ -1522,6 +1531,11 @@ static int ram_save_host_page(RAMState *rs, PageSearchStatus *pss,
ae23c9
     size_t pagesize_bits =
ae23c9
         qemu_ram_pagesize(pss->block) >> TARGET_PAGE_BITS;
ae23c9
 
ae23c9
+    if (!qemu_ram_is_migratable(pss->block)) {
ae23c9
+        error_report("block %s should not be migrated !", pss->block->idstr);
ae23c9
+        return 0;
ae23c9
+    }
ae23c9
+
ae23c9
     do {
ae23c9
         /* Check the pages is dirty and if it is send it */
ae23c9
         if (!migration_bitmap_clear_dirty(rs, pss->block, pss->page)) {
ae23c9
@@ -1620,7 +1634,7 @@ uint64_t ram_bytes_total(void)
ae23c9
     uint64_t total = 0;
ae23c9
 
ae23c9
     rcu_read_lock();
ae23c9
-    RAMBLOCK_FOREACH(block) {
ae23c9
+    RAMBLOCK_FOREACH_MIGRATABLE(block) {
ae23c9
         total += block->used_length;
ae23c9
     }
ae23c9
     rcu_read_unlock();
ae23c9
@@ -1675,7 +1689,7 @@ static void ram_save_cleanup(void *opaque)
ae23c9
      */
ae23c9
     memory_global_dirty_log_stop();
ae23c9
 
ae23c9
-    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
ae23c9
+    RAMBLOCK_FOREACH_MIGRATABLE(block) {
ae23c9
         g_free(block->bmap);
ae23c9
         block->bmap = NULL;
ae23c9
         g_free(block->unsentmap);
ae23c9
@@ -1738,7 +1752,7 @@ void ram_postcopy_migrated_memory_release(MigrationState *ms)
ae23c9
 {
ae23c9
     struct RAMBlock *block;
ae23c9
 
ae23c9
-    RAMBLOCK_FOREACH(block) {
ae23c9
+    RAMBLOCK_FOREACH_MIGRATABLE(block) {
ae23c9
         unsigned long *bitmap = block->bmap;
ae23c9
         unsigned long range = block->used_length >> TARGET_PAGE_BITS;
ae23c9
         unsigned long run_start = find_next_zero_bit(bitmap, range, 0);
ae23c9
@@ -1816,7 +1830,7 @@ static int postcopy_each_ram_send_discard(MigrationState *ms)
ae23c9
     struct RAMBlock *block;
ae23c9
     int ret;
ae23c9
 
ae23c9
-    RAMBLOCK_FOREACH(block) {
ae23c9
+    RAMBLOCK_FOREACH_MIGRATABLE(block) {
ae23c9
         PostcopyDiscardState *pds =
ae23c9
             postcopy_discard_send_init(ms, block->idstr);
ae23c9
 
ae23c9
@@ -2024,7 +2038,7 @@ int ram_postcopy_send_discard_bitmap(MigrationState *ms)
ae23c9
     rs->last_sent_block = NULL;
ae23c9
     rs->last_page = 0;
ae23c9
 
ae23c9
-    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
ae23c9
+    RAMBLOCK_FOREACH_MIGRATABLE(block) {
ae23c9
         unsigned long pages = block->used_length >> TARGET_PAGE_BITS;
ae23c9
         unsigned long *bitmap = block->bmap;
ae23c9
         unsigned long *unsentmap = block->unsentmap;
ae23c9
@@ -2183,7 +2197,7 @@ static void ram_list_init_bitmaps(void)
ae23c9
 
ae23c9
     /* Skip setting bitmap if there is no RAM */
ae23c9
     if (ram_bytes_total()) {
ae23c9
-        QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
ae23c9
+        RAMBLOCK_FOREACH_MIGRATABLE(block) {
ae23c9
             pages = block->max_length >> TARGET_PAGE_BITS;
ae23c9
             block->bmap = bitmap_new(pages);
ae23c9
             bitmap_set(block->bmap, 0, pages);
ae23c9
@@ -2264,7 +2278,7 @@ static int ram_save_setup(QEMUFile *f, void *opaque)
ae23c9
 
ae23c9
     qemu_put_be64(f, ram_bytes_total() | RAM_SAVE_FLAG_MEM_SIZE);
ae23c9
 
ae23c9
-    RAMBLOCK_FOREACH(block) {
ae23c9
+    RAMBLOCK_FOREACH_MIGRATABLE(block) {
ae23c9
         qemu_put_byte(f, strlen(block->idstr));
ae23c9
         qemu_put_buffer(f, (uint8_t *)block->idstr, strlen(block->idstr));
ae23c9
         qemu_put_be64(f, block->used_length);
ae23c9
@@ -2508,6 +2522,11 @@ static inline RAMBlock *ram_block_from_stream(QEMUFile *f, int flags)
ae23c9
         return NULL;
ae23c9
     }
ae23c9
 
ae23c9
+    if (!qemu_ram_is_migratable(block)) {
ae23c9
+        error_report("block %s should not be migrated !", id);
ae23c9
+        return NULL;
ae23c9
+    }
ae23c9
+
ae23c9
     return block;
ae23c9
 }
ae23c9
 
ae23c9
@@ -2750,7 +2769,7 @@ static int ram_load_cleanup(void *opaque)
ae23c9
     xbzrle_load_cleanup();
ae23c9
     compress_threads_load_cleanup();
ae23c9
 
ae23c9
-    RAMBLOCK_FOREACH(rb) {
ae23c9
+    RAMBLOCK_FOREACH_MIGRATABLE(rb) {
ae23c9
         g_free(rb->receivedmap);
ae23c9
         rb->receivedmap = NULL;
ae23c9
     }
ae23c9
@@ -3012,7 +3031,10 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id)
ae23c9
                 length = qemu_get_be64(f);
ae23c9
 
ae23c9
                 block = qemu_ram_block_by_name(id);
ae23c9
-                if (block) {
ae23c9
+                if (block && !qemu_ram_is_migratable(block)) {
ae23c9
+                    error_report("block %s should not be migrated !", id);
ae23c9
+                    ret = -EINVAL;
ae23c9
+                } else if (block) {
ae23c9
                     if (length != block->used_length) {
ae23c9
                         Error *local_err = NULL;
ae23c9
 
ae23c9
diff --git a/migration/savevm.c b/migration/savevm.c
ae23c9
index edb3b94..0bb9446 100644
ae23c9
--- a/migration/savevm.c
ae23c9
+++ b/migration/savevm.c
ae23c9
@@ -2506,11 +2506,13 @@ void vmstate_register_ram(MemoryRegion *mr, DeviceState *dev)
ae23c9
 {
ae23c9
     qemu_ram_set_idstr(mr->ram_block,
ae23c9
                        memory_region_name(mr), dev);
ae23c9
+    qemu_ram_set_migratable(mr->ram_block);
ae23c9
 }
ae23c9
 
ae23c9
 void vmstate_unregister_ram(MemoryRegion *mr, DeviceState *dev)
ae23c9
 {
ae23c9
     qemu_ram_unset_idstr(mr->ram_block);
ae23c9
+    qemu_ram_unset_migratable(mr->ram_block);
ae23c9
 }
ae23c9
 
ae23c9
 void vmstate_register_ram_global(MemoryRegion *mr)
ae23c9
-- 
ae23c9
1.8.3.1
ae23c9