26ba25
From c38aaf5a09a4f06096f9a66ca3a7c22c7e657a4f Mon Sep 17 00:00:00 2001
26ba25
From: "plai@redhat.com" <plai@redhat.com>
26ba25
Date: Mon, 7 Jan 2019 17:02:14 +0000
26ba25
Subject: [PATCH 13/22] migration: discard non-migratable RAMBlocks
26ba25
MIME-Version: 1.0
26ba25
Content-Type: text/plain; charset=UTF-8
26ba25
Content-Transfer-Encoding: 8bit
26ba25
26ba25
RH-Author: plai@redhat.com
26ba25
Message-id: <1546880543-24860-2-git-send-email-plai@redhat.com>
26ba25
Patchwork-id: 83886
26ba25
O-Subject: [RHEL8.0 qemu-kvm PATCH v7 01/10] migration: discard non-migratable RAMBlocks
26ba25
Bugzilla: 1539285
26ba25
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
26ba25
RH-Acked-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
26ba25
RH-Acked-by: Eduardo Habkost <ehabkost@redhat.com>
26ba25
26ba25
From: Cédric Le Goater <clg@kaod.org>
26ba25
26ba25
On the POWER9 processor, the XIVE interrupt controller can control
26ba25
interrupt sources using MMIO to trigger events, to EOI or to turn off
26ba25
the sources. Priority management and interrupt acknowledgment are also
26ba25
controlled by MMIO in the presenter sub-engine.
26ba25
26ba25
These MMIO regions are exposed to guests in QEMU with a set of 'ram
26ba25
device' memory mappings, similarly to VFIO, and the VMAs are populated
26ba25
dynamically with the appropriate pages using a fault handler.
26ba25
26ba25
But, these regions are an issue for migration. We need to discard the
26ba25
associated RAMBlocks from the RAM state on the source VM and let the
26ba25
destination VM rebuild the memory mappings on the new host in the
26ba25
post_load() operation just before resuming the system.
26ba25
26ba25
To achieve this goal, the following introduces a new RAMBlock flag
26ba25
RAM_MIGRATABLE which is updated in the vmstate_register_ram() and
26ba25
vmstate_unregister_ram() routines. This flag is then used by the
26ba25
migration to identify RAMBlocks to discard on the source. Some checks
26ba25
are also performed on the destination to make sure nothing invalid was
26ba25
sent.
26ba25
26ba25
This change impacts the boston, malta and jazz mips boards for which
26ba25
migration compatibility is broken.
26ba25
26ba25
Signed-off-by: Cédric Le Goater <clg@kaod.org>
26ba25
Reviewed-by: Juan Quintela <quintela@redhat.com>
26ba25
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
26ba25
Signed-off-by: Juan Quintela <quintela@redhat.com>
26ba25
(cherry picked from commit b895de502717b83b4e5f089df617cb23530c4d2d)
26ba25
Signed-off-by: Paul Lai <plai@redhat.com>
26ba25
Signed-off-by: Danilo C. L. de Paula <ddepaula@redhat.com>
26ba25
---
26ba25
 exec.c                    | 38 ++++++++++++++++++++++++++++++++++++++
26ba25
 include/exec/cpu-common.h |  4 ++++
26ba25
 migration/postcopy-ram.c  | 12 ++++++------
26ba25
 migration/ram.c           | 46 ++++++++++++++++++++++++++++++++++------------
26ba25
 migration/savevm.c        |  2 ++
26ba25
 5 files changed, 84 insertions(+), 18 deletions(-)
26ba25
26ba25
diff --git a/exec.c b/exec.c
26ba25
index 22cc7ef..fff49ba 100644
26ba25
--- a/exec.c
26ba25
+++ b/exec.c
26ba25
@@ -104,6 +104,9 @@ static MemoryRegion io_mem_unassigned;
26ba25
  * (Set during postcopy)
26ba25
  */
26ba25
 #define RAM_UF_ZEROPAGE (1 << 3)
26ba25
+
26ba25
+/* RAM can be migrated */
26ba25
+#define RAM_MIGRATABLE (1 << 4)
26ba25
 #endif
26ba25
 
26ba25
 #ifdef TARGET_PAGE_BITS_VARY
26ba25
@@ -1811,6 +1814,21 @@ void qemu_ram_set_uf_zeroable(RAMBlock *rb)
26ba25
     rb->flags |= RAM_UF_ZEROPAGE;
26ba25
 }
26ba25
 
26ba25
+bool qemu_ram_is_migratable(RAMBlock *rb)
26ba25
+{
26ba25
+    return rb->flags & RAM_MIGRATABLE;
26ba25
+}
26ba25
+
26ba25
+void qemu_ram_set_migratable(RAMBlock *rb)
26ba25
+{
26ba25
+    rb->flags |= RAM_MIGRATABLE;
26ba25
+}
26ba25
+
26ba25
+void qemu_ram_unset_migratable(RAMBlock *rb)
26ba25
+{
26ba25
+    rb->flags &= ~RAM_MIGRATABLE;
26ba25
+}
26ba25
+
26ba25
 /* Called with iothread lock held.  */
26ba25
 void qemu_ram_set_idstr(RAMBlock *new_block, const char *name, DeviceState *dev)
26ba25
 {
26ba25
@@ -3754,6 +3772,26 @@ int qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque)
26ba25
     return ret;
26ba25
 }
26ba25
 
26ba25
+int qemu_ram_foreach_migratable_block(RAMBlockIterFunc func, void *opaque)
26ba25
+{
26ba25
+    RAMBlock *block;
26ba25
+    int ret = 0;
26ba25
+
26ba25
+    rcu_read_lock();
26ba25
+    RAMBLOCK_FOREACH(block) {
26ba25
+        if (!qemu_ram_is_migratable(block)) {
26ba25
+            continue;
26ba25
+        }
26ba25
+        ret = func(block->idstr, block->host, block->offset,
26ba25
+                   block->used_length, opaque);
26ba25
+        if (ret) {
26ba25
+            break;
26ba25
+        }
26ba25
+    }
26ba25
+    rcu_read_unlock();
26ba25
+    return ret;
26ba25
+}
26ba25
+
26ba25
 /*
26ba25
  * Unmap pages of memory from start to start+length such that
26ba25
  * they a) read as 0, b) Trigger whatever fault mechanism
26ba25
diff --git a/include/exec/cpu-common.h b/include/exec/cpu-common.h
26ba25
index 24d335f..0b58e26 100644
26ba25
--- a/include/exec/cpu-common.h
26ba25
+++ b/include/exec/cpu-common.h
26ba25
@@ -75,6 +75,9 @@ const char *qemu_ram_get_idstr(RAMBlock *rb);
26ba25
 bool qemu_ram_is_shared(RAMBlock *rb);
26ba25
 bool qemu_ram_is_uf_zeroable(RAMBlock *rb);
26ba25
 void qemu_ram_set_uf_zeroable(RAMBlock *rb);
26ba25
+bool qemu_ram_is_migratable(RAMBlock *rb);
26ba25
+void qemu_ram_set_migratable(RAMBlock *rb);
26ba25
+void qemu_ram_unset_migratable(RAMBlock *rb);
26ba25
 
26ba25
 size_t qemu_ram_pagesize(RAMBlock *block);
26ba25
 size_t qemu_ram_pagesize_largest(void);
26ba25
@@ -119,6 +122,7 @@ typedef int (RAMBlockIterFunc)(const char *block_name, void *host_addr,
26ba25
     ram_addr_t offset, ram_addr_t length, void *opaque);
26ba25
 
26ba25
 int qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque);
26ba25
+int qemu_ram_foreach_migratable_block(RAMBlockIterFunc func, void *opaque);
26ba25
 int ram_block_discard_range(RAMBlock *rb, uint64_t start, size_t length);
26ba25
 
26ba25
 #endif
26ba25
diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c
26ba25
index b04e903..4b65ff9 100644
26ba25
--- a/migration/postcopy-ram.c
26ba25
+++ b/migration/postcopy-ram.c
26ba25
@@ -264,7 +264,7 @@ bool postcopy_ram_supported_by_host(MigrationIncomingState *mis)
26ba25
     }
26ba25
 
26ba25
     /* We don't support postcopy with shared RAM yet */
26ba25
-    if (qemu_ram_foreach_block(test_ramblock_postcopiable, NULL)) {
26ba25
+    if (qemu_ram_foreach_migratable_block(test_ramblock_postcopiable, NULL)) {
26ba25
         goto out;
26ba25
     }
26ba25
 
26ba25
@@ -392,7 +392,7 @@ static int cleanup_range(const char *block_name, void *host_addr,
26ba25
  */
26ba25
 int postcopy_ram_incoming_init(MigrationIncomingState *mis, size_t ram_pages)
26ba25
 {
26ba25
-    if (qemu_ram_foreach_block(init_range, NULL)) {
26ba25
+    if (qemu_ram_foreach_migratable_block(init_range, NULL)) {
26ba25
         return -1;
26ba25
     }
26ba25
 
26ba25
@@ -428,7 +428,7 @@ int postcopy_ram_incoming_cleanup(MigrationIncomingState *mis)
26ba25
             return -1;
26ba25
         }
26ba25
 
26ba25
-        if (qemu_ram_foreach_block(cleanup_range, mis)) {
26ba25
+        if (qemu_ram_foreach_migratable_block(cleanup_range, mis)) {
26ba25
             return -1;
26ba25
         }
26ba25
         /* Let the fault thread quit */
26ba25
@@ -494,7 +494,7 @@ static int nhp_range(const char *block_name, void *host_addr,
26ba25
  */
26ba25
 int postcopy_ram_prepare_discard(MigrationIncomingState *mis)
26ba25
 {
26ba25
-    if (qemu_ram_foreach_block(nhp_range, mis)) {
26ba25
+    if (qemu_ram_foreach_migratable_block(nhp_range, mis)) {
26ba25
         return -1;
26ba25
     }
26ba25
 
26ba25
@@ -505,7 +505,7 @@ int postcopy_ram_prepare_discard(MigrationIncomingState *mis)
26ba25
 
26ba25
 /*
26ba25
  * Mark the given area of RAM as requiring notification to unwritten areas
26ba25
- * Used as a  callback on qemu_ram_foreach_block.
26ba25
+ * Used as a callback on qemu_ram_foreach_migratable_block.
26ba25
  *   host_addr: Base of area to mark
26ba25
  *   offset: Offset in the whole ram arena
26ba25
  *   length: Length of the section
26ba25
@@ -807,7 +807,7 @@ int postcopy_ram_enable_notify(MigrationIncomingState *mis)
26ba25
     mis->have_fault_thread = true;
26ba25
 
26ba25
     /* Mark so that we get notified of accesses to unwritten areas */
26ba25
-    if (qemu_ram_foreach_block(ram_block_enable_notify, mis)) {
26ba25
+    if (qemu_ram_foreach_migratable_block(ram_block_enable_notify, mis)) {
26ba25
         return -1;
26ba25
     }
26ba25
 
26ba25
diff --git a/migration/ram.c b/migration/ram.c
26ba25
index bd563b5..04b5df5 100644
26ba25
--- a/migration/ram.c
26ba25
+++ b/migration/ram.c
26ba25
@@ -153,11 +153,16 @@ out:
26ba25
     return ret;
26ba25
 }
26ba25
 
26ba25
+/* Should be holding either ram_list.mutex, or the RCU lock. */
26ba25
+#define RAMBLOCK_FOREACH_MIGRATABLE(block)             \
26ba25
+    RAMBLOCK_FOREACH(block)                            \
26ba25
+        if (!qemu_ram_is_migratable(block)) {} else
26ba25
+
26ba25
 static void ramblock_recv_map_init(void)
26ba25
 {
26ba25
     RAMBlock *rb;
26ba25
 
26ba25
-    RAMBLOCK_FOREACH(rb) {
26ba25
+    RAMBLOCK_FOREACH_MIGRATABLE(rb) {
26ba25
         assert(!rb->receivedmap);
26ba25
         rb->receivedmap = bitmap_new(rb->max_length >> qemu_target_page_bits());
26ba25
     }
26ba25
@@ -813,6 +818,10 @@ unsigned long migration_bitmap_find_dirty(RAMState *rs, RAMBlock *rb,
26ba25
     unsigned long *bitmap = rb->bmap;
26ba25
     unsigned long next;
26ba25
 
26ba25
+    if (!qemu_ram_is_migratable(rb)) {
26ba25
+        return size;
26ba25
+    }
26ba25
+
26ba25
     if (rs->ram_bulk_stage && start > 0) {
26ba25
         next = start + 1;
26ba25
     } else {
26ba25
@@ -858,7 +867,7 @@ uint64_t ram_pagesize_summary(void)
26ba25
     RAMBlock *block;
26ba25
     uint64_t summary = 0;
26ba25
 
26ba25
-    RAMBLOCK_FOREACH(block) {
26ba25
+    RAMBLOCK_FOREACH_MIGRATABLE(block) {
26ba25
         summary |= block->page_size;
26ba25
     }
26ba25
 
26ba25
@@ -882,7 +891,7 @@ static void migration_bitmap_sync(RAMState *rs)
26ba25
 
26ba25
     qemu_mutex_lock(&rs->bitmap_mutex);
26ba25
     rcu_read_lock();
26ba25
-    RAMBLOCK_FOREACH(block) {
26ba25
+    RAMBLOCK_FOREACH_MIGRATABLE(block) {
26ba25
         migration_bitmap_sync_range(rs, block, 0, block->used_length);
26ba25
     }
26ba25
     ram_counters.remaining = ram_bytes_remaining();
26ba25
@@ -1522,6 +1531,11 @@ static int ram_save_host_page(RAMState *rs, PageSearchStatus *pss,
26ba25
     size_t pagesize_bits =
26ba25
         qemu_ram_pagesize(pss->block) >> TARGET_PAGE_BITS;
26ba25
 
26ba25
+    if (!qemu_ram_is_migratable(pss->block)) {
26ba25
+        error_report("block %s should not be migrated !", pss->block->idstr);
26ba25
+        return 0;
26ba25
+    }
26ba25
+
26ba25
     do {
26ba25
         /* Check the pages is dirty and if it is send it */
26ba25
         if (!migration_bitmap_clear_dirty(rs, pss->block, pss->page)) {
26ba25
@@ -1620,7 +1634,7 @@ uint64_t ram_bytes_total(void)
26ba25
     uint64_t total = 0;
26ba25
 
26ba25
     rcu_read_lock();
26ba25
-    RAMBLOCK_FOREACH(block) {
26ba25
+    RAMBLOCK_FOREACH_MIGRATABLE(block) {
26ba25
         total += block->used_length;
26ba25
     }
26ba25
     rcu_read_unlock();
26ba25
@@ -1675,7 +1689,7 @@ static void ram_save_cleanup(void *opaque)
26ba25
      */
26ba25
     memory_global_dirty_log_stop();
26ba25
 
26ba25
-    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
26ba25
+    RAMBLOCK_FOREACH_MIGRATABLE(block) {
26ba25
         g_free(block->bmap);
26ba25
         block->bmap = NULL;
26ba25
         g_free(block->unsentmap);
26ba25
@@ -1738,7 +1752,7 @@ void ram_postcopy_migrated_memory_release(MigrationState *ms)
26ba25
 {
26ba25
     struct RAMBlock *block;
26ba25
 
26ba25
-    RAMBLOCK_FOREACH(block) {
26ba25
+    RAMBLOCK_FOREACH_MIGRATABLE(block) {
26ba25
         unsigned long *bitmap = block->bmap;
26ba25
         unsigned long range = block->used_length >> TARGET_PAGE_BITS;
26ba25
         unsigned long run_start = find_next_zero_bit(bitmap, range, 0);
26ba25
@@ -1816,7 +1830,7 @@ static int postcopy_each_ram_send_discard(MigrationState *ms)
26ba25
     struct RAMBlock *block;
26ba25
     int ret;
26ba25
 
26ba25
-    RAMBLOCK_FOREACH(block) {
26ba25
+    RAMBLOCK_FOREACH_MIGRATABLE(block) {
26ba25
         PostcopyDiscardState *pds =
26ba25
             postcopy_discard_send_init(ms, block->idstr);
26ba25
 
26ba25
@@ -2024,7 +2038,7 @@ int ram_postcopy_send_discard_bitmap(MigrationState *ms)
26ba25
     rs->last_sent_block = NULL;
26ba25
     rs->last_page = 0;
26ba25
 
26ba25
-    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
26ba25
+    RAMBLOCK_FOREACH_MIGRATABLE(block) {
26ba25
         unsigned long pages = block->used_length >> TARGET_PAGE_BITS;
26ba25
         unsigned long *bitmap = block->bmap;
26ba25
         unsigned long *unsentmap = block->unsentmap;
26ba25
@@ -2183,7 +2197,7 @@ static void ram_list_init_bitmaps(void)
26ba25
 
26ba25
     /* Skip setting bitmap if there is no RAM */
26ba25
     if (ram_bytes_total()) {
26ba25
-        QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
26ba25
+        RAMBLOCK_FOREACH_MIGRATABLE(block) {
26ba25
             pages = block->max_length >> TARGET_PAGE_BITS;
26ba25
             block->bmap = bitmap_new(pages);
26ba25
             bitmap_set(block->bmap, 0, pages);
26ba25
@@ -2264,7 +2278,7 @@ static int ram_save_setup(QEMUFile *f, void *opaque)
26ba25
 
26ba25
     qemu_put_be64(f, ram_bytes_total() | RAM_SAVE_FLAG_MEM_SIZE);
26ba25
 
26ba25
-    RAMBLOCK_FOREACH(block) {
26ba25
+    RAMBLOCK_FOREACH_MIGRATABLE(block) {
26ba25
         qemu_put_byte(f, strlen(block->idstr));
26ba25
         qemu_put_buffer(f, (uint8_t *)block->idstr, strlen(block->idstr));
26ba25
         qemu_put_be64(f, block->used_length);
26ba25
@@ -2508,6 +2522,11 @@ static inline RAMBlock *ram_block_from_stream(QEMUFile *f, int flags)
26ba25
         return NULL;
26ba25
     }
26ba25
 
26ba25
+    if (!qemu_ram_is_migratable(block)) {
26ba25
+        error_report("block %s should not be migrated !", id);
26ba25
+        return NULL;
26ba25
+    }
26ba25
+
26ba25
     return block;
26ba25
 }
26ba25
 
26ba25
@@ -2750,7 +2769,7 @@ static int ram_load_cleanup(void *opaque)
26ba25
     xbzrle_load_cleanup();
26ba25
     compress_threads_load_cleanup();
26ba25
 
26ba25
-    RAMBLOCK_FOREACH(rb) {
26ba25
+    RAMBLOCK_FOREACH_MIGRATABLE(rb) {
26ba25
         g_free(rb->receivedmap);
26ba25
         rb->receivedmap = NULL;
26ba25
     }
26ba25
@@ -3012,7 +3031,10 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id)
26ba25
                 length = qemu_get_be64(f);
26ba25
 
26ba25
                 block = qemu_ram_block_by_name(id);
26ba25
-                if (block) {
26ba25
+                if (block && !qemu_ram_is_migratable(block)) {
26ba25
+                    error_report("block %s should not be migrated !", id);
26ba25
+                    ret = -EINVAL;
26ba25
+                } else if (block) {
26ba25
                     if (length != block->used_length) {
26ba25
                         Error *local_err = NULL;
26ba25
 
26ba25
diff --git a/migration/savevm.c b/migration/savevm.c
26ba25
index edb3b94..0bb9446 100644
26ba25
--- a/migration/savevm.c
26ba25
+++ b/migration/savevm.c
26ba25
@@ -2506,11 +2506,13 @@ void vmstate_register_ram(MemoryRegion *mr, DeviceState *dev)
26ba25
 {
26ba25
     qemu_ram_set_idstr(mr->ram_block,
26ba25
                        memory_region_name(mr), dev);
26ba25
+    qemu_ram_set_migratable(mr->ram_block);
26ba25
 }
26ba25
 
26ba25
 void vmstate_unregister_ram(MemoryRegion *mr, DeviceState *dev)
26ba25
 {
26ba25
     qemu_ram_unset_idstr(mr->ram_block);
26ba25
+    qemu_ram_unset_migratable(mr->ram_block);
26ba25
 }
26ba25
 
26ba25
 void vmstate_register_ram_global(MemoryRegion *mr)
26ba25
-- 
26ba25
1.8.3.1
26ba25