thebeanogamer / rpms / qemu-kvm

Forked from rpms/qemu-kvm 5 months ago
Clone
7f1c5b
From 29eee1fbb84c0e2f0ece9e6d996afa7238ed2912 Mon Sep 17 00:00:00 2001
7f1c5b
From: "manish.mishra" <manish.mishra@nutanix.com>
7f1c5b
Date: Tue, 20 Dec 2022 18:44:18 +0000
7f1c5b
Subject: [PATCH 7/8] migration: check magic value for deciding the mapping of
7f1c5b
 channels
7f1c5b
MIME-Version: 1.0
7f1c5b
Content-Type: text/plain; charset=UTF-8
7f1c5b
Content-Transfer-Encoding: 8bit
7f1c5b
7f1c5b
RH-Author: Peter Xu <peterx@redhat.com>
7f1c5b
RH-MergeRequest: 150: migration: Fix multifd crash on channel disorders
7f1c5b
RH-Bugzilla: 2169732
7f1c5b
RH-Acked-by: quintela1 <quintela@redhat.com>
7f1c5b
RH-Acked-by: Leonardo BrĂ¡s <leobras@redhat.com>
7f1c5b
RH-Acked-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
7f1c5b
RH-Commit: [2/2] 4fb9408478923415a91fe0527bf4b1a0f022f329 (peterx/qemu-kvm)
7f1c5b
7f1c5b
Current logic assumes that channel connections on the destination side are
7f1c5b
always established in the same order as the source and the first one will
7f1c5b
always be the main channel followed by the multifid or post-copy
7f1c5b
preemption channel. This may not be always true, as even if a channel has a
7f1c5b
connection established on the source side it can be in the pending state on
7f1c5b
the destination side and a newer connection can be established first.
7f1c5b
Basically causing out of order mapping of channels on the destination side.
7f1c5b
Currently, all channels except post-copy preempt send a magic number, this
7f1c5b
patch uses that magic number to decide the type of channel. This logic is
7f1c5b
applicable only for precopy(multifd) live migration, as mentioned, the
7f1c5b
post-copy preempt channel does not send any magic number. Also, tls live
7f1c5b
migrations already does tls handshake before creating other channels, so
7f1c5b
this issue is not possible with tls, hence this logic is avoided for tls
7f1c5b
live migrations. This patch uses read peek to check the magic number of
7f1c5b
channels so that current data/control stream management remains
7f1c5b
un-effected.
7f1c5b
7f1c5b
Reviewed-by: Peter Xu <peterx@redhat.com>
7f1c5b
Reviewed-by: Daniel P. Berrange <berrange@redhat.com>
7f1c5b
Reviewed-by: Juan Quintela <quintela@redhat.com>
7f1c5b
Suggested-by: Daniel P. Berrange <berrange@redhat.com>
7f1c5b
Signed-off-by: manish.mishra <manish.mishra@nutanix.com>
7f1c5b
Signed-off-by: Juan Quintela <quintela@redhat.com>
7f1c5b
(cherry picked from commit 6720c2b32725e6ac404f22851a0ecd0a71d0cbe2)
7f1c5b
Signed-off-by: Peter Xu <peterx@redhat.com>
7f1c5b
---
7f1c5b
 migration/channel.c      | 45 +++++++++++++++++++++++++++++++++
7f1c5b
 migration/channel.h      |  5 ++++
7f1c5b
 migration/migration.c    | 54 ++++++++++++++++++++++++++++------------
7f1c5b
 migration/multifd.c      | 19 +++++++-------
7f1c5b
 migration/multifd.h      |  2 +-
7f1c5b
 migration/postcopy-ram.c |  5 +---
7f1c5b
 migration/postcopy-ram.h |  2 +-
7f1c5b
 7 files changed, 101 insertions(+), 31 deletions(-)
7f1c5b
7f1c5b
diff --git a/migration/channel.c b/migration/channel.c
7f1c5b
index 1b0815039f..ca3319a309 100644
7f1c5b
--- a/migration/channel.c
7f1c5b
+++ b/migration/channel.c
7f1c5b
@@ -92,3 +92,48 @@ void migration_channel_connect(MigrationState *s,
7f1c5b
     migrate_fd_connect(s, error);
7f1c5b
     error_free(error);
7f1c5b
 }
7f1c5b
+
7f1c5b
+
7f1c5b
+/**
7f1c5b
+ * @migration_channel_read_peek - Peek at migration channel, without
7f1c5b
+ *     actually removing it from channel buffer.
7f1c5b
+ *
7f1c5b
+ * @ioc: the channel object
7f1c5b
+ * @buf: the memory region to read data into
7f1c5b
+ * @buflen: the number of bytes to read in @buf
7f1c5b
+ * @errp: pointer to a NULL-initialized error object
7f1c5b
+ *
7f1c5b
+ * Returns 0 if successful, returns -1 and sets @errp if fails.
7f1c5b
+ */
7f1c5b
+int migration_channel_read_peek(QIOChannel *ioc,
7f1c5b
+                                const char *buf,
7f1c5b
+                                const size_t buflen,
7f1c5b
+                                Error **errp)
7f1c5b
+{
7f1c5b
+    ssize_t len = 0;
7f1c5b
+    struct iovec iov = { .iov_base = (char *)buf, .iov_len = buflen };
7f1c5b
+
7f1c5b
+    while (true) {
7f1c5b
+        len = qio_channel_readv_full(ioc, &iov, 1, NULL, NULL,
7f1c5b
+                                     QIO_CHANNEL_READ_FLAG_MSG_PEEK, errp);
7f1c5b
+
7f1c5b
+        if (len <= 0 && len != QIO_CHANNEL_ERR_BLOCK) {
7f1c5b
+            error_setg(errp,
7f1c5b
+                       "Failed to peek at channel");
7f1c5b
+            return -1;
7f1c5b
+        }
7f1c5b
+
7f1c5b
+        if (len == buflen) {
7f1c5b
+            break;
7f1c5b
+        }
7f1c5b
+
7f1c5b
+        /* 1ms sleep. */
7f1c5b
+        if (qemu_in_coroutine()) {
7f1c5b
+            qemu_co_sleep_ns(QEMU_CLOCK_REALTIME, 1000000);
7f1c5b
+        } else {
7f1c5b
+            g_usleep(1000);
7f1c5b
+        }
7f1c5b
+    }
7f1c5b
+
7f1c5b
+    return 0;
7f1c5b
+}
7f1c5b
diff --git a/migration/channel.h b/migration/channel.h
7f1c5b
index 67a461c28a..5bdb8208a7 100644
7f1c5b
--- a/migration/channel.h
7f1c5b
+++ b/migration/channel.h
7f1c5b
@@ -24,4 +24,9 @@ void migration_channel_connect(MigrationState *s,
7f1c5b
                                QIOChannel *ioc,
7f1c5b
                                const char *hostname,
7f1c5b
                                Error *error_in);
7f1c5b
+
7f1c5b
+int migration_channel_read_peek(QIOChannel *ioc,
7f1c5b
+                                const char *buf,
7f1c5b
+                                const size_t buflen,
7f1c5b
+                                Error **errp);
7f1c5b
 #endif
7f1c5b
diff --git a/migration/migration.c b/migration/migration.c
7f1c5b
index f485eea5fb..593dbd25de 100644
7f1c5b
--- a/migration/migration.c
7f1c5b
+++ b/migration/migration.c
7f1c5b
@@ -31,6 +31,7 @@
7f1c5b
 #include "migration.h"
7f1c5b
 #include "savevm.h"
7f1c5b
 #include "qemu-file.h"
7f1c5b
+#include "channel.h"
7f1c5b
 #include "migration/vmstate.h"
7f1c5b
 #include "block/block.h"
7f1c5b
 #include "qapi/error.h"
7f1c5b
@@ -663,10 +664,6 @@ static bool migration_incoming_setup(QEMUFile *f, Error **errp)
7f1c5b
 {
7f1c5b
     MigrationIncomingState *mis = migration_incoming_get_current();
7f1c5b
 
7f1c5b
-    if (multifd_load_setup(errp) != 0) {
7f1c5b
-        return false;
7f1c5b
-    }
7f1c5b
-
7f1c5b
     if (!mis->from_src_file) {
7f1c5b
         mis->from_src_file = f;
7f1c5b
     }
7f1c5b
@@ -733,31 +730,56 @@ void migration_ioc_process_incoming(QIOChannel *ioc, Error **errp)
7f1c5b
 {
7f1c5b
     MigrationIncomingState *mis = migration_incoming_get_current();
7f1c5b
     Error *local_err = NULL;
7f1c5b
-    bool start_migration;
7f1c5b
     QEMUFile *f;
7f1c5b
+    bool default_channel = true;
7f1c5b
+    uint32_t channel_magic = 0;
7f1c5b
+    int ret = 0;
7f1c5b
 
7f1c5b
-    if (!mis->from_src_file) {
7f1c5b
-        /* The first connection (multifd may have multiple) */
7f1c5b
+    if (migrate_use_multifd() && !migrate_postcopy_ram() &&
7f1c5b
+        qio_channel_has_feature(ioc, QIO_CHANNEL_FEATURE_READ_MSG_PEEK)) {
7f1c5b
+        /*
7f1c5b
+         * With multiple channels, it is possible that we receive channels
7f1c5b
+         * out of order on destination side, causing incorrect mapping of
7f1c5b
+         * source channels on destination side. Check channel MAGIC to
7f1c5b
+         * decide type of channel. Please note this is best effort, postcopy
7f1c5b
+         * preempt channel does not send any magic number so avoid it for
7f1c5b
+         * postcopy live migration. Also tls live migration already does
7f1c5b
+         * tls handshake while initializing main channel so with tls this
7f1c5b
+         * issue is not possible.
7f1c5b
+         */
7f1c5b
+        ret = migration_channel_read_peek(ioc, (void *)&channel_magic,
7f1c5b
+                                          sizeof(channel_magic), &local_err);
7f1c5b
+
7f1c5b
+        if (ret != 0) {
7f1c5b
+            error_propagate(errp, local_err);
7f1c5b
+            return;
7f1c5b
+        }
7f1c5b
+
7f1c5b
+        default_channel = (channel_magic == cpu_to_be32(QEMU_VM_FILE_MAGIC));
7f1c5b
+    } else {
7f1c5b
+        default_channel = !mis->from_src_file;
7f1c5b
+    }
7f1c5b
+
7f1c5b
+    if (multifd_load_setup(errp) != 0) {
7f1c5b
+        error_setg(errp, "Failed to setup multifd channels");
7f1c5b
+        return;
7f1c5b
+    }
7f1c5b
+
7f1c5b
+    if (default_channel) {
7f1c5b
         f = qemu_file_new_input(ioc);
7f1c5b
 
7f1c5b
         if (!migration_incoming_setup(f, errp)) {
7f1c5b
             return;
7f1c5b
         }
7f1c5b
-
7f1c5b
-        /*
7f1c5b
-         * Common migration only needs one channel, so we can start
7f1c5b
-         * right now.  Some features need more than one channel, we wait.
7f1c5b
-         */
7f1c5b
-        start_migration = !migration_needs_multiple_sockets();
7f1c5b
     } else {
7f1c5b
         /* Multiple connections */
7f1c5b
         assert(migration_needs_multiple_sockets());
7f1c5b
         if (migrate_use_multifd()) {
7f1c5b
-            start_migration = multifd_recv_new_channel(ioc, &local_err);
7f1c5b
+            multifd_recv_new_channel(ioc, &local_err);
7f1c5b
         } else {
7f1c5b
             assert(migrate_postcopy_preempt());
7f1c5b
             f = qemu_file_new_input(ioc);
7f1c5b
-            start_migration = postcopy_preempt_new_channel(mis, f);
7f1c5b
+            postcopy_preempt_new_channel(mis, f);
7f1c5b
         }
7f1c5b
         if (local_err) {
7f1c5b
             error_propagate(errp, local_err);
7f1c5b
@@ -765,7 +787,7 @@ void migration_ioc_process_incoming(QIOChannel *ioc, Error **errp)
7f1c5b
         }
7f1c5b
     }
7f1c5b
 
7f1c5b
-    if (start_migration) {
7f1c5b
+    if (migration_has_all_channels()) {
7f1c5b
         /* If it's a recovery, we're done */
7f1c5b
         if (postcopy_try_recover()) {
7f1c5b
             return;
7f1c5b
diff --git a/migration/multifd.c b/migration/multifd.c
7f1c5b
index 509bbbe3bf..c3385529cf 100644
7f1c5b
--- a/migration/multifd.c
7f1c5b
+++ b/migration/multifd.c
7f1c5b
@@ -1167,9 +1167,14 @@ int multifd_load_setup(Error **errp)
7f1c5b
     uint32_t page_count = MULTIFD_PACKET_SIZE / qemu_target_page_size();
7f1c5b
     uint8_t i;
7f1c5b
 
7f1c5b
-    if (!migrate_use_multifd()) {
7f1c5b
+    /*
7f1c5b
+     * Return successfully if multiFD recv state is already initialised
7f1c5b
+     * or multiFD is not enabled.
7f1c5b
+     */
7f1c5b
+    if (multifd_recv_state || !migrate_use_multifd()) {
7f1c5b
         return 0;
7f1c5b
     }
7f1c5b
+
7f1c5b
     if (!migrate_multi_channels_is_allowed()) {
7f1c5b
         error_setg(errp, "multifd is not supported by current protocol");
7f1c5b
         return -1;
7f1c5b
@@ -1228,11 +1233,9 @@ bool multifd_recv_all_channels_created(void)
7f1c5b
 
7f1c5b
 /*
7f1c5b
  * Try to receive all multifd channels to get ready for the migration.
7f1c5b
- * - Return true and do not set @errp when correctly receiving all channels;
7f1c5b
- * - Return false and do not set @errp when correctly receiving the current one;
7f1c5b
- * - Return false and set @errp when failing to receive the current channel.
7f1c5b
+ * Sets @errp when failing to receive the current channel.
7f1c5b
  */
7f1c5b
-bool multifd_recv_new_channel(QIOChannel *ioc, Error **errp)
7f1c5b
+void multifd_recv_new_channel(QIOChannel *ioc, Error **errp)
7f1c5b
 {
7f1c5b
     MultiFDRecvParams *p;
7f1c5b
     Error *local_err = NULL;
7f1c5b
@@ -1245,7 +1248,7 @@ bool multifd_recv_new_channel(QIOChannel *ioc, Error **errp)
7f1c5b
                                 "failed to receive packet"
7f1c5b
                                 " via multifd channel %d: ",
7f1c5b
                                 qatomic_read(&multifd_recv_state->count));
7f1c5b
-        return false;
7f1c5b
+        return;
7f1c5b
     }
7f1c5b
     trace_multifd_recv_new_channel(id);
7f1c5b
 
7f1c5b
@@ -1255,7 +1258,7 @@ bool multifd_recv_new_channel(QIOChannel *ioc, Error **errp)
7f1c5b
                    id);
7f1c5b
         multifd_recv_terminate_threads(local_err);
7f1c5b
         error_propagate(errp, local_err);
7f1c5b
-        return false;
7f1c5b
+        return;
7f1c5b
     }
7f1c5b
     p->c = ioc;
7f1c5b
     object_ref(OBJECT(ioc));
7f1c5b
@@ -1266,6 +1269,4 @@ bool multifd_recv_new_channel(QIOChannel *ioc, Error **errp)
7f1c5b
     qemu_thread_create(&p->thread, p->name, multifd_recv_thread, p,
7f1c5b
                        QEMU_THREAD_JOINABLE);
7f1c5b
     qatomic_inc(&multifd_recv_state->count);
7f1c5b
-    return qatomic_read(&multifd_recv_state->count) ==
7f1c5b
-           migrate_multifd_channels();
7f1c5b
 }
7f1c5b
diff --git a/migration/multifd.h b/migration/multifd.h
7f1c5b
index 519f498643..913e4ba274 100644
7f1c5b
--- a/migration/multifd.h
7f1c5b
+++ b/migration/multifd.h
7f1c5b
@@ -18,7 +18,7 @@ void multifd_save_cleanup(void);
7f1c5b
 int multifd_load_setup(Error **errp);
7f1c5b
 int multifd_load_cleanup(Error **errp);
7f1c5b
 bool multifd_recv_all_channels_created(void);
7f1c5b
-bool multifd_recv_new_channel(QIOChannel *ioc, Error **errp);
7f1c5b
+void multifd_recv_new_channel(QIOChannel *ioc, Error **errp);
7f1c5b
 void multifd_recv_sync_main(void);
7f1c5b
 int multifd_send_sync_main(QEMUFile *f);
7f1c5b
 int multifd_queue_page(QEMUFile *f, RAMBlock *block, ram_addr_t offset);
7f1c5b
diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c
7f1c5b
index 0c55df0e52..b98e95dab0 100644
7f1c5b
--- a/migration/postcopy-ram.c
7f1c5b
+++ b/migration/postcopy-ram.c
7f1c5b
@@ -1538,7 +1538,7 @@ void postcopy_unregister_shared_ufd(struct PostCopyFD *pcfd)
7f1c5b
     }
7f1c5b
 }
7f1c5b
 
7f1c5b
-bool postcopy_preempt_new_channel(MigrationIncomingState *mis, QEMUFile *file)
7f1c5b
+void postcopy_preempt_new_channel(MigrationIncomingState *mis, QEMUFile *file)
7f1c5b
 {
7f1c5b
     /*
7f1c5b
      * The new loading channel has its own threads, so it needs to be
7f1c5b
@@ -1547,9 +1547,6 @@ bool postcopy_preempt_new_channel(MigrationIncomingState *mis, QEMUFile *file)
7f1c5b
     qemu_file_set_blocking(file, true);
7f1c5b
     mis->postcopy_qemufile_dst = file;
7f1c5b
     trace_postcopy_preempt_new_channel();
7f1c5b
-
7f1c5b
-    /* Start the migration immediately */
7f1c5b
-    return true;
7f1c5b
 }
7f1c5b
 
7f1c5b
 /*
7f1c5b
diff --git a/migration/postcopy-ram.h b/migration/postcopy-ram.h
7f1c5b
index 6147bf7d1d..25881c4127 100644
7f1c5b
--- a/migration/postcopy-ram.h
7f1c5b
+++ b/migration/postcopy-ram.h
7f1c5b
@@ -190,7 +190,7 @@ enum PostcopyChannels {
7f1c5b
     RAM_CHANNEL_MAX,
7f1c5b
 };
7f1c5b
 
7f1c5b
-bool postcopy_preempt_new_channel(MigrationIncomingState *mis, QEMUFile *file);
7f1c5b
+void postcopy_preempt_new_channel(MigrationIncomingState *mis, QEMUFile *file);
7f1c5b
 int postcopy_preempt_setup(MigrationState *s, Error **errp);
7f1c5b
 int postcopy_preempt_wait_channel(MigrationState *s);
7f1c5b
 
7f1c5b
-- 
7f1c5b
2.31.1
7f1c5b