thebeanogamer / rpms / qemu-kvm

Forked from rpms/qemu-kvm 5 months ago
Clone

Blame SOURCES/kvm-multifd-Implement-zero-copy-write-in-multifd-migrati.patch

586cba
From c1a2866d158ac67179fa0d17f1710302eb9a3866 Mon Sep 17 00:00:00 2001
586cba
From: Leonardo Bras <leobras@redhat.com>
586cba
Date: Fri, 13 May 2022 03:28:37 -0300
586cba
Subject: [PATCH 14/18] multifd: Implement zero copy write in multifd migration
586cba
 (multifd-zero-copy)
586cba
MIME-Version: 1.0
586cba
Content-Type: text/plain; charset=UTF-8
586cba
Content-Transfer-Encoding: 8bit
586cba
586cba
RH-Author: Leonardo Brás <leobras@redhat.com>
586cba
RH-MergeRequest: 95: MSG_ZEROCOPY + Multifd
586cba
RH-Commit: [8/11] b93009cc94b2cc4b464b4f68ebfb37b870dd6f7d (LeoBras/centos-qemu-kvm)
586cba
RH-Bugzilla: 1968509
586cba
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
586cba
RH-Acked-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
586cba
RH-Acked-by: Peter Xu <peterx@redhat.com>
586cba
586cba
Implement zero copy send on nocomp_send_write(), by making use of QIOChannel
586cba
writev + flags & flush interface.
586cba
586cba
Change multifd_send_sync_main() so flush_zero_copy() can be called
586cba
after each iteration in order to make sure all dirty pages are sent before
586cba
a new iteration is started. It will also flush at the beginning and at the
586cba
end of migration.
586cba
586cba
Also make it return -1 if flush_zero_copy() fails, in order to cancel
586cba
the migration process, and avoid resuming the guest in the target host
586cba
without receiving all current RAM.
586cba
586cba
This will work fine on RAM migration because the RAM pages are not usually freed,
586cba
and there is no problem in changing the page contents between writev_zero_copy() and
586cba
the actual sending of the buffer, because this change will dirty the page and
586cba
cause it to be re-sent on a next iteration anyway.
586cba
586cba
A lot of locked memory may be needed in order to use multifd migration
586cba
with zero-copy enabled, so disabling the feature may be necessary for
586cba
low-privileged users trying to perform multifd migrations.
586cba
586cba
Signed-off-by: Leonardo Bras <leobras@redhat.com>
586cba
Reviewed-by: Peter Xu <peterx@redhat.com>
586cba
Reviewed-by: Daniel P. Berrangé <berrange@redhat.com>
586cba
Message-Id: <20220513062836.965425-9-leobras@redhat.com>
586cba
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
586cba
(cherry picked from commit 5b1d9bab2da4fca3a3caee97c430e5709cb32b7b)
586cba
Signed-off-by: Leonardo Bras <leobras@redhat.com>
586cba
---
586cba
 migration/migration.c | 11 ++++++++++-
586cba
 migration/multifd.c   | 37 +++++++++++++++++++++++++++++++++++--
586cba
 migration/multifd.h   |  2 ++
586cba
 migration/socket.c    |  5 +++--
586cba
 4 files changed, 50 insertions(+), 5 deletions(-)
586cba
586cba
diff --git a/migration/migration.c b/migration/migration.c
586cba
index d91efb66fe..102236fba0 100644
586cba
--- a/migration/migration.c
586cba
+++ b/migration/migration.c
586cba
@@ -1485,7 +1485,16 @@ static bool migrate_params_check(MigrationParameters *params, Error **errp)
586cba
         error_prepend(errp, "Invalid mapping given for block-bitmap-mapping: ");
586cba
         return false;
586cba
     }
586cba
-
586cba
+#ifdef CONFIG_LINUX
586cba
+    if (params->zero_copy_send &&
586cba
+        (!migrate_use_multifd() ||
586cba
+         params->multifd_compression != MULTIFD_COMPRESSION_NONE ||
586cba
+         (params->tls_creds && *params->tls_creds))) {
586cba
+        error_setg(errp,
586cba
+                   "Zero copy only available for non-compressed non-TLS multifd migration");
586cba
+        return false;
586cba
+    }
586cba
+#endif
586cba
     return true;
586cba
 }
586cba
 
586cba
diff --git a/migration/multifd.c b/migration/multifd.c
586cba
index 8fca6c970e..0b5b41c53f 100644
586cba
--- a/migration/multifd.c
586cba
+++ b/migration/multifd.c
586cba
@@ -571,6 +571,7 @@ void multifd_save_cleanup(void)
586cba
 int multifd_send_sync_main(QEMUFile *f)
586cba
 {
586cba
     int i;
586cba
+    bool flush_zero_copy;
586cba
 
586cba
     if (!migrate_use_multifd()) {
586cba
         return 0;
586cba
@@ -581,6 +582,20 @@ int multifd_send_sync_main(QEMUFile *f)
586cba
             return -1;
586cba
         }
586cba
     }
586cba
+
586cba
+    /*
586cba
+     * When using zero-copy, it's necessary to flush the pages before any of
586cba
+     * the pages can be sent again, so we'll make sure the new version of the
586cba
+     * pages will always arrive _later_ than the old pages.
586cba
+     *
586cba
+     * Currently we achieve this by flushing the zero-copy requested writes
586cba
+     * per ram iteration, but in the future we could potentially optimize it
586cba
+     * to be less frequent, e.g. only after we finished one whole scanning of
586cba
+     * all the dirty bitmaps.
586cba
+     */
586cba
+
586cba
+    flush_zero_copy = migrate_use_zero_copy_send();
586cba
+
586cba
     for (i = 0; i < migrate_multifd_channels(); i++) {
586cba
         MultiFDSendParams *p = &multifd_send_state->params[i];
586cba
 
586cba
@@ -602,6 +617,17 @@ int multifd_send_sync_main(QEMUFile *f)
586cba
         ram_counters.transferred += p->packet_len;
586cba
         qemu_mutex_unlock(&p->mutex);
586cba
         qemu_sem_post(&p->sem);
586cba
+
586cba
+        if (flush_zero_copy && p->c) {
586cba
+            int ret;
586cba
+            Error *err = NULL;
586cba
+
586cba
+            ret = qio_channel_flush(p->c, &err);
586cba
+            if (ret < 0) {
586cba
+                error_report_err(err);
586cba
+                return -1;
586cba
+            }
586cba
+        }
586cba
     }
586cba
     for (i = 0; i < migrate_multifd_channels(); i++) {
586cba
         MultiFDSendParams *p = &multifd_send_state->params[i];
586cba
@@ -686,8 +712,8 @@ static void *multifd_send_thread(void *opaque)
586cba
                 p->iov[0].iov_base = p->packet;
586cba
             }
586cba
 
586cba
-            ret = qio_channel_writev_all(p->c, p->iov, p->iovs_num,
586cba
-                                         &local_err);
586cba
+            ret = qio_channel_writev_full_all(p->c, p->iov, p->iovs_num, NULL,
586cba
+                                              0, p->write_flags, &local_err);
586cba
             if (ret != 0) {
586cba
                 break;
586cba
             }
586cba
@@ -928,6 +954,13 @@ int multifd_save_setup(Error **errp)
586cba
         /* We need one extra place for the packet header */
586cba
         p->iov = g_new0(struct iovec, page_count + 1);
586cba
         p->normal = g_new0(ram_addr_t, page_count);
586cba
+
586cba
+        if (migrate_use_zero_copy_send()) {
586cba
+            p->write_flags = QIO_CHANNEL_WRITE_FLAG_ZERO_COPY;
586cba
+        } else {
586cba
+            p->write_flags = 0;
586cba
+        }
586cba
+
586cba
         socket_send_channel_create(multifd_new_send_channel_async, p);
586cba
     }
586cba
 
586cba
diff --git a/migration/multifd.h b/migration/multifd.h
586cba
index cd495195ce..7ec688fb4f 100644
586cba
--- a/migration/multifd.h
586cba
+++ b/migration/multifd.h
586cba
@@ -96,6 +96,8 @@ typedef struct {
586cba
     uint32_t packet_len;
586cba
     /* pointer to the packet */
586cba
     MultiFDPacket_t *packet;
586cba
+    /* multifd flags for sending ram */
586cba
+    int write_flags;
586cba
     /* multifd flags for each packet */
586cba
     uint32_t flags;
586cba
     /* size of the next packet that contains pages */
586cba
diff --git a/migration/socket.c b/migration/socket.c
586cba
index 3754d8f72c..4fd5e85f50 100644
586cba
--- a/migration/socket.c
586cba
+++ b/migration/socket.c
586cba
@@ -79,8 +79,9 @@ static void socket_outgoing_migration(QIOTask *task,
586cba
 
586cba
     trace_migration_socket_outgoing_connected(data->hostname);
586cba
 
586cba
-    if (migrate_use_zero_copy_send()) {
586cba
-        error_setg(&err, "Zero copy send not available in migration");
586cba
+    if (migrate_use_zero_copy_send() &&
586cba
+        !qio_channel_has_feature(sioc, QIO_CHANNEL_FEATURE_WRITE_ZERO_COPY)) {
586cba
+        error_setg(&err, "Zero copy send feature not detected in host kernel");
586cba
     }
586cba
 
586cba
 out:
586cba
-- 
586cba
2.35.3
586cba