|
|
0727d3 |
From 7a7e2191f1ac4114380248cbd3c6ab7425250747 Mon Sep 17 00:00:00 2001
|
|
|
0727d3 |
From: Leonardo Bras <leobras@redhat.com>
|
|
|
0727d3 |
Date: Wed, 18 May 2022 02:52:25 -0300
|
|
|
0727d3 |
Subject: [PATCH 23/37] multifd: Implement zero copy write in multifd migration
|
|
|
0727d3 |
(multifd-zero-copy)
|
|
|
0727d3 |
MIME-Version: 1.0
|
|
|
0727d3 |
Content-Type: text/plain; charset=UTF-8
|
|
|
0727d3 |
Content-Transfer-Encoding: 8bit
|
|
|
0727d3 |
|
|
|
0727d3 |
RH-Author: Leonardo Brás <leobras@redhat.com>
|
|
|
0727d3 |
RH-MergeRequest: 191: MSG_ZEROCOPY + Multifd @ rhel8.7
|
|
|
0727d3 |
RH-Commit: [23/26] 904ce3909cfef62dd84cc7d3c6a3482e7e6f28e9
|
|
|
0727d3 |
RH-Bugzilla: 2072049
|
|
|
0727d3 |
RH-Acked-by: Peter Xu <peterx@redhat.com>
|
|
|
0727d3 |
RH-Acked-by: Daniel P. Berrangé <berrange@redhat.com>
|
|
|
0727d3 |
RH-Acked-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
|
|
|
0727d3 |
|
|
|
0727d3 |
Implement zero copy send on nocomp_send_write(), by making use of QIOChannel
|
|
|
0727d3 |
writev + flags & flush interface.
|
|
|
0727d3 |
|
|
|
0727d3 |
Change multifd_send_sync_main() so flush_zero_copy() can be called
|
|
|
0727d3 |
after each iteration in order to make sure all dirty pages are sent before
|
|
|
0727d3 |
a new iteration is started. It will also flush at the beginning and at the
|
|
|
0727d3 |
end of migration.
|
|
|
0727d3 |
|
|
|
0727d3 |
Also make it return -1 if flush_zero_copy() fails, in order to cancel
|
|
|
0727d3 |
the migration process, and avoid resuming the guest in the target host
|
|
|
0727d3 |
without receiving all current RAM.
|
|
|
0727d3 |
|
|
|
0727d3 |
This will work fine on RAM migration because the RAM pages are not usually freed,
|
|
|
0727d3 |
and there is no problem with changing the pages' content between writev_zero_copy() and
|
|
|
0727d3 |
the actual sending of the buffer, because this change will dirty the page and
|
|
|
0727d3 |
cause it to be re-sent on a next iteration anyway.
|
|
|
0727d3 |
|
|
|
0727d3 |
A lot of locked memory may be needed in order to use multifd migration
|
|
|
0727d3 |
with zero-copy enabled, so disabling the feature may be necessary for
|
|
|
0727d3 |
low-privileged users trying to perform multifd migrations.
|
|
|
0727d3 |
|
|
|
0727d3 |
Signed-off-by: Leonardo Bras <leobras@redhat.com>
|
|
|
0727d3 |
Reviewed-by: Peter Xu <peterx@redhat.com>
|
|
|
0727d3 |
Reviewed-by: Daniel P. Berrangé <berrange@redhat.com>
|
|
|
0727d3 |
Message-Id: <20220513062836.965425-9-leobras@redhat.com>
|
|
|
0727d3 |
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
|
|
|
0727d3 |
(cherry picked from commit 5b1d9bab2da4fca3a3caee97c430e5709cb32b7b)
|
|
|
0727d3 |
Signed-off-by: Leonardo Bras <leobras@redhat.com>
|
|
|
0727d3 |
---
|
|
|
0727d3 |
migration/migration.c | 11 ++++++++++-
|
|
|
0727d3 |
migration/multifd.c | 37 +++++++++++++++++++++++++++++++++++--
|
|
|
0727d3 |
migration/multifd.h | 2 ++
|
|
|
0727d3 |
migration/socket.c | 5 +++--
|
|
|
0727d3 |
4 files changed, 50 insertions(+), 5 deletions(-)
|
|
|
0727d3 |
|
|
|
0727d3 |
diff --git a/migration/migration.c b/migration/migration.c
|
|
|
0727d3 |
index 8e28f2ee41..5357efd348 100644
|
|
|
0727d3 |
--- a/migration/migration.c
|
|
|
0727d3 |
+++ b/migration/migration.c
|
|
|
0727d3 |
@@ -1471,7 +1471,16 @@ static bool migrate_params_check(MigrationParameters *params, Error **errp)
|
|
|
0727d3 |
error_prepend(errp, "Invalid mapping given for block-bitmap-mapping: ");
|
|
|
0727d3 |
return false;
|
|
|
0727d3 |
}
|
|
|
0727d3 |
-
|
|
|
0727d3 |
+#ifdef CONFIG_LINUX
|
|
|
0727d3 |
+ if (params->zero_copy_send &&
|
|
|
0727d3 |
+ (!migrate_use_multifd() ||
|
|
|
0727d3 |
+ params->multifd_compression != MULTIFD_COMPRESSION_NONE ||
|
|
|
0727d3 |
+ (params->tls_creds && *params->tls_creds))) {
|
|
|
0727d3 |
+ error_setg(errp,
|
|
|
0727d3 |
+ "Zero copy only available for non-compressed non-TLS multifd migration");
|
|
|
0727d3 |
+ return false;
|
|
|
0727d3 |
+ }
|
|
|
0727d3 |
+#endif
|
|
|
0727d3 |
return true;
|
|
|
0727d3 |
}
|
|
|
0727d3 |
|
|
|
0727d3 |
diff --git a/migration/multifd.c b/migration/multifd.c
|
|
|
0727d3 |
index 193f70cdba..90ab4c4346 100644
|
|
|
0727d3 |
--- a/migration/multifd.c
|
|
|
0727d3 |
+++ b/migration/multifd.c
|
|
|
0727d3 |
@@ -576,6 +576,7 @@ void multifd_save_cleanup(void)
|
|
|
0727d3 |
int multifd_send_sync_main(QEMUFile *f)
|
|
|
0727d3 |
{
|
|
|
0727d3 |
int i;
|
|
|
0727d3 |
+ bool flush_zero_copy;
|
|
|
0727d3 |
|
|
|
0727d3 |
if (!migrate_use_multifd()) {
|
|
|
0727d3 |
return 0;
|
|
|
0727d3 |
@@ -586,6 +587,20 @@ int multifd_send_sync_main(QEMUFile *f)
|
|
|
0727d3 |
return -1;
|
|
|
0727d3 |
}
|
|
|
0727d3 |
}
|
|
|
0727d3 |
+
|
|
|
0727d3 |
+ /*
|
|
|
0727d3 |
+ * When using zero-copy, it's necessary to flush the pages before any of
|
|
|
0727d3 |
+ * the pages can be sent again, so we'll make sure the new version of the
|
|
|
0727d3 |
+ * pages will always arrive _later_ than the old pages.
|
|
|
0727d3 |
+ *
|
|
|
0727d3 |
+ * Currently we achieve this by flushing the zero-copy requested writes
|
|
|
0727d3 |
+ * per ram iteration, but in the future we could potentially optimize it
|
|
|
0727d3 |
+ * to be less frequent, e.g. only after we finished one whole scanning of
|
|
|
0727d3 |
+ * all the dirty bitmaps.
|
|
|
0727d3 |
+ */
|
|
|
0727d3 |
+
|
|
|
0727d3 |
+ flush_zero_copy = migrate_use_zero_copy_send();
|
|
|
0727d3 |
+
|
|
|
0727d3 |
for (i = 0; i < migrate_multifd_channels(); i++) {
|
|
|
0727d3 |
MultiFDSendParams *p = &multifd_send_state->params[i];
|
|
|
0727d3 |
|
|
|
0727d3 |
@@ -607,6 +622,17 @@ int multifd_send_sync_main(QEMUFile *f)
|
|
|
0727d3 |
ram_counters.transferred += p->packet_len;
|
|
|
0727d3 |
qemu_mutex_unlock(&p->mutex);
|
|
|
0727d3 |
qemu_sem_post(&p->sem);
|
|
|
0727d3 |
+
|
|
|
0727d3 |
+ if (flush_zero_copy && p->c) {
|
|
|
0727d3 |
+ int ret;
|
|
|
0727d3 |
+ Error *err = NULL;
|
|
|
0727d3 |
+
|
|
|
0727d3 |
+ ret = qio_channel_flush(p->c, &err);
|
|
|
0727d3 |
+ if (ret < 0) {
|
|
|
0727d3 |
+ error_report_err(err);
|
|
|
0727d3 |
+ return -1;
|
|
|
0727d3 |
+ }
|
|
|
0727d3 |
+ }
|
|
|
0727d3 |
}
|
|
|
0727d3 |
for (i = 0; i < migrate_multifd_channels(); i++) {
|
|
|
0727d3 |
MultiFDSendParams *p = &multifd_send_state->params[i];
|
|
|
0727d3 |
@@ -691,8 +717,8 @@ static void *multifd_send_thread(void *opaque)
|
|
|
0727d3 |
p->iov[0].iov_base = p->packet;
|
|
|
0727d3 |
}
|
|
|
0727d3 |
|
|
|
0727d3 |
- ret = qio_channel_writev_all(p->c, p->iov, p->iovs_num,
|
|
|
0727d3 |
- &local_err);
|
|
|
0727d3 |
+ ret = qio_channel_writev_full_all(p->c, p->iov, p->iovs_num, NULL,
|
|
|
0727d3 |
+ 0, p->write_flags, &local_err);
|
|
|
0727d3 |
if (ret != 0) {
|
|
|
0727d3 |
break;
|
|
|
0727d3 |
}
|
|
|
0727d3 |
@@ -933,6 +959,13 @@ int multifd_save_setup(Error **errp)
|
|
|
0727d3 |
/* We need one extra place for the packet header */
|
|
|
0727d3 |
p->iov = g_new0(struct iovec, page_count + 1);
|
|
|
0727d3 |
p->normal = g_new0(ram_addr_t, page_count);
|
|
|
0727d3 |
+
|
|
|
0727d3 |
+ if (migrate_use_zero_copy_send()) {
|
|
|
0727d3 |
+ p->write_flags = QIO_CHANNEL_WRITE_FLAG_ZERO_COPY;
|
|
|
0727d3 |
+ } else {
|
|
|
0727d3 |
+ p->write_flags = 0;
|
|
|
0727d3 |
+ }
|
|
|
0727d3 |
+
|
|
|
0727d3 |
socket_send_channel_create(multifd_new_send_channel_async, p);
|
|
|
0727d3 |
}
|
|
|
0727d3 |
|
|
|
0727d3 |
diff --git a/migration/multifd.h b/migration/multifd.h
|
|
|
0727d3 |
index 92de878155..11d5e273e6 100644
|
|
|
0727d3 |
--- a/migration/multifd.h
|
|
|
0727d3 |
+++ b/migration/multifd.h
|
|
|
0727d3 |
@@ -95,6 +95,8 @@ typedef struct {
|
|
|
0727d3 |
uint32_t packet_len;
|
|
|
0727d3 |
/* pointer to the packet */
|
|
|
0727d3 |
MultiFDPacket_t *packet;
|
|
|
0727d3 |
+ /* multifd flags for sending ram */
|
|
|
0727d3 |
+ int write_flags;
|
|
|
0727d3 |
/* multifd flags for each packet */
|
|
|
0727d3 |
uint32_t flags;
|
|
|
0727d3 |
/* size of the next packet that contains pages */
|
|
|
0727d3 |
diff --git a/migration/socket.c b/migration/socket.c
|
|
|
0727d3 |
index 3754d8f72c..4fd5e85f50 100644
|
|
|
0727d3 |
--- a/migration/socket.c
|
|
|
0727d3 |
+++ b/migration/socket.c
|
|
|
0727d3 |
@@ -79,8 +79,9 @@ static void socket_outgoing_migration(QIOTask *task,
|
|
|
0727d3 |
|
|
|
0727d3 |
trace_migration_socket_outgoing_connected(data->hostname);
|
|
|
0727d3 |
|
|
|
0727d3 |
- if (migrate_use_zero_copy_send()) {
|
|
|
0727d3 |
- error_setg(&err, "Zero copy send not available in migration");
|
|
|
0727d3 |
+ if (migrate_use_zero_copy_send() &&
|
|
|
0727d3 |
+ !qio_channel_has_feature(sioc, QIO_CHANNEL_FEATURE_WRITE_ZERO_COPY)) {
|
|
|
0727d3 |
+ error_setg(&err, "Zero copy send feature not detected in host kernel");
|
|
|
0727d3 |
}
|
|
|
0727d3 |
|
|
|
0727d3 |
out:
|
|
|
0727d3 |
--
|
|
|
0727d3 |
2.35.3
|
|
|
0727d3 |
|