diff --git a/SOURCES/kvm-Add-dirty-sync-missed-zero-copy-migration-stat.patch b/SOURCES/kvm-Add-dirty-sync-missed-zero-copy-migration-stat.patch new file mode 100644 index 0000000..fb67d64 --- /dev/null +++ b/SOURCES/kvm-Add-dirty-sync-missed-zero-copy-migration-stat.patch @@ -0,0 +1,87 @@ +From aba4d52b3c06aaf5a7553db6dadcb02645e153f1 Mon Sep 17 00:00:00 2001 +From: Leonardo Bras +Date: Mon, 11 Jul 2022 18:11:12 -0300 +Subject: [PATCH 30/34] Add dirty-sync-missed-zero-copy migration stat +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Leonardo Brás +RH-MergeRequest: 185: MSG_ZEROCOPY + Multifd @ rhel8.6 +RH-Commit: [30/34] 95d0255ea03cb7c986dc64645e95e10a5fbe0f9a +RH-Bugzilla: 2117252 +RH-Acked-by: quintela1 +RH-Acked-by: Dr. David Alan Gilbert +RH-Acked-by: Peter Xu + +Signed-off-by: Leonardo Bras +Acked-by: Markus Armbruster +Acked-by: Peter Xu +Reviewed-by: Daniel P. Berrangé +Message-Id: <20220711211112.18951-3-leobras@redhat.com> +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit cf20c897338067ab4b70a4596fdccaf90c7e29a1) +Signed-off-by: Leonardo Bras +--- + migration/migration.c | 2 ++ + monitor/hmp-cmds.c | 5 +++++ + qapi/migration.json | 7 ++++++- + 3 files changed, 13 insertions(+), 1 deletion(-) + +diff --git a/migration/migration.c b/migration/migration.c +index 87b4a6c3f9..a3e0ac954c 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -1006,6 +1006,8 @@ static void populate_ram_info(MigrationInfo *info, MigrationState *s) + info->ram->normal_bytes = ram_counters.normal * page_size; + info->ram->mbps = s->mbps; + info->ram->dirty_sync_count = ram_counters.dirty_sync_count; ++ info->ram->dirty_sync_missed_zero_copy = ++ ram_counters.dirty_sync_missed_zero_copy; + info->ram->postcopy_requests = ram_counters.postcopy_requests; + info->ram->page_size = page_size; + info->ram->multifd_bytes = ram_counters.multifd_bytes; +diff --git a/monitor/hmp-cmds.c b/monitor/hmp-cmds.c +index 8c384dc1b2..f7216ab5d0 100644 +--- a/monitor/hmp-cmds.c ++++ b/monitor/hmp-cmds.c +@@ -305,6 +305,11 @@ void hmp_info_migrate(Monitor *mon, const QDict *qdict) + monitor_printf(mon, "postcopy ram: %" PRIu64 " kbytes\n", + info->ram->postcopy_bytes >> 10); + } ++ if (info->ram->dirty_sync_missed_zero_copy) { ++ monitor_printf(mon, ++ "Zero-copy-send fallbacks happened: %" PRIu64 " times\n", ++ info->ram->dirty_sync_missed_zero_copy); ++ } + } + + if (info->has_disk) { +diff --git a/qapi/migration.json b/qapi/migration.json +index c8ec260ab0..94bc5c69db 100644 +--- a/qapi/migration.json ++++ b/qapi/migration.json +@@ -55,6 +55,10 @@ + # @postcopy-bytes: The number of bytes sent during the post-copy phase + # (since 7.0). + # ++# @dirty-sync-missed-zero-copy: Number of times dirty RAM synchronization could ++# not avoid copying dirty pages. This is between ++# 0 and @dirty-sync-count * @multifd-channels. ++# (since 7.1) + # Since: 0.14 + ## + { 'struct': 'MigrationStats', +@@ -65,7 +69,8 @@ + 'postcopy-requests' : 'int', 'page-size' : 'int', + 'multifd-bytes' : 'uint64', 'pages-per-second' : 'uint64', + 'precopy-bytes' : 'uint64', 'downtime-bytes' : 'uint64', +- 'postcopy-bytes' : 'uint64' } } ++ 'postcopy-bytes' : 'uint64', ++ 'dirty-sync-missed-zero-copy' : 'uint64' } } + + ## + # @XBZRLECacheStats: +-- +2.35.3 + diff --git a/SOURCES/kvm-QIOChannel-Add-flags-on-io_writev-and-introduce-io_f.patch b/SOURCES/kvm-QIOChannel-Add-flags-on-io_writev-and-introduce-io_f.patch new file mode 100644 index 0000000..b0f1766 --- /dev/null +++ b/SOURCES/kvm-QIOChannel-Add-flags-on-io_writev-and-introduce-io_f.patch @@ -0,0 +1,420 @@ +From dc840cee933cfc1790b7624c88052f6deb43101d Mon Sep 17 00:00:00 2001 +From: Leonardo Bras +Date: Wed, 18 May 2022 02:52:24 -0300 +Subject: [PATCH 17/34] QIOChannel: Add flags on io_writev and introduce + io_flush callback +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Leonardo Brás +RH-MergeRequest: 185: MSG_ZEROCOPY + Multifd @ rhel8.6 +RH-Commit: [17/34] 8ebb6301a83816937d7b87709cf906e1a9c16b01 +RH-Bugzilla: 2117252 +RH-Acked-by: quintela1 +RH-Acked-by: Dr. David Alan Gilbert +RH-Acked-by: Peter Xu + +Add flags to io_writev and introduce io_flush as optional callback to +QIOChannelClass, allowing the implementation of zero copy writes by +subclasses. + +How to use them: +- Write data using qio_channel_writev*(...,QIO_CHANNEL_WRITE_FLAG_ZERO_COPY), +- Wait write completion with qio_channel_flush(). + +Notes: +As some zero copy write implementations work asynchronously, it's +recommended to keep the write buffer untouched until the return of +qio_channel_flush(), to avoid the risk of sending an updated buffer +instead of the buffer state during write. + +As io_flush callback is optional, if a subclass does not implement it, then: +- io_flush will return 0 without changing anything. + +Also, some functions like qio_channel_writev_full_all() were adapted to +receive a flag parameter. That allows shared code between zero copy and +non-zero copy writev, and also an easier implementation on new flags. + +Signed-off-by: Leonardo Bras +Reviewed-by: Daniel P. Berrangé +Reviewed-by: Peter Xu +Reviewed-by: Juan Quintela +Message-Id: <20220513062836.965425-3-leobras@redhat.com> +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit b88651cb4d4fa416fdbb6afaf5b26ec8c035eaad) +Signed-off-by: Leonardo Bras +--- + chardev/char-io.c | 2 +- + hw/remote/mpqemu-link.c | 2 +- + include/io/channel.h | 38 +++++++++++++++++++++- + io/channel-buffer.c | 1 + + io/channel-command.c | 1 + + io/channel-file.c | 1 + + io/channel-socket.c | 2 ++ + io/channel-tls.c | 1 + + io/channel-websock.c | 1 + + io/channel.c | 49 +++++++++++++++++++++++------ + migration/rdma.c | 1 + + scsi/pr-manager-helper.c | 2 +- + tests/unit/test-io-channel-socket.c | 1 + + 13 files changed, 88 insertions(+), 14 deletions(-) + +diff --git a/chardev/char-io.c b/chardev/char-io.c +index 8ced184160..4451128cba 100644 +--- a/chardev/char-io.c ++++ b/chardev/char-io.c +@@ -122,7 +122,7 @@ int io_channel_send_full(QIOChannel *ioc, + + ret = qio_channel_writev_full( + ioc, &iov, 1, +- fds, nfds, NULL); ++ fds, nfds, 0, NULL); + if (ret == QIO_CHANNEL_ERR_BLOCK) { + if (offset) { + return offset; +diff --git a/hw/remote/mpqemu-link.c b/hw/remote/mpqemu-link.c +index 7e841820e5..e8f556bd27 100644 +--- a/hw/remote/mpqemu-link.c ++++ b/hw/remote/mpqemu-link.c +@@ -69,7 +69,7 @@ bool mpqemu_msg_send(MPQemuMsg *msg, QIOChannel *ioc, Error **errp) + } + + if (!qio_channel_writev_full_all(ioc, send, G_N_ELEMENTS(send), +- fds, nfds, errp)) { ++ fds, nfds, 0, errp)) { + ret = true; + } else { + trace_mpqemu_send_io_error(msg->cmd, msg->size, nfds); +diff --git a/include/io/channel.h b/include/io/channel.h +index 88988979f8..c680ee7480 100644 +--- a/include/io/channel.h ++++ b/include/io/channel.h +@@ -32,12 +32,15 @@ OBJECT_DECLARE_TYPE(QIOChannel, QIOChannelClass, + + #define QIO_CHANNEL_ERR_BLOCK -2 + ++#define QIO_CHANNEL_WRITE_FLAG_ZERO_COPY 0x1 ++ + typedef enum QIOChannelFeature QIOChannelFeature; + + enum QIOChannelFeature { + QIO_CHANNEL_FEATURE_FD_PASS, + QIO_CHANNEL_FEATURE_SHUTDOWN, + QIO_CHANNEL_FEATURE_LISTEN, ++ QIO_CHANNEL_FEATURE_WRITE_ZERO_COPY, + }; + + +@@ -104,6 +107,7 @@ struct QIOChannelClass { + size_t niov, + int *fds, + size_t nfds, ++ int flags, + Error **errp); + ssize_t (*io_readv)(QIOChannel *ioc, + const struct iovec *iov, +@@ -136,6 +140,8 @@ struct QIOChannelClass { + IOHandler *io_read, + IOHandler *io_write, + void *opaque); ++ int (*io_flush)(QIOChannel *ioc, ++ Error **errp); + }; + + /* General I/O handling functions */ +@@ -228,6 +234,7 @@ ssize_t qio_channel_readv_full(QIOChannel *ioc, + * @niov: the length of the @iov array + * @fds: an array of file handles to send + * @nfds: number of file handles in @fds ++ * @flags: write flags (QIO_CHANNEL_WRITE_FLAG_*) + * @errp: pointer to a NULL-initialized error object + * + * Write data to the IO channel, reading it from the +@@ -260,6 +267,7 @@ ssize_t qio_channel_writev_full(QIOChannel *ioc, + size_t niov, + int *fds, + size_t nfds, ++ int flags, + Error **errp); + + /** +@@ -837,6 +845,7 @@ int qio_channel_readv_full_all(QIOChannel *ioc, + * @niov: the length of the @iov array + * @fds: an array of file handles to send + * @nfds: number of file handles in @fds ++ * @flags: write flags (QIO_CHANNEL_WRITE_FLAG_*) + * @errp: pointer to a NULL-initialized error object + * + * +@@ -846,6 +855,14 @@ int qio_channel_readv_full_all(QIOChannel *ioc, + * to be written, yielding from the current coroutine + * if required. + * ++ * If QIO_CHANNEL_WRITE_FLAG_ZERO_COPY is passed in flags, ++ * instead of waiting for all requested data to be written, ++ * this function will wait until it's all queued for writing. ++ * In this case, if the buffer gets changed between queueing and ++ * sending, the updated buffer will be sent. If this is not a ++ * desired behavior, it's suggested to call qio_channel_flush() ++ * before reusing the buffer. ++ * + * Returns: 0 if all bytes were written, or -1 on error + */ + +@@ -853,6 +870,25 @@ int qio_channel_writev_full_all(QIOChannel *ioc, + const struct iovec *iov, + size_t niov, + int *fds, size_t nfds, +- Error **errp); ++ int flags, Error **errp); ++ ++/** ++ * qio_channel_flush: ++ * @ioc: the channel object ++ * @errp: pointer to a NULL-initialized error object ++ * ++ * Will block until every packet queued with ++ * qio_channel_writev_full() + QIO_CHANNEL_WRITE_FLAG_ZERO_COPY ++ * is sent, or return in case of any error. ++ * ++ * If not implemented, acts as a no-op, and returns 0. ++ * ++ * Returns -1 if any error is found, ++ * 1 if every send failed to use zero copy. ++ * 0 otherwise. ++ */ ++ ++int qio_channel_flush(QIOChannel *ioc, ++ Error **errp); + + #endif /* QIO_CHANNEL_H */ +diff --git a/io/channel-buffer.c b/io/channel-buffer.c +index baa4e2b089..bf52011be2 100644 +--- a/io/channel-buffer.c ++++ b/io/channel-buffer.c +@@ -81,6 +81,7 @@ static ssize_t qio_channel_buffer_writev(QIOChannel *ioc, + size_t niov, + int *fds, + size_t nfds, ++ int flags, + Error **errp) + { + QIOChannelBuffer *bioc = QIO_CHANNEL_BUFFER(ioc); +diff --git a/io/channel-command.c b/io/channel-command.c +index b2a9e27138..5ff1691bad 100644 +--- a/io/channel-command.c ++++ b/io/channel-command.c +@@ -258,6 +258,7 @@ static ssize_t qio_channel_command_writev(QIOChannel *ioc, + size_t niov, + int *fds, + size_t nfds, ++ int flags, + Error **errp) + { + QIOChannelCommand *cioc = QIO_CHANNEL_COMMAND(ioc); +diff --git a/io/channel-file.c b/io/channel-file.c +index c4bf799a80..348a48545e 100644 +--- a/io/channel-file.c ++++ b/io/channel-file.c +@@ -114,6 +114,7 @@ static ssize_t qio_channel_file_writev(QIOChannel *ioc, + size_t niov, + int *fds, + size_t nfds, ++ int flags, + Error **errp) + { + QIOChannelFile *fioc = QIO_CHANNEL_FILE(ioc); +diff --git a/io/channel-socket.c b/io/channel-socket.c +index 606ec97cf7..bfbd64787e 100644 +--- a/io/channel-socket.c ++++ b/io/channel-socket.c +@@ -525,6 +525,7 @@ static ssize_t qio_channel_socket_writev(QIOChannel *ioc, + size_t niov, + int *fds, + size_t nfds, ++ int flags, + Error **errp) + { + QIOChannelSocket *sioc = QIO_CHANNEL_SOCKET(ioc); +@@ -620,6 +621,7 @@ static ssize_t qio_channel_socket_writev(QIOChannel *ioc, + size_t niov, + int *fds, + size_t nfds, ++ int flags, + Error **errp) + { + QIOChannelSocket *sioc = QIO_CHANNEL_SOCKET(ioc); +diff --git a/io/channel-tls.c b/io/channel-tls.c +index 2ae1b92fc0..4ce890a538 100644 +--- a/io/channel-tls.c ++++ b/io/channel-tls.c +@@ -301,6 +301,7 @@ static ssize_t qio_channel_tls_writev(QIOChannel *ioc, + size_t niov, + int *fds, + size_t nfds, ++ int flags, + Error **errp) + { + QIOChannelTLS *tioc = QIO_CHANNEL_TLS(ioc); +diff --git a/io/channel-websock.c b/io/channel-websock.c +index 70889bb54d..035dd6075b 100644 +--- a/io/channel-websock.c ++++ b/io/channel-websock.c +@@ -1127,6 +1127,7 @@ static ssize_t qio_channel_websock_writev(QIOChannel *ioc, + size_t niov, + int *fds, + size_t nfds, ++ int flags, + Error **errp) + { + QIOChannelWebsock *wioc = QIO_CHANNEL_WEBSOCK(ioc); +diff --git a/io/channel.c b/io/channel.c +index e8b019dc36..0640941ac5 100644 +--- a/io/channel.c ++++ b/io/channel.c +@@ -72,18 +72,32 @@ ssize_t qio_channel_writev_full(QIOChannel *ioc, + size_t niov, + int *fds, + size_t nfds, ++ int flags, + Error **errp) + { + QIOChannelClass *klass = QIO_CHANNEL_GET_CLASS(ioc); + +- if ((fds || nfds) && +- !qio_channel_has_feature(ioc, QIO_CHANNEL_FEATURE_FD_PASS)) { ++ if (fds || nfds) { ++ if (!qio_channel_has_feature(ioc, QIO_CHANNEL_FEATURE_FD_PASS)) { ++ error_setg_errno(errp, EINVAL, ++ "Channel does not support file descriptor passing"); ++ return -1; ++ } ++ if (flags & QIO_CHANNEL_WRITE_FLAG_ZERO_COPY) { ++ error_setg_errno(errp, EINVAL, ++ "Zero Copy does not support file descriptor passing"); ++ return -1; ++ } ++ } ++ ++ if ((flags & QIO_CHANNEL_WRITE_FLAG_ZERO_COPY) && ++ !qio_channel_has_feature(ioc, QIO_CHANNEL_FEATURE_WRITE_ZERO_COPY)) { + error_setg_errno(errp, EINVAL, +- "Channel does not support file descriptor passing"); ++ "Requested Zero Copy feature is not available"); + return -1; + } + +- return klass->io_writev(ioc, iov, niov, fds, nfds, errp); ++ return klass->io_writev(ioc, iov, niov, fds, nfds, flags, errp); + } + + +@@ -217,14 +231,14 @@ int qio_channel_writev_all(QIOChannel *ioc, + size_t niov, + Error **errp) + { +- return qio_channel_writev_full_all(ioc, iov, niov, NULL, 0, errp); ++ return qio_channel_writev_full_all(ioc, iov, niov, NULL, 0, 0, errp); + } + + int qio_channel_writev_full_all(QIOChannel *ioc, + const struct iovec *iov, + size_t niov, + int *fds, size_t nfds, +- Error **errp) ++ int flags, Error **errp) + { + int ret = -1; + struct iovec *local_iov = g_new(struct iovec, niov); +@@ -237,8 +251,10 @@ int qio_channel_writev_full_all(QIOChannel *ioc, + + while (nlocal_iov > 0) { + ssize_t len; +- len = qio_channel_writev_full(ioc, local_iov, nlocal_iov, fds, nfds, +- errp); ++ ++ len = qio_channel_writev_full(ioc, local_iov, nlocal_iov, fds, ++ nfds, flags, errp); ++ + if (len == QIO_CHANNEL_ERR_BLOCK) { + if (qemu_in_coroutine()) { + qio_channel_yield(ioc, G_IO_OUT); +@@ -277,7 +293,7 @@ ssize_t qio_channel_writev(QIOChannel *ioc, + size_t niov, + Error **errp) + { +- return qio_channel_writev_full(ioc, iov, niov, NULL, 0, errp); ++ return qio_channel_writev_full(ioc, iov, niov, NULL, 0, 0, errp); + } + + +@@ -297,7 +313,7 @@ ssize_t qio_channel_write(QIOChannel *ioc, + Error **errp) + { + struct iovec iov = { .iov_base = (char *)buf, .iov_len = buflen }; +- return qio_channel_writev_full(ioc, &iov, 1, NULL, 0, errp); ++ return qio_channel_writev_full(ioc, &iov, 1, NULL, 0, 0, errp); + } + + +@@ -473,6 +489,19 @@ off_t qio_channel_io_seek(QIOChannel *ioc, + return klass->io_seek(ioc, offset, whence, errp); + } + ++int qio_channel_flush(QIOChannel *ioc, ++ Error **errp) ++{ ++ QIOChannelClass *klass = QIO_CHANNEL_GET_CLASS(ioc); ++ ++ if (!klass->io_flush || ++ !qio_channel_has_feature(ioc, QIO_CHANNEL_FEATURE_WRITE_ZERO_COPY)) { ++ return 0; ++ } ++ ++ return klass->io_flush(ioc, errp); ++} ++ + + static void qio_channel_restart_read(void *opaque) + { +diff --git a/migration/rdma.c b/migration/rdma.c +index f5d3bbe7e9..54acd2000e 100644 +--- a/migration/rdma.c ++++ b/migration/rdma.c +@@ -2833,6 +2833,7 @@ static ssize_t qio_channel_rdma_writev(QIOChannel *ioc, + size_t niov, + int *fds, + size_t nfds, ++ int flags, + Error **errp) + { + QIOChannelRDMA *rioc = QIO_CHANNEL_RDMA(ioc); +diff --git a/scsi/pr-manager-helper.c b/scsi/pr-manager-helper.c +index 451c7631b7..3be52a98d5 100644 +--- a/scsi/pr-manager-helper.c ++++ b/scsi/pr-manager-helper.c +@@ -77,7 +77,7 @@ static int pr_manager_helper_write(PRManagerHelper *pr_mgr, + iov.iov_base = (void *)buf; + iov.iov_len = sz; + n_written = qio_channel_writev_full(QIO_CHANNEL(pr_mgr->ioc), &iov, 1, +- nfds ? &fd : NULL, nfds, errp); ++ nfds ? &fd : NULL, nfds, 0, errp); + + if (n_written <= 0) { + assert(n_written != QIO_CHANNEL_ERR_BLOCK); +diff --git a/tests/unit/test-io-channel-socket.c b/tests/unit/test-io-channel-socket.c +index c49eec1f03..6713886d02 100644 +--- a/tests/unit/test-io-channel-socket.c ++++ b/tests/unit/test-io-channel-socket.c +@@ -444,6 +444,7 @@ static void test_io_channel_unix_fd_pass(void) + G_N_ELEMENTS(iosend), + fdsend, + G_N_ELEMENTS(fdsend), ++ 0, + &error_abort); + + qio_channel_readv_full(dst, +-- +2.35.3 + diff --git a/SOURCES/kvm-QIOChannelSocket-Add-support-for-MSG_ZEROCOPY-IPV6.patch b/SOURCES/kvm-QIOChannelSocket-Add-support-for-MSG_ZEROCOPY-IPV6.patch new file mode 100644 index 0000000..0dd4a82 --- /dev/null +++ b/SOURCES/kvm-QIOChannelSocket-Add-support-for-MSG_ZEROCOPY-IPV6.patch @@ -0,0 +1,56 @@ +From f81698b323294b330a5dfb7b9eabff025596bbde Mon Sep 17 00:00:00 2001 +From: Leonardo Bras +Date: Thu, 4 Aug 2022 04:10:43 -0300 +Subject: [PATCH 34/34] QIOChannelSocket: Add support for MSG_ZEROCOPY + IPV6 +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Leonardo Brás +RH-MergeRequest: 185: MSG_ZEROCOPY + Multifd @ rhel8.6 +RH-Commit: [34/34] 549d876ec7108bd11d01754bd1b893ba3e79deb9 +RH-Bugzilla: 2117252 +RH-Acked-by: quintela1 +RH-Acked-by: Dr. David Alan Gilbert +RH-Acked-by: Peter Xu + +For using MSG_ZEROCOPY, there are two steps: +1 - io_writev() the packet, which enqueues the packet for sending, and +2 - io_flush(), which gets confirmation that all packets got correctly sent + +Currently, if MSG_ZEROCOPY is used to send packets over IPV6, no error will +be reported in (1), but it will fail in the first time (2) happens. + +This happens because (2) currently checks for cmsg_level & cmsg_type +associated with IPV4 only, before reporting any error. + +Add checks for cmsg_level & cmsg_type associated with IPV6, and thus enable +support for MSG_ZEROCOPY + IPV6 + +Fixes: 2bc58ffc29 ("QIOChannelSocket: Implement io_writev zero copy flag & io_flush for CONFIG_LINUX") +Signed-off-by: Leonardo Bras +Signed-off-by: Daniel P. Berrangé +(cherry picked from commit 5258a7e2c0677d16e9e1d06845f60171adf0b290) +Signed-off-by: Leonardo Bras +--- + io/channel-socket.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/io/channel-socket.c b/io/channel-socket.c +index cf0d67c51b..6010ad7017 100644 +--- a/io/channel-socket.c ++++ b/io/channel-socket.c +@@ -747,8 +747,8 @@ static int qio_channel_socket_flush(QIOChannel *ioc, + } + + cm = CMSG_FIRSTHDR(&msg); +- if (cm->cmsg_level != SOL_IP && +- cm->cmsg_type != IP_RECVERR) { ++ if (cm->cmsg_level != SOL_IP && cm->cmsg_type != IP_RECVERR && ++ cm->cmsg_level != SOL_IPV6 && cm->cmsg_type != IPV6_RECVERR) { + error_setg_errno(errp, EPROTOTYPE, + "Wrong cmsg in errqueue"); + return -1; +-- +2.35.3 + diff --git a/SOURCES/kvm-QIOChannelSocket-Fix-zero-copy-flush-returning-code-.patch b/SOURCES/kvm-QIOChannelSocket-Fix-zero-copy-flush-returning-code-.patch new file mode 100644 index 0000000..4c25590 --- /dev/null +++ b/SOURCES/kvm-QIOChannelSocket-Fix-zero-copy-flush-returning-code-.patch @@ -0,0 +1,65 @@ +From 9995a5367d511f8597e4006841853eb9b5888065 Mon Sep 17 00:00:00 2001 +From: Leonardo Bras +Date: Mon, 11 Jul 2022 18:11:11 -0300 +Subject: [PATCH 29/34] QIOChannelSocket: Fix zero-copy flush returning code 1 + when nothing sent +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Leonardo Brás +RH-MergeRequest: 185: MSG_ZEROCOPY + Multifd @ rhel8.6 +RH-Commit: [29/34] d383ee721a8b57a4c3b70e1307cbf7db9e22d395 +RH-Bugzilla: 2117252 +RH-Acked-by: quintela1 +RH-Acked-by: Dr. David Alan Gilbert +RH-Acked-by: Peter Xu + +If flush is called when no buffer was sent with MSG_ZEROCOPY, it currently +returns 1. This return code should be used only when Linux fails to use +MSG_ZEROCOPY on a lot of sendmsg(). + +Fix this by returning early from flush if no sendmsg(...,MSG_ZEROCOPY) +was attempted. + +Fixes: 2bc58ffc2926 ("QIOChannelSocket: Implement io_writev zero copy flag & io_flush for CONFIG_LINUX") +Signed-off-by: Leonardo Bras +Reviewed-by: Daniel P. Berrangé +Acked-by: Daniel P. Berrangé +Reviewed-by: Juan Quintela +Reviewed-by: Peter Xu +Message-Id: <20220711211112.18951-2-leobras@redhat.com> +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit 927f93e099c4f9184e60a1bc61624ac2d04d0223) +Signed-off-by: Leonardo Bras +--- + io/channel-socket.c | 8 +++++++- + 1 file changed, 7 insertions(+), 1 deletion(-) + +diff --git a/io/channel-socket.c b/io/channel-socket.c +index df858da924..cf0d67c51b 100644 +--- a/io/channel-socket.c ++++ b/io/channel-socket.c +@@ -717,12 +717,18 @@ static int qio_channel_socket_flush(QIOChannel *ioc, + struct cmsghdr *cm; + char control[CMSG_SPACE(sizeof(*serr))]; + int received; +- int ret = 1; ++ int ret; ++ ++ if (sioc->zero_copy_queued == sioc->zero_copy_sent) { ++ return 0; ++ } + + msg.msg_control = control; + msg.msg_controllen = sizeof(control); + memset(control, 0, sizeof(control)); + ++ ret = 1; ++ + while (sioc->zero_copy_sent < sioc->zero_copy_queued) { + received = recvmsg(sioc->fd, &msg, MSG_ERRQUEUE); + if (received < 0) { +-- +2.35.3 + diff --git a/SOURCES/kvm-QIOChannelSocket-Fix-zero-copy-send-so-socket-flush-.patch b/SOURCES/kvm-QIOChannelSocket-Fix-zero-copy-send-so-socket-flush-.patch new file mode 100644 index 0000000..cab53b4 --- /dev/null +++ b/SOURCES/kvm-QIOChannelSocket-Fix-zero-copy-send-so-socket-flush-.patch @@ -0,0 +1,58 @@ +From 9a4ecf0b3cfccd31a1d41716e3a4249a1d53455c Mon Sep 17 00:00:00 2001 +From: Leonardo Bras +Date: Mon, 20 Jun 2022 02:39:43 -0300 +Subject: [PATCH 25/34] QIOChannelSocket: Fix zero-copy send so socket flush + works +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Leonardo Brás +RH-MergeRequest: 185: MSG_ZEROCOPY + Multifd @ rhel8.6 +RH-Commit: [25/34] cf8dc62075bc8b9aa2621315842a2b2458e9cd82 +RH-Bugzilla: 2117252 +RH-Acked-by: quintela1 +RH-Acked-by: Dr. David Alan Gilbert +RH-Acked-by: Peter Xu + +Somewhere between v6 and v7 the of the zero-copy-send patchset a crucial +part of the flushing mechanism got missing: incrementing zero_copy_queued. + +Without that, the flushing interface becomes a no-op, and there is no +guarantee the buffer is really sent. + +This can go as bad as causing a corruption in RAM during migration. + +Fixes: 2bc58ffc2926 ("QIOChannelSocket: Implement io_writev zero copy flag & io_flush for CONFIG_LINUX") +Reported-by: 徐闯 +Signed-off-by: Leonardo Bras +Reviewed-by: Daniel P. Berrangé +Reviewed-by: Peter Xu +Reviewed-by: Juan Quintela +Signed-off-by: Juan Quintela +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit 4f5a09714c983a3471fd12e3c7f3196e95c650c1) +Signed-off-by: Leonardo Bras +--- + io/channel-socket.c | 5 +++++ + 1 file changed, 5 insertions(+) + +diff --git a/io/channel-socket.c b/io/channel-socket.c +index 7d37b39de7..df858da924 100644 +--- a/io/channel-socket.c ++++ b/io/channel-socket.c +@@ -612,6 +612,11 @@ static ssize_t qio_channel_socket_writev(QIOChannel *ioc, + "Unable to write to socket"); + return -1; + } ++ ++ if (flags & QIO_CHANNEL_WRITE_FLAG_ZERO_COPY) { ++ sioc->zero_copy_queued++; ++ } ++ + return ret; + } + #else /* WIN32 */ +-- +2.35.3 + diff --git a/SOURCES/kvm-QIOChannelSocket-Implement-io_writev-zero-copy-flag-.patch b/SOURCES/kvm-QIOChannelSocket-Implement-io_writev-zero-copy-flag-.patch new file mode 100644 index 0000000..a86096d --- /dev/null +++ b/SOURCES/kvm-QIOChannelSocket-Implement-io_writev-zero-copy-flag-.patch @@ -0,0 +1,249 @@ +From 118f6f61c9ca27bb112d1e39367510d2a45a72fb Mon Sep 17 00:00:00 2001 +From: Leonardo Bras +Date: Wed, 18 May 2022 02:52:25 -0300 +Subject: [PATCH 18/34] QIOChannelSocket: Implement io_writev zero copy flag & + io_flush for CONFIG_LINUX +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Leonardo Brás +RH-MergeRequest: 185: MSG_ZEROCOPY + Multifd @ rhel8.6 +RH-Commit: [18/34] b7f50e8485dc5e01c69b2070915592b28bdafde6 +RH-Bugzilla: 2117252 +RH-Acked-by: quintela1 +RH-Acked-by: Dr. David Alan Gilbert +RH-Acked-by: Peter Xu + +For CONFIG_LINUX, implement the new zero copy flag and the optional callback +io_flush on QIOChannelSocket, but enables it only when MSG_ZEROCOPY +feature is available in the host kernel, which is checked on +qio_channel_socket_connect_sync() + +qio_channel_socket_flush() was implemented by counting how many times +sendmsg(...,MSG_ZEROCOPY) was successfully called, and then reading the +socket's error queue, in order to find how many of them finished sending. +Flush will loop until those counters are the same, or until some error occurs. + +Notes on using writev() with QIO_CHANNEL_WRITE_FLAG_ZERO_COPY: +1: Buffer +- As MSG_ZEROCOPY tells the kernel to use the same user buffer to avoid copying, +some caution is necessary to avoid overwriting any buffer before it's sent. +If something like this happen, a newer version of the buffer may be sent instead. +- If this is a problem, it's recommended to call qio_channel_flush() before freeing +or re-using the buffer. + +2: Locked memory +- When using MSG_ZERCOCOPY, the buffer memory will be locked after queued, and +unlocked after it's sent. +- Depending on the size of each buffer, and how often it's sent, it may require +a larger amount of locked memory than usually available to non-root user. +- If the required amount of locked memory is not available, writev_zero_copy +will return an error, which can abort an operation like migration, +- Because of this, when an user code wants to add zero copy as a feature, it +requires a mechanism to disable it, so it can still be accessible to less +privileged users. + +Signed-off-by: Leonardo Bras +Reviewed-by: Peter Xu +Reviewed-by: Daniel P. Berrangé +Reviewed-by: Juan Quintela +Message-Id: <20220513062836.965425-4-leobras@redhat.com> +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit 2bc58ffc2926a4efdd03edfb5909861fefc68c3d) +Signed-off-by: Leonardo Bras +--- + include/io/channel-socket.h | 2 + + io/channel-socket.c | 116 ++++++++++++++++++++++++++++++++++-- + 2 files changed, 114 insertions(+), 4 deletions(-) + +diff --git a/include/io/channel-socket.h b/include/io/channel-socket.h +index e747e63514..513c428fe4 100644 +--- a/include/io/channel-socket.h ++++ b/include/io/channel-socket.h +@@ -47,6 +47,8 @@ struct QIOChannelSocket { + socklen_t localAddrLen; + struct sockaddr_storage remoteAddr; + socklen_t remoteAddrLen; ++ ssize_t zero_copy_queued; ++ ssize_t zero_copy_sent; + }; + + +diff --git a/io/channel-socket.c b/io/channel-socket.c +index bfbd64787e..38a46ba213 100644 +--- a/io/channel-socket.c ++++ b/io/channel-socket.c +@@ -26,6 +26,14 @@ + #include "io/channel-watch.h" + #include "trace.h" + #include "qapi/clone-visitor.h" ++#ifdef CONFIG_LINUX ++#include ++#include ++ ++#if (defined(MSG_ZEROCOPY) && defined(SO_ZEROCOPY)) ++#define QEMU_MSG_ZEROCOPY ++#endif ++#endif + + #define SOCKET_MAX_FDS 16 + +@@ -55,6 +63,8 @@ qio_channel_socket_new(void) + + sioc = QIO_CHANNEL_SOCKET(object_new(TYPE_QIO_CHANNEL_SOCKET)); + sioc->fd = -1; ++ sioc->zero_copy_queued = 0; ++ sioc->zero_copy_sent = 0; + + ioc = QIO_CHANNEL(sioc); + qio_channel_set_feature(ioc, QIO_CHANNEL_FEATURE_SHUTDOWN); +@@ -154,6 +164,16 @@ int qio_channel_socket_connect_sync(QIOChannelSocket *ioc, + return -1; + } + ++#ifdef QEMU_MSG_ZEROCOPY ++ int ret, v = 1; ++ ret = setsockopt(fd, SOL_SOCKET, SO_ZEROCOPY, &v, sizeof(v)); ++ if (ret == 0) { ++ /* Zero copy available on host */ ++ qio_channel_set_feature(QIO_CHANNEL(ioc), ++ QIO_CHANNEL_FEATURE_WRITE_ZERO_COPY); ++ } ++#endif ++ + return 0; + } + +@@ -534,6 +554,7 @@ static ssize_t qio_channel_socket_writev(QIOChannel *ioc, + char control[CMSG_SPACE(sizeof(int) * SOCKET_MAX_FDS)]; + size_t fdsize = sizeof(int) * nfds; + struct cmsghdr *cmsg; ++ int sflags = 0; + + memset(control, 0, CMSG_SPACE(sizeof(int) * SOCKET_MAX_FDS)); + +@@ -558,15 +579,31 @@ static ssize_t qio_channel_socket_writev(QIOChannel *ioc, + memcpy(CMSG_DATA(cmsg), fds, fdsize); + } + ++#ifdef QEMU_MSG_ZEROCOPY ++ if (flags & QIO_CHANNEL_WRITE_FLAG_ZERO_COPY) { ++ sflags = MSG_ZEROCOPY; ++ } ++#endif ++ + retry: +- ret = sendmsg(sioc->fd, &msg, 0); ++ ret = sendmsg(sioc->fd, &msg, sflags); + if (ret <= 0) { +- if (errno == EAGAIN) { ++ switch (errno) { ++ case EAGAIN: + return QIO_CHANNEL_ERR_BLOCK; +- } +- if (errno == EINTR) { ++ case EINTR: + goto retry; ++#ifdef QEMU_MSG_ZEROCOPY ++ case ENOBUFS: ++ if (sflags & MSG_ZEROCOPY) { ++ error_setg_errno(errp, errno, ++ "Process can't lock enough memory for using MSG_ZEROCOPY"); ++ return -1; ++ } ++ break; ++#endif + } ++ + error_setg_errno(errp, errno, + "Unable to write to socket"); + return -1; +@@ -660,6 +697,74 @@ static ssize_t qio_channel_socket_writev(QIOChannel *ioc, + } + #endif /* WIN32 */ + ++ ++#ifdef QEMU_MSG_ZEROCOPY ++static int qio_channel_socket_flush(QIOChannel *ioc, ++ Error **errp) ++{ ++ QIOChannelSocket *sioc = QIO_CHANNEL_SOCKET(ioc); ++ struct msghdr msg = {}; ++ struct sock_extended_err *serr; ++ struct cmsghdr *cm; ++ char control[CMSG_SPACE(sizeof(*serr))]; ++ int received; ++ int ret = 1; ++ ++ msg.msg_control = control; ++ msg.msg_controllen = sizeof(control); ++ memset(control, 0, sizeof(control)); ++ ++ while (sioc->zero_copy_sent < sioc->zero_copy_queued) { ++ received = recvmsg(sioc->fd, &msg, MSG_ERRQUEUE); ++ if (received < 0) { ++ switch (errno) { ++ case EAGAIN: ++ /* Nothing on errqueue, wait until something is available */ ++ qio_channel_wait(ioc, G_IO_ERR); ++ continue; ++ case EINTR: ++ continue; ++ default: ++ error_setg_errno(errp, errno, ++ "Unable to read errqueue"); ++ return -1; ++ } ++ } ++ ++ cm = CMSG_FIRSTHDR(&msg); ++ if (cm->cmsg_level != SOL_IP && ++ cm->cmsg_type != IP_RECVERR) { ++ error_setg_errno(errp, EPROTOTYPE, ++ "Wrong cmsg in errqueue"); ++ return -1; ++ } ++ ++ serr = (void *) CMSG_DATA(cm); ++ if (serr->ee_errno != SO_EE_ORIGIN_NONE) { ++ error_setg_errno(errp, serr->ee_errno, ++ "Error on socket"); ++ return -1; ++ } ++ if (serr->ee_origin != SO_EE_ORIGIN_ZEROCOPY) { ++ error_setg_errno(errp, serr->ee_origin, ++ "Error not from zero copy"); ++ return -1; ++ } ++ ++ /* No errors, count successfully finished sendmsg()*/ ++ sioc->zero_copy_sent += serr->ee_data - serr->ee_info + 1; ++ ++ /* If any sendmsg() succeeded using zero copy, return 0 at the end */ ++ if (serr->ee_code != SO_EE_CODE_ZEROCOPY_COPIED) { ++ ret = 0; ++ } ++ } ++ ++ return ret; ++} ++ ++#endif /* QEMU_MSG_ZEROCOPY */ ++ + static int + qio_channel_socket_set_blocking(QIOChannel *ioc, + bool enabled, +@@ -789,6 +894,9 @@ static void qio_channel_socket_class_init(ObjectClass *klass, + ioc_klass->io_set_delay = qio_channel_socket_set_delay; + ioc_klass->io_create_watch = qio_channel_socket_create_watch; + ioc_klass->io_set_aio_fd_handler = qio_channel_socket_set_aio_fd_handler; ++#ifdef QEMU_MSG_ZEROCOPY ++ ioc_klass->io_flush = qio_channel_socket_flush; ++#endif + } + + static const TypeInfo qio_channel_socket_info = { +-- +2.35.3 + diff --git a/SOURCES/kvm-QIOChannelSocket-Introduce-assert-and-reduce-ifdefs-.patch b/SOURCES/kvm-QIOChannelSocket-Introduce-assert-and-reduce-ifdefs-.patch new file mode 100644 index 0000000..a63f0ee --- /dev/null +++ b/SOURCES/kvm-QIOChannelSocket-Introduce-assert-and-reduce-ifdefs-.patch @@ -0,0 +1,82 @@ +From f1e21f3d46e1481a5cdd2f297831742b5b2d8ecf Mon Sep 17 00:00:00 2001 +From: Leonardo Bras +Date: Mon, 20 Jun 2022 02:39:42 -0300 +Subject: [PATCH 24/34] QIOChannelSocket: Introduce assert and reduce ifdefs to + improve readability +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Leonardo Brás +RH-MergeRequest: 185: MSG_ZEROCOPY + Multifd @ rhel8.6 +RH-Commit: [24/34] 0dcd79e1e89c881e56c3ef2e421910176b03d881 +RH-Bugzilla: 2117252 +RH-Acked-by: quintela1 +RH-Acked-by: Dr. David Alan Gilbert +RH-Acked-by: Peter Xu + +During implementation of MSG_ZEROCOPY feature, a lot of #ifdefs were +introduced, particularly at qio_channel_socket_writev(). + +Rewrite some of those changes so it's easier to read. + +Also, introduce an assert to help detect incorrect zero-copy usage is when +it's disabled on build. + +Signed-off-by: Leonardo Bras +Reviewed-by: Daniel P. Berrangé +Reviewed-by: Juan Quintela +Reviewed-by: Peter Xu +Signed-off-by: Juan Quintela +Signed-off-by: Dr. David Alan Gilbert + dgilbert: Fixed up thinko'd g_assert_unreachable->g_assert_not_reached +(cherry picked from commit 803ca43e4c7fcf32f9f68c118301ccd0c83ece3f) +Signed-off-by: Leonardo Bras +--- + io/channel-socket.c | 14 +++++++++----- + 1 file changed, 9 insertions(+), 5 deletions(-) + +diff --git a/io/channel-socket.c b/io/channel-socket.c +index 38a46ba213..7d37b39de7 100644 +--- a/io/channel-socket.c ++++ b/io/channel-socket.c +@@ -579,11 +579,17 @@ static ssize_t qio_channel_socket_writev(QIOChannel *ioc, + memcpy(CMSG_DATA(cmsg), fds, fdsize); + } + +-#ifdef QEMU_MSG_ZEROCOPY + if (flags & QIO_CHANNEL_WRITE_FLAG_ZERO_COPY) { ++#ifdef QEMU_MSG_ZEROCOPY + sflags = MSG_ZEROCOPY; +- } ++#else ++ /* ++ * We expect QIOChannel class entry point to have ++ * blocked this code path already ++ */ ++ g_assert_not_reached(); + #endif ++ } + + retry: + ret = sendmsg(sioc->fd, &msg, sflags); +@@ -593,15 +599,13 @@ static ssize_t qio_channel_socket_writev(QIOChannel *ioc, + return QIO_CHANNEL_ERR_BLOCK; + case EINTR: + goto retry; +-#ifdef QEMU_MSG_ZEROCOPY + case ENOBUFS: +- if (sflags & MSG_ZEROCOPY) { ++ if (flags & QIO_CHANNEL_WRITE_FLAG_ZERO_COPY) { + error_setg_errno(errp, errno, + "Process can't lock enough memory for using MSG_ZEROCOPY"); + return -1; + } + break; +-#endif + } + + error_setg_errno(errp, errno, +-- +2.35.3 + diff --git a/SOURCES/kvm-linux-aio-explain-why-max-batch-is-checked-in-laio_i.patch b/SOURCES/kvm-linux-aio-explain-why-max-batch-is-checked-in-laio_i.patch new file mode 100644 index 0000000..1266128 --- /dev/null +++ b/SOURCES/kvm-linux-aio-explain-why-max-batch-is-checked-in-laio_i.patch @@ -0,0 +1,49 @@ +From 286d9e4512a3b7ab6e2a1ce6b4a872e5defb0ffe Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: Thu, 9 Jun 2022 17:47:12 +0100 +Subject: [PATCH 2/2] linux-aio: explain why max batch is checked in + laio_io_unplug() + +RH-Author: Stefan Hajnoczi +RH-MergeRequest: 209: linux-aio: fix unbalanced plugged counter in laio_io_unplug() +RH-Commit: [2/2] c6194a9929e7e3807c5402b5364ab9dc2edf420c +RH-Bugzilla: 2109570 +RH-Acked-by: Hanna Reitz +RH-Acked-by: Kevin Wolf +RH-Acked-by: Stefano Garzarella + +It may not be obvious why laio_io_unplug() checks max batch. I discussed +this with Stefano and have added a comment summarizing the reason. + +Cc: Stefano Garzarella +Cc: Kevin Wolf +Signed-off-by: Stefan Hajnoczi +Reviewed-by: Stefano Garzarella +Message-id: 20220609164712.1539045-3-stefanha@redhat.com +Signed-off-by: Stefan Hajnoczi +(cherry picked from commit 99b969fbe105117f5af6060d3afef40ca39cc9c1) +Signed-off-by: Stefan Hajnoczi +--- + block/linux-aio.c | 6 ++++++ + 1 file changed, 6 insertions(+) + +diff --git a/block/linux-aio.c b/block/linux-aio.c +index 77f17ad596..85650c4222 100644 +--- a/block/linux-aio.c ++++ b/block/linux-aio.c +@@ -362,6 +362,12 @@ void laio_io_unplug(BlockDriverState *bs, LinuxAioState *s, + assert(s->io_q.plugged); + s->io_q.plugged--; + ++ /* ++ * Why max batch checking is performed here: ++ * Another BDS may have queued requests with a higher dev_max_batch and ++ * therefore in_queue could now exceed our dev_max_batch. Re-check the max ++ * batch so we can honor our device's dev_max_batch. ++ */ + if (s->io_q.in_queue >= laio_max_batch(s, dev_max_batch) || + (!s->io_q.plugged && + !s->io_q.blocked && !QSIMPLEQ_EMPTY(&s->io_q.pending))) { +-- +2.35.3 + diff --git a/SOURCES/kvm-linux-aio-fix-unbalanced-plugged-counter-in-laio_io_.patch b/SOURCES/kvm-linux-aio-fix-unbalanced-plugged-counter-in-laio_io_.patch new file mode 100644 index 0000000..d01fe33 --- /dev/null +++ b/SOURCES/kvm-linux-aio-fix-unbalanced-plugged-counter-in-laio_io_.patch @@ -0,0 +1,56 @@ +From ad1844e7e2294fa71bc07f9d1da6d10150ba9607 Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: Thu, 9 Jun 2022 17:47:11 +0100 +Subject: [PATCH 1/2] linux-aio: fix unbalanced plugged counter in + laio_io_unplug() + +RH-Author: Stefan Hajnoczi +RH-MergeRequest: 209: linux-aio: fix unbalanced plugged counter in laio_io_unplug() +RH-Commit: [1/2] 3a73bdb8237cf99c5264a6e1caac632494412953 +RH-Bugzilla: 2109570 +RH-Acked-by: Hanna Reitz +RH-Acked-by: Kevin Wolf +RH-Acked-by: Stefano Garzarella + +Every laio_io_plug() call has a matching laio_io_unplug() call. There is +a plugged counter that tracks the number of levels of plugging and +allows for nesting. + +The plugged counter must reflect the balance between laio_io_plug() and +laio_io_unplug() calls accurately. Otherwise I/O stalls occur since +io_submit(2) calls are skipped while plugged. + +Reported-by: Nikolay Tenev +Signed-off-by: Stefan Hajnoczi +Reviewed-by: Stefano Garzarella +Message-id: 20220609164712.1539045-2-stefanha@redhat.com +Cc: Stefano Garzarella +Fixes: 68d7946648 ("linux-aio: add `dev_max_batch` parameter to laio_io_unplug()") +[Stefano Garzarella suggested adding a Fixes tag. +--Stefan] +Signed-off-by: Stefan Hajnoczi +(cherry picked from commit f387cac5af030a58ac5a0dacf64cab5e5a4fe5c7) +Signed-off-by: Stefan Hajnoczi +--- + block/linux-aio.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +diff --git a/block/linux-aio.c b/block/linux-aio.c +index f53ae72e21..77f17ad596 100644 +--- a/block/linux-aio.c ++++ b/block/linux-aio.c +@@ -360,8 +360,10 @@ void laio_io_unplug(BlockDriverState *bs, LinuxAioState *s, + uint64_t dev_max_batch) + { + assert(s->io_q.plugged); ++ s->io_q.plugged--; ++ + if (s->io_q.in_queue >= laio_max_batch(s, dev_max_batch) || +- (--s->io_q.plugged == 0 && ++ (!s->io_q.plugged && + !s->io_q.blocked && !QSIMPLEQ_EMPTY(&s->io_q.pending))) { + ioq_submit(s); + } +-- +2.35.3 + diff --git a/SOURCES/kvm-migration-Add-migrate_use_tls-helper.patch b/SOURCES/kvm-migration-Add-migrate_use_tls-helper.patch new file mode 100644 index 0000000..338ddb8 --- /dev/null +++ b/SOURCES/kvm-migration-Add-migrate_use_tls-helper.patch @@ -0,0 +1,106 @@ +From c7ff8b8916f28928185bbe937a5701e1770ab5f4 Mon Sep 17 00:00:00 2001 +From: Leonardo Bras +Date: Wed, 18 May 2022 02:52:25 -0300 +Subject: [PATCH 20/34] migration: Add migrate_use_tls() helper +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Leonardo Brás +RH-MergeRequest: 185: MSG_ZEROCOPY + Multifd @ rhel8.6 +RH-Commit: [20/34] 41a1ec2f4ef5c873ed80cf055bb5a582e2273495 +RH-Bugzilla: 2117252 +RH-Acked-by: quintela1 +RH-Acked-by: Dr. David Alan Gilbert +RH-Acked-by: Peter Xu + +A lot of places check parameters.tls_creds in order to evaluate if TLS is +in use, and sometimes call migrate_get_current() just for that test. + +Add new helper function migrate_use_tls() in order to simplify testing +for TLS usage. + +Signed-off-by: Leonardo Bras +Reviewed-by: Juan Quintela +Reviewed-by: Peter Xu +Reviewed-by: Daniel P. Berrangé +Message-Id: <20220513062836.965425-6-leobras@redhat.com> +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit d2fafb6a6814a8998607d0baf691265032996a0f) +Signed-off-by: Leonardo Bras +--- + migration/channel.c | 3 +-- + migration/migration.c | 9 +++++++++ + migration/migration.h | 1 + + migration/multifd.c | 5 +---- + 4 files changed, 12 insertions(+), 6 deletions(-) + +diff --git a/migration/channel.c b/migration/channel.c +index c4fc000a1a..086b5c0d8b 100644 +--- a/migration/channel.c ++++ b/migration/channel.c +@@ -38,8 +38,7 @@ void migration_channel_process_incoming(QIOChannel *ioc) + trace_migration_set_incoming_channel( + ioc, object_get_typename(OBJECT(ioc))); + +- if (s->parameters.tls_creds && +- *s->parameters.tls_creds && ++ if (migrate_use_tls() && + !object_dynamic_cast(OBJECT(ioc), + TYPE_QIO_CHANNEL_TLS)) { + migration_tls_channel_process_incoming(s, ioc, &local_err); +diff --git a/migration/migration.c b/migration/migration.c +index b0fc3f68bd..8e28f2ee41 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -2568,6 +2568,15 @@ bool migrate_use_zero_copy_send(void) + } + #endif + ++int migrate_use_tls(void) ++{ ++ MigrationState *s; ++ ++ s = migrate_get_current(); ++ ++ return s->parameters.tls_creds && *s->parameters.tls_creds; ++} ++ + int migrate_use_xbzrle(void) + { + MigrationState *s; +diff --git a/migration/migration.h b/migration/migration.h +index 908098939f..9396b7e90a 100644 +--- a/migration/migration.h ++++ b/migration/migration.h +@@ -344,6 +344,7 @@ bool migrate_use_zero_copy_send(void); + #else + #define migrate_use_zero_copy_send() (false) + #endif ++int migrate_use_tls(void); + int migrate_use_xbzrle(void); + uint64_t migrate_xbzrle_cache_size(void); + bool migrate_colo_enabled(void); +diff --git a/migration/multifd.c b/migration/multifd.c +index 3725226400..e53811f04a 100644 +--- a/migration/multifd.c ++++ b/migration/multifd.c +@@ -789,14 +789,11 @@ static bool multifd_channel_connect(MultiFDSendParams *p, + QIOChannel *ioc, + Error *error) + { +- MigrationState *s = migrate_get_current(); +- + trace_multifd_set_outgoing_channel( + ioc, object_get_typename(OBJECT(ioc)), p->tls_hostname, error); + + if (!error) { +- if (s->parameters.tls_creds && +- *s->parameters.tls_creds && ++ if (migrate_use_tls() && + !object_dynamic_cast(OBJECT(ioc), + TYPE_QIO_CHANNEL_TLS)) { + multifd_tls_channel_connect(p, ioc, &error); +-- +2.35.3 + diff --git a/SOURCES/kvm-migration-Add-zero-copy-send-parameter-for-QMP-HMP-f.patch b/SOURCES/kvm-migration-Add-zero-copy-send-parameter-for-QMP-HMP-f.patch new file mode 100644 index 0000000..bae187a --- /dev/null +++ b/SOURCES/kvm-migration-Add-zero-copy-send-parameter-for-QMP-HMP-f.patch @@ -0,0 +1,250 @@ +From c71da4b1c1c4cf089f74394ffc596d0fd0235800 Mon Sep 17 00:00:00 2001 +From: Leonardo Bras +Date: Wed, 18 May 2022 02:52:25 -0300 +Subject: [PATCH 19/34] migration: Add zero-copy-send parameter for QMP/HMP for + Linux +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Leonardo Brás +RH-MergeRequest: 185: MSG_ZEROCOPY + Multifd @ rhel8.6 +RH-Commit: [19/34] 96e64f4beb41ffc6cf34341114666598d3d53aeb +RH-Bugzilla: 2117252 +RH-Acked-by: quintela1 +RH-Acked-by: Dr. David Alan Gilbert +RH-Acked-by: Peter Xu + +Add property that allows zero-copy migration of memory pages +on the sending side, and also includes a helper function +migrate_use_zero_copy_send() to check if it's enabled. + +No code is introduced to actually do the migration, but it allow +future implementations to enable/disable this feature. + +On non-Linux builds this parameter is compiled-out. + +Signed-off-by: Leonardo Bras +Reviewed-by: Peter Xu +Reviewed-by: Daniel P. Berrangé +Reviewed-by: Juan Quintela +Acked-by: Markus Armbruster +Message-Id: <20220513062836.965425-5-leobras@redhat.com> +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit abb6295b3ace5d17c3a65936913fc346616dbf14) +Signed-off-by: Leonardo Bras +--- + migration/migration.c | 32 ++++++++++++++++++++++++++++++++ + migration/migration.h | 5 +++++ + migration/socket.c | 11 +++++++++-- + monitor/hmp-cmds.c | 6 ++++++ + qapi/migration.json | 24 ++++++++++++++++++++++++ + 5 files changed, 76 insertions(+), 2 deletions(-) + +diff --git a/migration/migration.c b/migration/migration.c +index 8a13294da6..b0fc3f68bd 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -888,6 +888,10 @@ MigrationParameters *qmp_query_migrate_parameters(Error **errp) + params->multifd_zlib_level = s->parameters.multifd_zlib_level; + params->has_multifd_zstd_level = true; + params->multifd_zstd_level = s->parameters.multifd_zstd_level; ++#ifdef CONFIG_LINUX ++ params->has_zero_copy_send = true; ++ params->zero_copy_send = s->parameters.zero_copy_send; ++#endif + params->has_xbzrle_cache_size = true; + params->xbzrle_cache_size = s->parameters.xbzrle_cache_size; + params->has_max_postcopy_bandwidth = true; +@@ -1541,6 +1545,11 @@ static void migrate_params_test_apply(MigrateSetParameters *params, + if (params->has_multifd_compression) { + dest->multifd_compression = params->multifd_compression; + } ++#ifdef CONFIG_LINUX ++ if (params->has_zero_copy_send) { ++ dest->zero_copy_send = params->zero_copy_send; ++ } ++#endif + if (params->has_xbzrle_cache_size) { + dest->xbzrle_cache_size = params->xbzrle_cache_size; + } +@@ -1653,6 +1662,11 @@ static void migrate_params_apply(MigrateSetParameters *params, Error **errp) + if (params->has_multifd_compression) { + s->parameters.multifd_compression = params->multifd_compression; + } ++#ifdef CONFIG_LINUX ++ if (params->has_zero_copy_send) { ++ s->parameters.zero_copy_send = params->zero_copy_send; ++ } ++#endif + if (params->has_xbzrle_cache_size) { + s->parameters.xbzrle_cache_size = params->xbzrle_cache_size; + xbzrle_cache_resize(params->xbzrle_cache_size, errp); +@@ -2543,6 +2557,17 @@ int migrate_multifd_zstd_level(void) + return s->parameters.multifd_zstd_level; + } + ++#ifdef CONFIG_LINUX ++bool migrate_use_zero_copy_send(void) ++{ ++ MigrationState *s; ++ ++ s = migrate_get_current(); ++ ++ return s->parameters.zero_copy_send; ++} ++#endif ++ + int migrate_use_xbzrle(void) + { + MigrationState *s; +@@ -4193,6 +4218,10 @@ static Property migration_properties[] = { + DEFINE_PROP_UINT8("multifd-zstd-level", MigrationState, + parameters.multifd_zstd_level, + DEFAULT_MIGRATE_MULTIFD_ZSTD_LEVEL), ++#ifdef CONFIG_LINUX ++ DEFINE_PROP_BOOL("zero_copy_send", MigrationState, ++ parameters.zero_copy_send, false), ++#endif + DEFINE_PROP_SIZE("xbzrle-cache-size", MigrationState, + parameters.xbzrle_cache_size, + DEFAULT_MIGRATE_XBZRLE_CACHE_SIZE), +@@ -4290,6 +4319,9 @@ static void migration_instance_init(Object *obj) + params->has_multifd_compression = true; + params->has_multifd_zlib_level = true; + params->has_multifd_zstd_level = true; ++#ifdef CONFIG_LINUX ++ params->has_zero_copy_send = true; ++#endif + params->has_xbzrle_cache_size = true; + params->has_max_postcopy_bandwidth = true; + params->has_max_cpu_throttle = true; +diff --git a/migration/migration.h b/migration/migration.h +index d016cedd9d..908098939f 100644 +--- a/migration/migration.h ++++ b/migration/migration.h +@@ -339,6 +339,11 @@ MultiFDCompression migrate_multifd_compression(void); + int migrate_multifd_zlib_level(void); + int migrate_multifd_zstd_level(void); + ++#ifdef CONFIG_LINUX ++bool migrate_use_zero_copy_send(void); ++#else ++#define migrate_use_zero_copy_send() (false) ++#endif + int migrate_use_xbzrle(void); + uint64_t migrate_xbzrle_cache_size(void); + bool migrate_colo_enabled(void); +diff --git a/migration/socket.c b/migration/socket.c +index 05705a32d8..3754d8f72c 100644 +--- a/migration/socket.c ++++ b/migration/socket.c +@@ -74,9 +74,16 @@ static void socket_outgoing_migration(QIOTask *task, + + if (qio_task_propagate_error(task, &err)) { + trace_migration_socket_outgoing_error(error_get_pretty(err)); +- } else { +- trace_migration_socket_outgoing_connected(data->hostname); ++ goto out; + } ++ ++ trace_migration_socket_outgoing_connected(data->hostname); ++ ++ if (migrate_use_zero_copy_send()) { ++ error_setg(&err, "Zero copy send not available in migration"); ++ } ++ ++out: + migration_channel_connect(data->s, sioc, data->hostname, err); + object_unref(OBJECT(sioc)); + } +diff --git a/monitor/hmp-cmds.c b/monitor/hmp-cmds.c +index 2669156b28..e02da5008b 100644 +--- a/monitor/hmp-cmds.c ++++ b/monitor/hmp-cmds.c +@@ -1297,6 +1297,12 @@ void hmp_migrate_set_parameter(Monitor *mon, const QDict *qdict) + p->has_multifd_zstd_level = true; + visit_type_uint8(v, param, &p->multifd_zstd_level, &err); + break; ++#ifdef CONFIG_LINUX ++ case MIGRATION_PARAMETER_ZERO_COPY_SEND: ++ p->has_zero_copy_send = true; ++ visit_type_bool(v, param, &p->zero_copy_send, &err); ++ break; ++#endif + case MIGRATION_PARAMETER_XBZRLE_CACHE_SIZE: + p->has_xbzrle_cache_size = true; + if (!visit_type_size(v, param, &cache_size, &err)) { +diff --git a/qapi/migration.json b/qapi/migration.json +index bbfd48cf0b..59b5c5780b 100644 +--- a/qapi/migration.json ++++ b/qapi/migration.json +@@ -730,6 +730,13 @@ + # will consume more CPU. + # Defaults to 1. (Since 5.0) + # ++# @zero-copy-send: Controls behavior on sending memory pages on migration. ++# When true, enables a zero-copy mechanism for sending ++# memory pages, if host supports it. ++# Requires that QEMU be permitted to use locked memory ++# for guest RAM pages. ++# Defaults to false. (Since 7.1) ++# + # @block-bitmap-mapping: Maps block nodes and bitmaps on them to + # aliases for the purpose of dirty bitmap migration. Such + # aliases may for example be the corresponding names on the +@@ -769,6 +776,7 @@ + 'xbzrle-cache-size', 'max-postcopy-bandwidth', + 'max-cpu-throttle', 'multifd-compression', + 'multifd-zlib-level' ,'multifd-zstd-level', ++ { 'name': 'zero-copy-send', 'if' : 'CONFIG_LINUX'}, + 'block-bitmap-mapping' ] } + + ## +@@ -895,6 +903,13 @@ + # will consume more CPU. + # Defaults to 1. (Since 5.0) + # ++# @zero-copy-send: Controls behavior on sending memory pages on migration. ++# When true, enables a zero-copy mechanism for sending ++# memory pages, if host supports it. ++# Requires that QEMU be permitted to use locked memory ++# for guest RAM pages. ++# Defaults to false. (Since 7.1) ++# + # @block-bitmap-mapping: Maps block nodes and bitmaps on them to + # aliases for the purpose of dirty bitmap migration. Such + # aliases may for example be the corresponding names on the +@@ -949,6 +964,7 @@ + '*multifd-compression': 'MultiFDCompression', + '*multifd-zlib-level': 'uint8', + '*multifd-zstd-level': 'uint8', ++ '*zero-copy-send': { 'type': 'bool', 'if': 'CONFIG_LINUX' }, + '*block-bitmap-mapping': [ 'BitmapMigrationNodeAlias' ] } } + + ## +@@ -1095,6 +1111,13 @@ + # will consume more CPU. + # Defaults to 1. (Since 5.0) + # ++# @zero-copy-send: Controls behavior on sending memory pages on migration. ++# When true, enables a zero-copy mechanism for sending ++# memory pages, if host supports it. ++# Requires that QEMU be permitted to use locked memory ++# for guest RAM pages. ++# Defaults to false. (Since 7.1) ++# + # @block-bitmap-mapping: Maps block nodes and bitmaps on them to + # aliases for the purpose of dirty bitmap migration. Such + # aliases may for example be the corresponding names on the +@@ -1147,6 +1170,7 @@ + '*multifd-compression': 'MultiFDCompression', + '*multifd-zlib-level': 'uint8', + '*multifd-zstd-level': 'uint8', ++ '*zero-copy-send': { 'type': 'bool', 'if': 'CONFIG_LINUX' }, + '*block-bitmap-mapping': [ 'BitmapMigrationNodeAlias' ] } } + + ## +-- +2.35.3 + diff --git a/SOURCES/kvm-migration-All-this-fields-are-unsigned.patch b/SOURCES/kvm-migration-All-this-fields-are-unsigned.patch new file mode 100644 index 0000000..390ab62 --- /dev/null +++ b/SOURCES/kvm-migration-All-this-fields-are-unsigned.patch @@ -0,0 +1,329 @@ +From 4fead335ef5aca7c70296c082b0abc872e053d30 Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Wed, 18 May 2022 02:52:23 -0300 +Subject: [PATCH 10/34] migration: All this fields are unsigned +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Leonardo Brás +RH-MergeRequest: 185: MSG_ZEROCOPY + Multifd @ rhel8.6 +RH-Commit: [10/34] 59e3378a852a31a9942d1dd8255a9c08e442f53b +RH-Bugzilla: 2117252 +RH-Acked-by: quintela1 +RH-Acked-by: Dr. David Alan Gilbert +RH-Acked-by: Peter Xu + +So printing it as %d is wrong. Notice that for the channel id, that +is an uint8_t, but I changed it anyways for consistency. + +Signed-off-by: Juan Quintela +Reviewed-by: Philippe Mathieu-Daudé +Reviewed-by: Peter Xu +(cherry picked from commit 04e114049406dbb69fc9043c795ddd28fdba31a6) +Signed-off-by: Leonardo Bras +--- + migration/multifd-zlib.c | 20 ++++++++++---------- + migration/multifd-zstd.c | 24 ++++++++++++------------ + migration/multifd.c | 16 ++++++++-------- + migration/trace-events | 26 +++++++++++++------------- + 4 files changed, 43 insertions(+), 43 deletions(-) + +diff --git a/migration/multifd-zlib.c b/migration/multifd-zlib.c +index a1950a4588..a987e4a26c 100644 +--- a/migration/multifd-zlib.c ++++ b/migration/multifd-zlib.c +@@ -52,7 +52,7 @@ static int zlib_send_setup(MultiFDSendParams *p, Error **errp) + zs->opaque = Z_NULL; + if (deflateInit(zs, migrate_multifd_zlib_level()) != Z_OK) { + g_free(z); +- error_setg(errp, "multifd %d: deflate init failed", p->id); ++ error_setg(errp, "multifd %u: deflate init failed", p->id); + return -1; + } + /* We will never have more than page_count pages */ +@@ -62,7 +62,7 @@ static int zlib_send_setup(MultiFDSendParams *p, Error **errp) + if (!z->zbuff) { + deflateEnd(&z->zs); + g_free(z); +- error_setg(errp, "multifd %d: out of memory for zbuff", p->id); ++ error_setg(errp, "multifd %u: out of memory for zbuff", p->id); + return -1; + } + p->data = z; +@@ -134,12 +134,12 @@ static int zlib_send_prepare(MultiFDSendParams *p, Error **errp) + ret = deflate(zs, flush); + } while (ret == Z_OK && zs->avail_in && zs->avail_out); + if (ret == Z_OK && zs->avail_in) { +- error_setg(errp, "multifd %d: deflate failed to compress all input", ++ error_setg(errp, "multifd %u: deflate failed to compress all input", + p->id); + return -1; + } + if (ret != Z_OK) { +- error_setg(errp, "multifd %d: deflate returned %d instead of Z_OK", ++ error_setg(errp, "multifd %u: deflate returned %d instead of Z_OK", + p->id, ret); + return -1; + } +@@ -193,7 +193,7 @@ static int zlib_recv_setup(MultiFDRecvParams *p, Error **errp) + zs->avail_in = 0; + zs->next_in = Z_NULL; + if (inflateInit(zs) != Z_OK) { +- error_setg(errp, "multifd %d: inflate init failed", p->id); ++ error_setg(errp, "multifd %u: inflate init failed", p->id); + return -1; + } + /* We will never have more than page_count pages */ +@@ -203,7 +203,7 @@ static int zlib_recv_setup(MultiFDRecvParams *p, Error **errp) + z->zbuff = g_try_malloc(z->zbuff_len); + if (!z->zbuff) { + inflateEnd(zs); +- error_setg(errp, "multifd %d: out of memory for zbuff", p->id); ++ error_setg(errp, "multifd %u: out of memory for zbuff", p->id); + return -1; + } + return 0; +@@ -252,7 +252,7 @@ static int zlib_recv_pages(MultiFDRecvParams *p, Error **errp) + int i; + + if (flags != MULTIFD_FLAG_ZLIB) { +- error_setg(errp, "multifd %d: flags received %x flags expected %x", ++ error_setg(errp, "multifd %u: flags received %x flags expected %x", + p->id, flags, MULTIFD_FLAG_ZLIB); + return -1; + } +@@ -289,19 +289,19 @@ static int zlib_recv_pages(MultiFDRecvParams *p, Error **errp) + } while (ret == Z_OK && zs->avail_in + && (zs->total_out - start) < page_size); + if (ret == Z_OK && (zs->total_out - start) < page_size) { +- error_setg(errp, "multifd %d: inflate generated too few output", ++ error_setg(errp, "multifd %u: inflate generated too few output", + p->id); + return -1; + } + if (ret != Z_OK) { +- error_setg(errp, "multifd %d: inflate returned %d instead of Z_OK", ++ error_setg(errp, "multifd %u: inflate returned %d instead of Z_OK", + p->id, ret); + return -1; + } + } + out_size = zs->total_out - out_size; + if (out_size != expected_size) { +- error_setg(errp, "multifd %d: packet size received %d size expected %d", ++ error_setg(errp, "multifd %u: packet size received %u size expected %u", + p->id, out_size, expected_size); + return -1; + } +diff --git a/migration/multifd-zstd.c b/migration/multifd-zstd.c +index d9ed42622b..2185a83eac 100644 +--- a/migration/multifd-zstd.c ++++ b/migration/multifd-zstd.c +@@ -56,7 +56,7 @@ static int zstd_send_setup(MultiFDSendParams *p, Error **errp) + z->zcs = ZSTD_createCStream(); + if (!z->zcs) { + g_free(z); +- error_setg(errp, "multifd %d: zstd createCStream failed", p->id); ++ error_setg(errp, "multifd %u: zstd createCStream failed", p->id); + return -1; + } + +@@ -64,7 +64,7 @@ static int zstd_send_setup(MultiFDSendParams *p, Error **errp) + if (ZSTD_isError(res)) { + ZSTD_freeCStream(z->zcs); + g_free(z); +- error_setg(errp, "multifd %d: initCStream failed with error %s", ++ error_setg(errp, "multifd %u: initCStream failed with error %s", + p->id, ZSTD_getErrorName(res)); + return -1; + } +@@ -75,7 +75,7 @@ static int zstd_send_setup(MultiFDSendParams *p, Error **errp) + if (!z->zbuff) { + ZSTD_freeCStream(z->zcs); + g_free(z); +- error_setg(errp, "multifd %d: out of memory for zbuff", p->id); ++ error_setg(errp, "multifd %u: out of memory for zbuff", p->id); + return -1; + } + return 0; +@@ -146,12 +146,12 @@ static int zstd_send_prepare(MultiFDSendParams *p, Error **errp) + } while (ret > 0 && (z->in.size - z->in.pos > 0) + && (z->out.size - z->out.pos > 0)); + if (ret > 0 && (z->in.size - z->in.pos > 0)) { +- error_setg(errp, "multifd %d: compressStream buffer too small", ++ error_setg(errp, "multifd %u: compressStream buffer too small", + p->id); + return -1; + } + if (ZSTD_isError(ret)) { +- error_setg(errp, "multifd %d: compressStream error %s", ++ error_setg(errp, "multifd %u: compressStream error %s", + p->id, ZSTD_getErrorName(ret)); + return -1; + } +@@ -201,7 +201,7 @@ static int zstd_recv_setup(MultiFDRecvParams *p, Error **errp) + z->zds = ZSTD_createDStream(); + if (!z->zds) { + g_free(z); +- error_setg(errp, "multifd %d: zstd createDStream failed", p->id); ++ error_setg(errp, "multifd %u: zstd createDStream failed", p->id); + return -1; + } + +@@ -209,7 +209,7 @@ static int zstd_recv_setup(MultiFDRecvParams *p, Error **errp) + if (ZSTD_isError(ret)) { + ZSTD_freeDStream(z->zds); + g_free(z); +- error_setg(errp, "multifd %d: initDStream failed with error %s", ++ error_setg(errp, "multifd %u: initDStream failed with error %s", + p->id, ZSTD_getErrorName(ret)); + return -1; + } +@@ -222,7 +222,7 @@ static int zstd_recv_setup(MultiFDRecvParams *p, Error **errp) + if (!z->zbuff) { + ZSTD_freeDStream(z->zds); + g_free(z); +- error_setg(errp, "multifd %d: out of memory for zbuff", p->id); ++ error_setg(errp, "multifd %u: out of memory for zbuff", p->id); + return -1; + } + return 0; +@@ -270,7 +270,7 @@ static int zstd_recv_pages(MultiFDRecvParams *p, Error **errp) + int i; + + if (flags != MULTIFD_FLAG_ZSTD) { +- error_setg(errp, "multifd %d: flags received %x flags expected %x", ++ error_setg(errp, "multifd %u: flags received %x flags expected %x", + p->id, flags, MULTIFD_FLAG_ZSTD); + return -1; + } +@@ -302,19 +302,19 @@ static int zstd_recv_pages(MultiFDRecvParams *p, Error **errp) + } while (ret > 0 && (z->in.size - z->in.pos > 0) + && (z->out.pos < page_size)); + if (ret > 0 && (z->out.pos < page_size)) { +- error_setg(errp, "multifd %d: decompressStream buffer too small", ++ error_setg(errp, "multifd %u: decompressStream buffer too small", + p->id); + return -1; + } + if (ZSTD_isError(ret)) { +- error_setg(errp, "multifd %d: decompressStream returned %s", ++ error_setg(errp, "multifd %u: decompressStream returned %s", + p->id, ZSTD_getErrorName(ret)); + return ret; + } + out_size += z->out.pos; + } + if (out_size != expected_size) { +- error_setg(errp, "multifd %d: packet size received %d size expected %d", ++ error_setg(errp, "multifd %u: packet size received %u size expected %u", + p->id, out_size, expected_size); + return -1; + } +diff --git a/migration/multifd.c b/migration/multifd.c +index 0533da154a..d0d19470f9 100644 +--- a/migration/multifd.c ++++ b/migration/multifd.c +@@ -148,7 +148,7 @@ static int nocomp_recv_pages(MultiFDRecvParams *p, Error **errp) + uint32_t flags = p->flags & MULTIFD_FLAG_COMPRESSION_MASK; + + if (flags != MULTIFD_FLAG_NOCOMP) { +- error_setg(errp, "multifd %d: flags received %x flags expected %x", ++ error_setg(errp, "multifd %u: flags received %x flags expected %x", + p->id, flags, MULTIFD_FLAG_NOCOMP); + return -1; + } +@@ -212,8 +212,8 @@ static int multifd_recv_initial_packet(QIOChannel *c, Error **errp) + } + + if (msg.version != MULTIFD_VERSION) { +- error_setg(errp, "multifd: received packet version %d " +- "expected %d", msg.version, MULTIFD_VERSION); ++ error_setg(errp, "multifd: received packet version %u " ++ "expected %u", msg.version, MULTIFD_VERSION); + return -1; + } + +@@ -229,8 +229,8 @@ static int multifd_recv_initial_packet(QIOChannel *c, Error **errp) + } + + if (msg.id > migrate_multifd_channels()) { +- error_setg(errp, "multifd: received channel version %d " +- "expected %d", msg.version, MULTIFD_VERSION); ++ error_setg(errp, "multifd: received channel version %u " ++ "expected %u", msg.version, MULTIFD_VERSION); + return -1; + } + +@@ -303,7 +303,7 @@ static int multifd_recv_unfill_packet(MultiFDRecvParams *p, Error **errp) + packet->version = be32_to_cpu(packet->version); + if (packet->version != MULTIFD_VERSION) { + error_setg(errp, "multifd: received packet " +- "version %d and expected version %d", ++ "version %u and expected version %u", + packet->version, MULTIFD_VERSION); + return -1; + } +@@ -317,7 +317,7 @@ static int multifd_recv_unfill_packet(MultiFDRecvParams *p, Error **errp) + */ + if (packet->pages_alloc > pages_max * 100) { + error_setg(errp, "multifd: received packet " +- "with size %d and expected a maximum size of %d", ++ "with size %u and expected a maximum size of %u", + packet->pages_alloc, pages_max * 100) ; + return -1; + } +@@ -333,7 +333,7 @@ static int multifd_recv_unfill_packet(MultiFDRecvParams *p, Error **errp) + p->pages->num = be32_to_cpu(packet->pages_used); + if (p->pages->num > packet->pages_alloc) { + error_setg(errp, "multifd: received packet " +- "with %d pages and expected maximum pages are %d", ++ "with %u pages and expected maximum pages are %u", + p->pages->num, packet->pages_alloc) ; + return -1; + } +diff --git a/migration/trace-events b/migration/trace-events +index b48d873b8a..5172cb3b3d 100644 +--- a/migration/trace-events ++++ b/migration/trace-events +@@ -115,23 +115,23 @@ ram_write_tracking_ramblock_start(const char *block_id, size_t page_size, void * + ram_write_tracking_ramblock_stop(const char *block_id, size_t page_size, void *addr, size_t length) "%s: page_size: %zu addr: %p length: %zu" + + # multifd.c +-multifd_new_send_channel_async(uint8_t id) "channel %d" +-multifd_recv(uint8_t id, uint64_t packet_num, uint32_t used, uint32_t flags, uint32_t next_packet_size) "channel %d packet_num %" PRIu64 " pages %d flags 0x%x next packet size %d" +-multifd_recv_new_channel(uint8_t id) "channel %d" ++multifd_new_send_channel_async(uint8_t id) "channel %u" ++multifd_recv(uint8_t id, uint64_t packet_num, uint32_t used, uint32_t flags, uint32_t next_packet_size) "channel %u packet_num %" PRIu64 " pages %u flags 0x%x next packet size %u" ++multifd_recv_new_channel(uint8_t id) "channel %u" + multifd_recv_sync_main(long packet_num) "packet num %ld" +-multifd_recv_sync_main_signal(uint8_t id) "channel %d" +-multifd_recv_sync_main_wait(uint8_t id) "channel %d" ++multifd_recv_sync_main_signal(uint8_t id) "channel %u" ++multifd_recv_sync_main_wait(uint8_t id) "channel %u" + multifd_recv_terminate_threads(bool error) "error %d" +-multifd_recv_thread_end(uint8_t id, uint64_t packets, uint64_t pages) "channel %d packets %" PRIu64 " pages %" PRIu64 +-multifd_recv_thread_start(uint8_t id) "%d" +-multifd_send(uint8_t id, uint64_t packet_num, uint32_t used, uint32_t flags, uint32_t next_packet_size) "channel %d packet_num %" PRIu64 " pages %d flags 0x%x next packet size %d" +-multifd_send_error(uint8_t id) "channel %d" ++multifd_recv_thread_end(uint8_t id, uint64_t packets, uint64_t pages) "channel %u packets %" PRIu64 " pages %" PRIu64 ++multifd_recv_thread_start(uint8_t id) "%u" ++multifd_send(uint8_t id, uint64_t packet_num, uint32_t used, uint32_t flags, uint32_t next_packet_size) "channel %u packet_num %" PRIu64 " pages %u flags 0x%x next packet size %u" ++multifd_send_error(uint8_t id) "channel %u" + multifd_send_sync_main(long packet_num) "packet num %ld" +-multifd_send_sync_main_signal(uint8_t id) "channel %d" +-multifd_send_sync_main_wait(uint8_t id) "channel %d" ++multifd_send_sync_main_signal(uint8_t id) "channel %u" ++multifd_send_sync_main_wait(uint8_t id) "channel %u" + multifd_send_terminate_threads(bool error) "error %d" +-multifd_send_thread_end(uint8_t id, uint64_t packets, uint64_t pages) "channel %d packets %" PRIu64 " pages %" PRIu64 +-multifd_send_thread_start(uint8_t id) "%d" ++multifd_send_thread_end(uint8_t id, uint64_t packets, uint64_t pages) "channel %u packets %" PRIu64 " pages %" PRIu64 ++multifd_send_thread_start(uint8_t id) "%u" + multifd_tls_outgoing_handshake_start(void *ioc, void *tioc, const char *hostname) "ioc=%p tioc=%p hostname=%s" + multifd_tls_outgoing_handshake_error(void *ioc, const char *err) "ioc=%p err=%s" + multifd_tls_outgoing_handshake_complete(void *ioc) "ioc=%p" +-- +2.35.3 + diff --git a/SOURCES/kvm-migration-Avoid-false-positive-on-non-supported-scen.patch b/SOURCES/kvm-migration-Avoid-false-positive-on-non-supported-scen.patch new file mode 100644 index 0000000..c69a941 --- /dev/null +++ b/SOURCES/kvm-migration-Avoid-false-positive-on-non-supported-scen.patch @@ -0,0 +1,93 @@ +From cfcde3507dd742c0e17cdfe3ac3bf076cc131a84 Mon Sep 17 00:00:00 2001 +From: Leonardo Bras +Date: Tue, 19 Jul 2022 09:23:45 -0300 +Subject: [PATCH 32/34] migration: Avoid false-positive on non-supported + scenarios for zero-copy-send +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Leonardo Brás +RH-MergeRequest: 185: MSG_ZEROCOPY + Multifd @ rhel8.6 +RH-Commit: [32/34] 409d9f974c5d69cdb4df8ef44f45c6cb25638144 +RH-Bugzilla: 2117252 +RH-Acked-by: quintela1 +RH-Acked-by: Dr. David Alan Gilbert +RH-Acked-by: Peter Xu + +Migration with zero-copy-send currently has it's limitations, as it can't +be used with TLS nor any kind of compression. In such scenarios, it should +output errors during parameter / capability setting. + +But currently there are some ways of setting this not-supported scenarios +without printing the error message: + +!) For 'compression' capability, it works by enabling it together with +zero-copy-send. This happens because the validity test for zero-copy uses +the helper unction migrate_use_compression(), which check for compression +presence in s->enabled_capabilities[MIGRATION_CAPABILITY_COMPRESS]. + +The point here is: the validity test happens before the capability gets +enabled. If all of them get enabled together, this test will not return +error. + +In order to fix that, replace migrate_use_compression() by directly testing +the cap_list parameter migrate_caps_check(). + +2) For features enabled by parameters such as TLS & 'multifd_compression', +there was also a possibility of setting non-supported scenarios: setting +zero-copy-send first, then setting the unsupported parameter. + +In order to fix that, also add a check for parameters conflicting with +zero-copy-send on migrate_params_check(). + +3) XBZRLE is also a compression capability, so it makes sense to also add +it to the list of capabilities which are not supported with zero-copy-send. + +Fixes: 1abaec9a1b2c ("migration: Change zero_copy_send from migration parameter to migration capability") +Signed-off-by: Leonardo Bras +Message-Id: <20220719122345.253713-1-leobras@redhat.com> +Reviewed-by: Dr. David Alan Gilbert +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit 90eb69e4f1a16b388d0483543bf6bfc69a9966e4) +Signed-off-by: Leonardo Bras +--- + migration/migration.c | 15 ++++++++++++++- + 1 file changed, 14 insertions(+), 1 deletion(-) + +diff --git a/migration/migration.c b/migration/migration.c +index a3e0ac954c..b1fe50a749 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -1254,7 +1254,9 @@ static bool migrate_caps_check(bool *cap_list, + #ifdef CONFIG_LINUX + if (cap_list[MIGRATION_CAPABILITY_ZERO_COPY_SEND] && + (!cap_list[MIGRATION_CAPABILITY_MULTIFD] || +- migrate_use_compression() || ++ cap_list[MIGRATION_CAPABILITY_COMPRESS] || ++ cap_list[MIGRATION_CAPABILITY_XBZRLE] || ++ migrate_multifd_compression() || + migrate_use_tls())) { + error_setg(errp, + "Zero copy only available for non-compressed non-TLS multifd migration"); +@@ -1491,6 +1493,17 @@ static bool migrate_params_check(MigrationParameters *params, Error **errp) + error_prepend(errp, "Invalid mapping given for block-bitmap-mapping: "); + return false; + } ++ ++#ifdef CONFIG_LINUX ++ if (migrate_use_zero_copy_send() && ++ ((params->has_multifd_compression && params->multifd_compression) || ++ (params->has_tls_creds && params->tls_creds && *params->tls_creds))) { ++ error_setg(errp, ++ "Zero copy only available for non-compressed non-TLS multifd migration"); ++ return false; ++ } ++#endif ++ + return true; + } + +-- +2.35.3 + diff --git a/SOURCES/kvm-migration-Change-zero_copy_send-from-migration-param.patch b/SOURCES/kvm-migration-Change-zero_copy_send-from-migration-param.patch new file mode 100644 index 0000000..35a8958 --- /dev/null +++ b/SOURCES/kvm-migration-Change-zero_copy_send-from-migration-param.patch @@ -0,0 +1,289 @@ +From a1853831de58b56278ef02964fd8c86ed19c2007 Mon Sep 17 00:00:00 2001 +From: Leonardo Bras +Date: Mon, 20 Jun 2022 02:39:45 -0300 +Subject: [PATCH 26/34] migration: Change zero_copy_send from migration + parameter to migration capability +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Leonardo Brás +RH-MergeRequest: 185: MSG_ZEROCOPY + Multifd @ rhel8.6 +RH-Commit: [26/34] 249c0aaac45b33db0ba2f6d2010d61947d4e96f9 +RH-Bugzilla: 2117252 +RH-Acked-by: quintela1 +RH-Acked-by: Dr. David Alan Gilbert +RH-Acked-by: Peter Xu + +When originally implemented, zero_copy_send was designed as a Migration +paramenter. + +But taking into account how is that supposed to work, and how +the difference between a capability and a parameter, it only makes sense +that zero-copy-send would work better as a capability. + +Taking into account how recently the change got merged, it was decided +that it's still time to make it right, and convert zero_copy_send into +a Migration capability. + +Signed-off-by: Leonardo Bras +Reviewed-by: Juan Quintela +Acked-by: Markus Armbruster +Acked-by: Peter Xu +Signed-off-by: Juan Quintela +Signed-off-by: Dr. David Alan Gilbert + dgilbert: always define the capability, even on non-Linux but error if +set; avoids build problems with the capability +(cherry picked from commit 1abaec9a1b2c23f7aa94709a422128d9e42c3e0b) +Signed-off-by: Leonardo Bras +--- + migration/migration.c | 58 +++++++++++++++++++------------------------ + monitor/hmp-cmds.c | 6 ----- + qapi/migration.json | 33 +++++++----------------- + 3 files changed, 34 insertions(+), 63 deletions(-) + +diff --git a/migration/migration.c b/migration/migration.c +index 5357efd348..c8aa55d2fe 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -162,7 +162,8 @@ INITIALIZE_MIGRATE_CAPS_SET(check_caps_background_snapshot, + MIGRATION_CAPABILITY_COMPRESS, + MIGRATION_CAPABILITY_XBZRLE, + MIGRATION_CAPABILITY_X_COLO, +- MIGRATION_CAPABILITY_VALIDATE_UUID); ++ MIGRATION_CAPABILITY_VALIDATE_UUID, ++ MIGRATION_CAPABILITY_ZERO_COPY_SEND); + + bool migrate_pre_2_2; + +@@ -888,10 +889,6 @@ MigrationParameters *qmp_query_migrate_parameters(Error **errp) + params->multifd_zlib_level = s->parameters.multifd_zlib_level; + params->has_multifd_zstd_level = true; + params->multifd_zstd_level = s->parameters.multifd_zstd_level; +-#ifdef CONFIG_LINUX +- params->has_zero_copy_send = true; +- params->zero_copy_send = s->parameters.zero_copy_send; +-#endif + params->has_xbzrle_cache_size = true; + params->xbzrle_cache_size = s->parameters.xbzrle_cache_size; + params->has_max_postcopy_bandwidth = true; +@@ -1249,6 +1246,24 @@ static bool migrate_caps_check(bool *cap_list, + } + } + ++#ifdef CONFIG_LINUX ++ if (cap_list[MIGRATION_CAPABILITY_ZERO_COPY_SEND] && ++ (!cap_list[MIGRATION_CAPABILITY_MULTIFD] || ++ migrate_use_compression() || ++ migrate_use_tls())) { ++ error_setg(errp, ++ "Zero copy only available for non-compressed non-TLS multifd migration"); ++ return false; ++ } ++#else ++ if (cap_list[MIGRATION_CAPABILITY_ZERO_COPY_SEND]) { ++ error_setg(errp, ++ "Zero copy currently only available on Linux"); ++ return false; ++ } ++#endif ++ ++ + /* incoming side only */ + if (runstate_check(RUN_STATE_INMIGRATE) && + !migrate_multifd_is_allowed() && +@@ -1471,16 +1486,6 @@ static bool migrate_params_check(MigrationParameters *params, Error **errp) + error_prepend(errp, "Invalid mapping given for block-bitmap-mapping: "); + return false; + } +-#ifdef CONFIG_LINUX +- if (params->zero_copy_send && +- (!migrate_use_multifd() || +- params->multifd_compression != MULTIFD_COMPRESSION_NONE || +- (params->tls_creds && *params->tls_creds))) { +- error_setg(errp, +- "Zero copy only available for non-compressed non-TLS multifd migration"); +- return false; +- } +-#endif + return true; + } + +@@ -1554,11 +1559,6 @@ static void migrate_params_test_apply(MigrateSetParameters *params, + if (params->has_multifd_compression) { + dest->multifd_compression = params->multifd_compression; + } +-#ifdef CONFIG_LINUX +- if (params->has_zero_copy_send) { +- dest->zero_copy_send = params->zero_copy_send; +- } +-#endif + if (params->has_xbzrle_cache_size) { + dest->xbzrle_cache_size = params->xbzrle_cache_size; + } +@@ -1671,11 +1671,6 @@ static void migrate_params_apply(MigrateSetParameters *params, Error **errp) + if (params->has_multifd_compression) { + s->parameters.multifd_compression = params->multifd_compression; + } +-#ifdef CONFIG_LINUX +- if (params->has_zero_copy_send) { +- s->parameters.zero_copy_send = params->zero_copy_send; +- } +-#endif + if (params->has_xbzrle_cache_size) { + s->parameters.xbzrle_cache_size = params->xbzrle_cache_size; + xbzrle_cache_resize(params->xbzrle_cache_size, errp); +@@ -2573,7 +2568,7 @@ bool migrate_use_zero_copy_send(void) + + s = migrate_get_current(); + +- return s->parameters.zero_copy_send; ++ return s->enabled_capabilities[MIGRATION_CAPABILITY_ZERO_COPY_SEND]; + } + #endif + +@@ -4236,10 +4231,6 @@ static Property migration_properties[] = { + DEFINE_PROP_UINT8("multifd-zstd-level", MigrationState, + parameters.multifd_zstd_level, + DEFAULT_MIGRATE_MULTIFD_ZSTD_LEVEL), +-#ifdef CONFIG_LINUX +- DEFINE_PROP_BOOL("zero_copy_send", MigrationState, +- parameters.zero_copy_send, false), +-#endif + DEFINE_PROP_SIZE("xbzrle-cache-size", MigrationState, + parameters.xbzrle_cache_size, + DEFAULT_MIGRATE_XBZRLE_CACHE_SIZE), +@@ -4277,6 +4268,10 @@ static Property migration_properties[] = { + DEFINE_PROP_MIG_CAP("x-multifd", MIGRATION_CAPABILITY_MULTIFD), + DEFINE_PROP_MIG_CAP("x-background-snapshot", + MIGRATION_CAPABILITY_BACKGROUND_SNAPSHOT), ++#ifdef CONFIG_LINUX ++ DEFINE_PROP_MIG_CAP("x-zero-copy-send", ++ MIGRATION_CAPABILITY_ZERO_COPY_SEND), ++#endif + + DEFINE_PROP_END_OF_LIST(), + }; +@@ -4337,9 +4332,6 @@ static void migration_instance_init(Object *obj) + params->has_multifd_compression = true; + params->has_multifd_zlib_level = true; + params->has_multifd_zstd_level = true; +-#ifdef CONFIG_LINUX +- params->has_zero_copy_send = true; +-#endif + params->has_xbzrle_cache_size = true; + params->has_max_postcopy_bandwidth = true; + params->has_max_cpu_throttle = true; +diff --git a/monitor/hmp-cmds.c b/monitor/hmp-cmds.c +index e02da5008b..2669156b28 100644 +--- a/monitor/hmp-cmds.c ++++ b/monitor/hmp-cmds.c +@@ -1297,12 +1297,6 @@ void hmp_migrate_set_parameter(Monitor *mon, const QDict *qdict) + p->has_multifd_zstd_level = true; + visit_type_uint8(v, param, &p->multifd_zstd_level, &err); + break; +-#ifdef CONFIG_LINUX +- case MIGRATION_PARAMETER_ZERO_COPY_SEND: +- p->has_zero_copy_send = true; +- visit_type_bool(v, param, &p->zero_copy_send, &err); +- break; +-#endif + case MIGRATION_PARAMETER_XBZRLE_CACHE_SIZE: + p->has_xbzrle_cache_size = true; + if (!visit_type_size(v, param, &cache_size, &err)) { +diff --git a/qapi/migration.json b/qapi/migration.json +index 59b5c5780b..fe70a0c4b2 100644 +--- a/qapi/migration.json ++++ b/qapi/migration.json +@@ -452,6 +452,13 @@ + # procedure starts. The VM RAM is saved with running VM. + # (since 6.0) + # ++# @zero-copy-send: Controls behavior on sending memory pages on migration. ++# When true, enables a zero-copy mechanism for sending ++# memory pages, if host supports it. ++# Requires that QEMU be permitted to use locked memory ++# for guest RAM pages. ++# (since 7.1) ++# + # Features: + # @unstable: Members @x-colo and @x-ignore-shared are experimental. + # +@@ -465,7 +472,8 @@ + 'block', 'return-path', 'pause-before-switchover', 'multifd', + 'dirty-bitmaps', 'postcopy-blocktime', 'late-block-activate', + { 'name': 'x-ignore-shared', 'features': [ 'unstable' ] }, +- 'validate-uuid', 'background-snapshot'] } ++ 'validate-uuid', 'background-snapshot', ++ 'zero-copy-send'] } + + ## + # @MigrationCapabilityStatus: +@@ -730,12 +738,6 @@ + # will consume more CPU. + # Defaults to 1. (Since 5.0) + # +-# @zero-copy-send: Controls behavior on sending memory pages on migration. +-# When true, enables a zero-copy mechanism for sending +-# memory pages, if host supports it. +-# Requires that QEMU be permitted to use locked memory +-# for guest RAM pages. +-# Defaults to false. (Since 7.1) + # + # @block-bitmap-mapping: Maps block nodes and bitmaps on them to + # aliases for the purpose of dirty bitmap migration. Such +@@ -776,7 +778,6 @@ + 'xbzrle-cache-size', 'max-postcopy-bandwidth', + 'max-cpu-throttle', 'multifd-compression', + 'multifd-zlib-level' ,'multifd-zstd-level', +- { 'name': 'zero-copy-send', 'if' : 'CONFIG_LINUX'}, + 'block-bitmap-mapping' ] } + + ## +@@ -903,13 +904,6 @@ + # will consume more CPU. + # Defaults to 1. (Since 5.0) + # +-# @zero-copy-send: Controls behavior on sending memory pages on migration. +-# When true, enables a zero-copy mechanism for sending +-# memory pages, if host supports it. +-# Requires that QEMU be permitted to use locked memory +-# for guest RAM pages. +-# Defaults to false. (Since 7.1) +-# + # @block-bitmap-mapping: Maps block nodes and bitmaps on them to + # aliases for the purpose of dirty bitmap migration. Such + # aliases may for example be the corresponding names on the +@@ -964,7 +958,6 @@ + '*multifd-compression': 'MultiFDCompression', + '*multifd-zlib-level': 'uint8', + '*multifd-zstd-level': 'uint8', +- '*zero-copy-send': { 'type': 'bool', 'if': 'CONFIG_LINUX' }, + '*block-bitmap-mapping': [ 'BitmapMigrationNodeAlias' ] } } + + ## +@@ -1111,13 +1104,6 @@ + # will consume more CPU. + # Defaults to 1. (Since 5.0) + # +-# @zero-copy-send: Controls behavior on sending memory pages on migration. +-# When true, enables a zero-copy mechanism for sending +-# memory pages, if host supports it. +-# Requires that QEMU be permitted to use locked memory +-# for guest RAM pages. +-# Defaults to false. (Since 7.1) +-# + # @block-bitmap-mapping: Maps block nodes and bitmaps on them to + # aliases for the purpose of dirty bitmap migration. Such + # aliases may for example be the corresponding names on the +@@ -1170,7 +1156,6 @@ + '*multifd-compression': 'MultiFDCompression', + '*multifd-zlib-level': 'uint8', + '*multifd-zstd-level': 'uint8', +- '*zero-copy-send': { 'type': 'bool', 'if': 'CONFIG_LINUX' }, + '*block-bitmap-mapping': [ 'BitmapMigrationNodeAlias' ] } } + + ## +-- +2.35.3 + diff --git a/SOURCES/kvm-migration-Introduce-ram_transferred_add.patch b/SOURCES/kvm-migration-Introduce-ram_transferred_add.patch new file mode 100644 index 0000000..1339e20 --- /dev/null +++ b/SOURCES/kvm-migration-Introduce-ram_transferred_add.patch @@ -0,0 +1,122 @@ +From be7a79cd5eb65f9835593f353220a3fe4fa7f30c Mon Sep 17 00:00:00 2001 +From: David Edmondson +Date: Tue, 21 Dec 2021 09:34:40 +0000 +Subject: [PATCH 27/34] migration: Introduce ram_transferred_add() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Leonardo Brás +RH-MergeRequest: 185: MSG_ZEROCOPY + Multifd @ rhel8.6 +RH-Commit: [27/34] fdc6eea0f4cf5ace0a71d981218ce150e98654ff +RH-Bugzilla: 2117252 +RH-Acked-by: quintela1 +RH-Acked-by: Dr. David Alan Gilbert +RH-Acked-by: Peter Xu + +Replace direct manipulation of ram_counters.transferred with a +function. + +Signed-off-by: David Edmondson +Reviewed-by: Philippe Mathieu-Daudé +Reviewed-by: Juan Quintela +Signed-off-by: Juan Quintela +(cherry picked from commit 4c2d0f6dca24f3396ab0718ad3f9f53cc53004df) +Signed-off-by: Leonardo Bras +--- + migration/ram.c | 23 ++++++++++++++--------- + 1 file changed, 14 insertions(+), 9 deletions(-) + +diff --git a/migration/ram.c b/migration/ram.c +index 3e208efca7..3e82c4ff46 100644 +--- a/migration/ram.c ++++ b/migration/ram.c +@@ -391,6 +391,11 @@ uint64_t ram_bytes_remaining(void) + + MigrationStats ram_counters; + ++static void ram_transferred_add(uint64_t bytes) ++{ ++ ram_counters.transferred += bytes; ++} ++ + /* used by the search for pages to send */ + struct PageSearchStatus { + /* Current block being searched */ +@@ -772,7 +777,7 @@ static int save_xbzrle_page(RAMState *rs, uint8_t **current_data, + * RAM_SAVE_FLAG_CONTINUE. + */ + xbzrle_counters.bytes += bytes_xbzrle - 8; +- ram_counters.transferred += bytes_xbzrle; ++ ram_transferred_add(bytes_xbzrle); + + return 1; + } +@@ -1203,7 +1208,7 @@ static int save_zero_page(RAMState *rs, RAMBlock *block, ram_addr_t offset) + + if (len) { + ram_counters.duplicate++; +- ram_counters.transferred += len; ++ ram_transferred_add(len); + return 1; + } + return -1; +@@ -1239,7 +1244,7 @@ static bool control_save_page(RAMState *rs, RAMBlock *block, ram_addr_t offset, + } + + if (bytes_xmit) { +- ram_counters.transferred += bytes_xmit; ++ ram_transferred_add(bytes_xmit); + *pages = 1; + } + +@@ -1270,8 +1275,8 @@ static bool control_save_page(RAMState *rs, RAMBlock *block, ram_addr_t offset, + static int save_normal_page(RAMState *rs, RAMBlock *block, ram_addr_t offset, + uint8_t *buf, bool async) + { +- ram_counters.transferred += save_page_header(rs, rs->f, block, +- offset | RAM_SAVE_FLAG_PAGE); ++ ram_transferred_add(save_page_header(rs, rs->f, block, ++ offset | RAM_SAVE_FLAG_PAGE)); + if (async) { + qemu_put_buffer_async(rs->f, buf, TARGET_PAGE_SIZE, + migrate_release_ram() & +@@ -1279,7 +1284,7 @@ static int save_normal_page(RAMState *rs, RAMBlock *block, ram_addr_t offset, + } else { + qemu_put_buffer(rs->f, buf, TARGET_PAGE_SIZE); + } +- ram_counters.transferred += TARGET_PAGE_SIZE; ++ ram_transferred_add(TARGET_PAGE_SIZE); + ram_counters.normal++; + return 1; + } +@@ -1378,7 +1383,7 @@ exit: + static void + update_compress_thread_counts(const CompressParam *param, int bytes_xmit) + { +- ram_counters.transferred += bytes_xmit; ++ ram_transferred_add(bytes_xmit); + + if (param->zero_page) { + ram_counters.duplicate++; +@@ -2303,7 +2308,7 @@ void acct_update_position(QEMUFile *f, size_t size, bool zero) + ram_counters.duplicate += pages; + } else { + ram_counters.normal += pages; +- ram_counters.transferred += size; ++ ram_transferred_add(size); + qemu_update_position(f, size); + } + } +@@ -3147,7 +3152,7 @@ out: + + qemu_put_be64(f, RAM_SAVE_FLAG_EOS); + qemu_fflush(f); +- ram_counters.transferred += 8; ++ ram_transferred_add(8); + + ret = qemu_file_get_error(f); + } +-- +2.35.3 + diff --git a/SOURCES/kvm-migration-Never-call-twice-qemu_target_page_size.patch b/SOURCES/kvm-migration-Never-call-twice-qemu_target_page_size.patch new file mode 100644 index 0000000..45d143c --- /dev/null +++ b/SOURCES/kvm-migration-Never-call-twice-qemu_target_page_size.patch @@ -0,0 +1,116 @@ +From 2bde4dfec804afa72a3b18e41798612256fe1722 Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Wed, 18 May 2022 02:52:22 -0300 +Subject: [PATCH 01/34] migration: Never call twice qemu_target_page_size() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Leonardo Brás +RH-MergeRequest: 185: MSG_ZEROCOPY + Multifd @ rhel8.6 +RH-Commit: [1/34] 0c99a1b9648103cfba65e724578e922ab05cce78 +RH-Bugzilla: 2117252 +RH-Acked-by: quintela1 +RH-Acked-by: Dr. David Alan Gilbert +RH-Acked-by: Peter Xu + +Signed-off-by: Juan Quintela +Reviewed-by: Dr. David Alan Gilbert +(cherry picked from commit 144fa06b3431e806057ce1438338395b35a3e544) +Signed-off-by: Leonardo Bras +--- + migration/migration.c | 7 ++++--- + migration/multifd.c | 7 ++++--- + migration/savevm.c | 5 +++-- + 3 files changed, 11 insertions(+), 8 deletions(-) + +diff --git a/migration/migration.c b/migration/migration.c +index a87ff01b81..8a13294da6 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -992,6 +992,8 @@ static void populate_time_info(MigrationInfo *info, MigrationState *s) + + static void populate_ram_info(MigrationInfo *info, MigrationState *s) + { ++ size_t page_size = qemu_target_page_size(); ++ + info->has_ram = true; + info->ram = g_malloc0(sizeof(*info->ram)); + info->ram->transferred = ram_counters.transferred; +@@ -1000,12 +1002,11 @@ static void populate_ram_info(MigrationInfo *info, MigrationState *s) + /* legacy value. It is not used anymore */ + info->ram->skipped = 0; + info->ram->normal = ram_counters.normal; +- info->ram->normal_bytes = ram_counters.normal * +- qemu_target_page_size(); ++ info->ram->normal_bytes = ram_counters.normal * page_size; + info->ram->mbps = s->mbps; + info->ram->dirty_sync_count = ram_counters.dirty_sync_count; + info->ram->postcopy_requests = ram_counters.postcopy_requests; +- info->ram->page_size = qemu_target_page_size(); ++ info->ram->page_size = page_size; + info->ram->multifd_bytes = ram_counters.multifd_bytes; + info->ram->pages_per_second = s->pages_per_second; + +diff --git a/migration/multifd.c b/migration/multifd.c +index 7c9deb1921..8125d0015c 100644 +--- a/migration/multifd.c ++++ b/migration/multifd.c +@@ -289,7 +289,8 @@ static void multifd_send_fill_packet(MultiFDSendParams *p) + static int multifd_recv_unfill_packet(MultiFDRecvParams *p, Error **errp) + { + MultiFDPacket_t *packet = p->packet; +- uint32_t pages_max = MULTIFD_PACKET_SIZE / qemu_target_page_size(); ++ size_t page_size = qemu_target_page_size(); ++ uint32_t pages_max = MULTIFD_PACKET_SIZE / page_size; + RAMBlock *block; + int i; + +@@ -358,14 +359,14 @@ static int multifd_recv_unfill_packet(MultiFDRecvParams *p, Error **errp) + for (i = 0; i < p->pages->used; i++) { + uint64_t offset = be64_to_cpu(packet->offset[i]); + +- if (offset > (block->used_length - qemu_target_page_size())) { ++ if (offset > (block->used_length - page_size)) { + error_setg(errp, "multifd: offset too long %" PRIu64 + " (max " RAM_ADDR_FMT ")", + offset, block->used_length); + return -1; + } + p->pages->iov[i].iov_base = block->host + offset; +- p->pages->iov[i].iov_len = qemu_target_page_size(); ++ p->pages->iov[i].iov_len = page_size; + } + + return 0; +diff --git a/migration/savevm.c b/migration/savevm.c +index d59e976d50..0bef031acb 100644 +--- a/migration/savevm.c ++++ b/migration/savevm.c +@@ -1685,6 +1685,7 @@ static int loadvm_postcopy_handle_advise(MigrationIncomingState *mis, + { + PostcopyState ps = postcopy_state_set(POSTCOPY_INCOMING_ADVISE); + uint64_t remote_pagesize_summary, local_pagesize_summary, remote_tps; ++ size_t page_size = qemu_target_page_size(); + Error *local_err = NULL; + + trace_loadvm_postcopy_handle_advise(); +@@ -1741,13 +1742,13 @@ static int loadvm_postcopy_handle_advise(MigrationIncomingState *mis, + } + + remote_tps = qemu_get_be64(mis->from_src_file); +- if (remote_tps != qemu_target_page_size()) { ++ if (remote_tps != page_size) { + /* + * Again, some differences could be dealt with, but for now keep it + * simple. + */ + error_report("Postcopy needs matching target page sizes (s=%d d=%zd)", +- (int)remote_tps, qemu_target_page_size()); ++ (int)remote_tps, page_size); + return -1; + } + +-- +2.35.3 + diff --git a/SOURCES/kvm-migration-Tally-pre-copy-downtime-and-post-copy-byte.patch b/SOURCES/kvm-migration-Tally-pre-copy-downtime-and-post-copy-byte.patch new file mode 100644 index 0000000..c9ff787 --- /dev/null +++ b/SOURCES/kvm-migration-Tally-pre-copy-downtime-and-post-copy-byte.patch @@ -0,0 +1,122 @@ +From 849a82a35629d480cdfa451310b77edd5ee00aa4 Mon Sep 17 00:00:00 2001 +From: David Edmondson +Date: Tue, 21 Dec 2021 09:34:41 +0000 +Subject: [PATCH 28/34] migration: Tally pre-copy, downtime and post-copy bytes + independently +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Leonardo Brás +RH-MergeRequest: 185: MSG_ZEROCOPY + Multifd @ rhel8.6 +RH-Commit: [28/34] a6e61ae2f016d020b2456be6087aeb7d4b9f9387 +RH-Bugzilla: 2117252 +RH-Acked-by: quintela1 +RH-Acked-by: Dr. David Alan Gilbert +RH-Acked-by: Peter Xu + +Provide information on the number of bytes copied in the pre-copy, +downtime and post-copy phases of migration. + +Signed-off-by: David Edmondson +Reviewed-by: Philippe Mathieu-Daudé +Reviewed-by: Juan Quintela +Signed-off-by: Juan Quintela +(cherry picked from commit ae6806688016711bb9ec7541266d76ab511c5e3b) +Signed-off-by: Leonardo Bras +--- + migration/migration.c | 3 +++ + migration/ram.c | 7 +++++++ + monitor/hmp-cmds.c | 12 ++++++++++++ + qapi/migration.json | 13 ++++++++++++- + 4 files changed, 34 insertions(+), 1 deletion(-) + +diff --git a/migration/migration.c b/migration/migration.c +index c8aa55d2fe..87b4a6c3f9 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -1010,6 +1010,9 @@ static void populate_ram_info(MigrationInfo *info, MigrationState *s) + info->ram->page_size = page_size; + info->ram->multifd_bytes = ram_counters.multifd_bytes; + info->ram->pages_per_second = s->pages_per_second; ++ info->ram->precopy_bytes = ram_counters.precopy_bytes; ++ info->ram->downtime_bytes = ram_counters.downtime_bytes; ++ info->ram->postcopy_bytes = ram_counters.postcopy_bytes; + + if (migrate_use_xbzrle()) { + info->has_xbzrle_cache = true; +diff --git a/migration/ram.c b/migration/ram.c +index 3e82c4ff46..e7173da217 100644 +--- a/migration/ram.c ++++ b/migration/ram.c +@@ -393,6 +393,13 @@ MigrationStats ram_counters; + + static void ram_transferred_add(uint64_t bytes) + { ++ if (runstate_is_running()) { ++ ram_counters.precopy_bytes += bytes; ++ } else if (migration_in_postcopy()) { ++ ram_counters.postcopy_bytes += bytes; ++ } else { ++ ram_counters.downtime_bytes += bytes; ++ } + ram_counters.transferred += bytes; + } + +diff --git a/monitor/hmp-cmds.c b/monitor/hmp-cmds.c +index 2669156b28..8c384dc1b2 100644 +--- a/monitor/hmp-cmds.c ++++ b/monitor/hmp-cmds.c +@@ -293,6 +293,18 @@ void hmp_info_migrate(Monitor *mon, const QDict *qdict) + monitor_printf(mon, "postcopy request count: %" PRIu64 "\n", + info->ram->postcopy_requests); + } ++ if (info->ram->precopy_bytes) { ++ monitor_printf(mon, "precopy ram: %" PRIu64 " kbytes\n", ++ info->ram->precopy_bytes >> 10); ++ } ++ if (info->ram->downtime_bytes) { ++ monitor_printf(mon, "downtime ram: %" PRIu64 " kbytes\n", ++ info->ram->downtime_bytes >> 10); ++ } ++ if (info->ram->postcopy_bytes) { ++ monitor_printf(mon, "postcopy ram: %" PRIu64 " kbytes\n", ++ info->ram->postcopy_bytes >> 10); ++ } + } + + if (info->has_disk) { +diff --git a/qapi/migration.json b/qapi/migration.json +index fe70a0c4b2..c8ec260ab0 100644 +--- a/qapi/migration.json ++++ b/qapi/migration.json +@@ -46,6 +46,15 @@ + # @pages-per-second: the number of memory pages transferred per second + # (Since 4.0) + # ++# @precopy-bytes: The number of bytes sent in the pre-copy phase ++# (since 7.0). ++# ++# @downtime-bytes: The number of bytes sent while the guest is paused ++# (since 7.0). ++# ++# @postcopy-bytes: The number of bytes sent during the post-copy phase ++# (since 7.0). ++# + # Since: 0.14 + ## + { 'struct': 'MigrationStats', +@@ -54,7 +63,9 @@ + 'normal-bytes': 'int', 'dirty-pages-rate' : 'int', + 'mbps' : 'number', 'dirty-sync-count' : 'int', + 'postcopy-requests' : 'int', 'page-size' : 'int', +- 'multifd-bytes' : 'uint64', 'pages-per-second' : 'uint64' } } ++ 'multifd-bytes' : 'uint64', 'pages-per-second' : 'uint64', ++ 'precopy-bytes' : 'uint64', 'downtime-bytes' : 'uint64', ++ 'postcopy-bytes' : 'uint64' } } + + ## + # @XBZRLECacheStats: +-- +2.35.3 + diff --git a/SOURCES/kvm-migration-add-remaining-params-has_-true-in-migratio.patch b/SOURCES/kvm-migration-add-remaining-params-has_-true-in-migratio.patch new file mode 100644 index 0000000..a985c87 --- /dev/null +++ b/SOURCES/kvm-migration-add-remaining-params-has_-true-in-migratio.patch @@ -0,0 +1,62 @@ +From d40f63360fc8677ac2ac3a679bab4c1e3dbe334f Mon Sep 17 00:00:00 2001 +From: Leonardo Bras +Date: Mon, 25 Jul 2022 22:02:35 -0300 +Subject: [PATCH 33/34] migration: add remaining params->has_* = true in + migration_instance_init() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Leonardo Brás +RH-MergeRequest: 185: MSG_ZEROCOPY + Multifd @ rhel8.6 +RH-Commit: [33/34] cc70a134ae27296e8a07dffd4dfccf1a329f27f1 +RH-Bugzilla: 2117252 +RH-Acked-by: quintela1 +RH-Acked-by: Dr. David Alan Gilbert +RH-Acked-by: Peter Xu + +Some of params->has_* = true are missing in migration_instance_init, this +causes migrate_params_check() to skip some tests, allowing some +unsupported scenarios. + +Fix this by adding all missing params->has_* = true in +migration_instance_init(). + +Fixes: 69ef1f36b0 ("migration: define 'tls-creds' and 'tls-hostname' migration parameters") +Fixes: 1d58872a91 ("migration: do not wait for free thread") +Fixes: d2f1d29b95 ("migration: add support for a "tls-authz" migration parameter") +Signed-off-by: Leonardo Bras +Message-Id: <20220726010235.342927-1-leobras@redhat.com> +Reviewed-by: Peter Xu +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit df67aa3e61e2c83459da7d815962d9706f1528fc) +Signed-off-by: Leonardo Bras +--- + migration/migration.c | 4 ++++ + 1 file changed, 4 insertions(+) + +diff --git a/migration/migration.c b/migration/migration.c +index b1fe50a749..02f962ead0 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -4337,6 +4337,7 @@ static void migration_instance_init(Object *obj) + /* Set has_* up only for parameter checks */ + params->has_compress_level = true; + params->has_compress_threads = true; ++ params->has_compress_wait_thread = true; + params->has_decompress_threads = true; + params->has_throttle_trigger_threshold = true; + params->has_cpu_throttle_initial = true; +@@ -4357,6 +4358,9 @@ static void migration_instance_init(Object *obj) + params->has_announce_max = true; + params->has_announce_rounds = true; + params->has_announce_step = true; ++ params->has_tls_creds = true; ++ params->has_tls_hostname = true; ++ params->has_tls_authz = true; + + qemu_sem_init(&ms->postcopy_pause_sem, 0); + qemu_sem_init(&ms->postcopy_pause_rp_sem, 0); +-- +2.35.3 + diff --git a/SOURCES/kvm-migration-multifd-Report-to-user-when-zerocopy-not-w.patch b/SOURCES/kvm-migration-multifd-Report-to-user-when-zerocopy-not-w.patch new file mode 100644 index 0000000..c155736 --- /dev/null +++ b/SOURCES/kvm-migration-multifd-Report-to-user-when-zerocopy-not-w.patch @@ -0,0 +1,83 @@ +From e7497ea1a0fa4d4a10fb76f3a274df29e487a277 Mon Sep 17 00:00:00 2001 +From: Leonardo Bras +Date: Mon, 11 Jul 2022 18:11:13 -0300 +Subject: [PATCH 31/34] migration/multifd: Report to user when zerocopy not + working +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Leonardo Brás +RH-MergeRequest: 185: MSG_ZEROCOPY + Multifd @ rhel8.6 +RH-Commit: [31/34] 5aa1b4e6cfc23dd8474844ef8ffa9eb996355e20 +RH-Bugzilla: 2117252 +RH-Acked-by: quintela1 +RH-Acked-by: Dr. David Alan Gilbert +RH-Acked-by: Peter Xu + +Some errors, like the lack of Scatter-Gather support by the network +interface(NETIF_F_SG) may cause sendmsg(...,MSG_ZEROCOPY) to fail on using +zero-copy, which causes it to fall back to the default copying mechanism. + +After each full dirty-bitmap scan there should be a zero-copy flush +happening, which checks for errors each of the previous calls to +sendmsg(...,MSG_ZEROCOPY). If all of them failed to use zero-copy, then +increment dirty_sync_missed_zero_copy migration stat to let the user know +about it. + +Signed-off-by: Leonardo Bras +Reviewed-by: Daniel P. Berrangé +Acked-by: Peter Xu +Message-Id: <20220711211112.18951-4-leobras@redhat.com> +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit d59c40cc483729f2e67c80e58df769ad19976fe9) +Signed-off-by: Leonardo Bras +--- + migration/multifd.c | 2 ++ + migration/ram.c | 5 +++++ + migration/ram.h | 2 ++ + 3 files changed, 9 insertions(+) + +diff --git a/migration/multifd.c b/migration/multifd.c +index 90ab4c4346..7c16523e6b 100644 +--- a/migration/multifd.c ++++ b/migration/multifd.c +@@ -631,6 +631,8 @@ int multifd_send_sync_main(QEMUFile *f) + if (ret < 0) { + error_report_err(err); + return -1; ++ } else if (ret == 1) { ++ dirty_sync_missed_zero_copy(); + } + } + } +diff --git a/migration/ram.c b/migration/ram.c +index e7173da217..93cdb456ac 100644 +--- a/migration/ram.c ++++ b/migration/ram.c +@@ -403,6 +403,11 @@ static void ram_transferred_add(uint64_t bytes) + ram_counters.transferred += bytes; + } + ++void dirty_sync_missed_zero_copy(void) ++{ ++ ram_counters.dirty_sync_missed_zero_copy++; ++} ++ + /* used by the search for pages to send */ + struct PageSearchStatus { + /* Current block being searched */ +diff --git a/migration/ram.h b/migration/ram.h +index c515396a9a..69c3ccb26a 100644 +--- a/migration/ram.h ++++ b/migration/ram.h +@@ -88,4 +88,6 @@ void ram_write_tracking_prepare(void); + int ram_write_tracking_start(void); + void ram_write_tracking_stop(void); + ++void dirty_sync_missed_zero_copy(void); ++ + #endif +-- +2.35.3 + diff --git a/SOURCES/kvm-multifd-Add-missing-documention.patch b/SOURCES/kvm-multifd-Add-missing-documention.patch new file mode 100644 index 0000000..a4efe49 --- /dev/null +++ b/SOURCES/kvm-multifd-Add-missing-documention.patch @@ -0,0 +1,81 @@ +From a5a03cd1ffc772c9d2bbf8e97e971b0cb8daa617 Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Wed, 18 May 2022 02:52:23 -0300 +Subject: [PATCH 03/34] multifd: Add missing documention +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Leonardo Brás +RH-MergeRequest: 185: MSG_ZEROCOPY + Multifd @ rhel8.6 +RH-Commit: [3/34] 3f73bc1414f80a3611c7a807671a04ddb16ec7da +RH-Bugzilla: 2117252 +RH-Acked-by: quintela1 +RH-Acked-by: Dr. David Alan Gilbert +RH-Acked-by: Peter Xu + +Signed-off-by: Juan Quintela +Reviewed-by: Dr. David Alan Gilbert +(cherry picked from commit 18ede636bc29fd8bda628fe3e5c593f8c1b734f4) +Signed-off-by: Leonardo Bras +--- + migration/multifd-zlib.c | 2 ++ + migration/multifd-zstd.c | 2 ++ + migration/multifd.c | 1 + + 3 files changed, 5 insertions(+) + +diff --git a/migration/multifd-zlib.c b/migration/multifd-zlib.c +index ab4ba75d75..f403d2f031 100644 +--- a/migration/multifd-zlib.c ++++ b/migration/multifd-zlib.c +@@ -74,6 +74,7 @@ static int zlib_send_setup(MultiFDSendParams *p, Error **errp) + * Close the channel and return memory. + * + * @p: Params for the channel that we are using ++ * @errp: pointer to an error + */ + static void zlib_send_cleanup(MultiFDSendParams *p, Error **errp) + { +@@ -96,6 +97,7 @@ static void zlib_send_cleanup(MultiFDSendParams *p, Error **errp) + * + * @p: Params for the channel that we are using + * @used: number of pages used ++ * @errp: pointer to an error + */ + static int zlib_send_prepare(MultiFDSendParams *p, uint32_t used, Error **errp) + { +diff --git a/migration/multifd-zstd.c b/migration/multifd-zstd.c +index 693bddf8c9..8d657f8860 100644 +--- a/migration/multifd-zstd.c ++++ b/migration/multifd-zstd.c +@@ -86,6 +86,7 @@ static int zstd_send_setup(MultiFDSendParams *p, Error **errp) + * Close the channel and return memory. + * + * @p: Params for the channel that we are using ++ * @errp: pointer to an error + */ + static void zstd_send_cleanup(MultiFDSendParams *p, Error **errp) + { +@@ -109,6 +110,7 @@ static void zstd_send_cleanup(MultiFDSendParams *p, Error **errp) + * + * @p: Params for the channel that we are using + * @used: number of pages used ++ * @errp: pointer to an error + */ + static int zstd_send_prepare(MultiFDSendParams *p, uint32_t used, Error **errp) + { +diff --git a/migration/multifd.c b/migration/multifd.c +index 8ea86d81dc..cdeffdc4c5 100644 +--- a/migration/multifd.c ++++ b/migration/multifd.c +@@ -66,6 +66,7 @@ static int nocomp_send_setup(MultiFDSendParams *p, Error **errp) + * For no compression this function does nothing. + * + * @p: Params for the channel that we are using ++ * @errp: pointer to an error + */ + static void nocomp_send_cleanup(MultiFDSendParams *p, Error **errp) + { +-- +2.35.3 + diff --git a/SOURCES/kvm-multifd-Fill-offset-and-block-for-reception.patch b/SOURCES/kvm-multifd-Fill-offset-and-block-for-reception.patch new file mode 100644 index 0000000..41ef14c --- /dev/null +++ b/SOURCES/kvm-multifd-Fill-offset-and-block-for-reception.patch @@ -0,0 +1,50 @@ +From ff3315e41f6e33e2ef7d764e064be60b3b7670e4 Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Wed, 18 May 2022 02:52:23 -0300 +Subject: [PATCH 07/34] multifd: Fill offset and block for reception +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Leonardo Brás +RH-MergeRequest: 185: MSG_ZEROCOPY + Multifd @ rhel8.6 +RH-Commit: [7/34] e1c460e910a7de2bbe21221b4c54da0bbc09b4c0 +RH-Bugzilla: 2117252 +RH-Acked-by: quintela1 +RH-Acked-by: Dr. David Alan Gilbert +RH-Acked-by: Peter Xu + +We were using the iov directly, but we will need this info on the +following patch. + +Signed-off-by: Juan Quintela +Reviewed-by: Dr. David Alan Gilbert +(cherry picked from commit 01102a2ef6c97acc5cc8a2c3bb62b7665a20f51f) +Signed-off-by: Leonardo Bras +--- + migration/multifd.c | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/migration/multifd.c b/migration/multifd.c +index 55d99a8232..0533da154a 100644 +--- a/migration/multifd.c ++++ b/migration/multifd.c +@@ -354,6 +354,7 @@ static int multifd_recv_unfill_packet(MultiFDRecvParams *p, Error **errp) + return -1; + } + ++ p->pages->block = block; + for (i = 0; i < p->pages->num; i++) { + uint64_t offset = be64_to_cpu(packet->offset[i]); + +@@ -363,6 +364,7 @@ static int multifd_recv_unfill_packet(MultiFDRecvParams *p, Error **errp) + offset, block->used_length); + return -1; + } ++ p->pages->offset[i] = offset; + p->pages->iov[i].iov_base = block->host + offset; + p->pages->iov[i].iov_len = page_size; + } +-- +2.35.3 + diff --git a/SOURCES/kvm-multifd-Implement-zero-copy-write-in-multifd-migrati.patch b/SOURCES/kvm-multifd-Implement-zero-copy-write-in-multifd-migrati.patch new file mode 100644 index 0000000..3243f19 --- /dev/null +++ b/SOURCES/kvm-multifd-Implement-zero-copy-write-in-multifd-migrati.patch @@ -0,0 +1,182 @@ +From fb4bec0c863fb397078ab6086e95d5401be04ef2 Mon Sep 17 00:00:00 2001 +From: Leonardo Bras +Date: Wed, 18 May 2022 02:52:25 -0300 +Subject: [PATCH 23/34] multifd: Implement zero copy write in multifd migration + (multifd-zero-copy) +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Leonardo Brás +RH-MergeRequest: 185: MSG_ZEROCOPY + Multifd @ rhel8.6 +RH-Commit: [23/34] ef7e8d02dde6570dc8cdf232f7ea03c997ee2e40 +RH-Bugzilla: 2117252 +RH-Acked-by: quintela1 +RH-Acked-by: Dr. David Alan Gilbert +RH-Acked-by: Peter Xu + +Implement zero copy send on nocomp_send_write(), by making use of QIOChannel +writev + flags & flush interface. + +Change multifd_send_sync_main() so flush_zero_copy() can be called +after each iteration in order to make sure all dirty pages are sent before +a new iteration is started. It will also flush at the beginning and at the +end of migration. + +Also make it return -1 if flush_zero_copy() fails, in order to cancel +the migration process, and avoid resuming the guest in the target host +without receiving all current RAM. + +This will work fine on RAM migration because the RAM pages are not usually freed, +and there is no problem on changing the pages content between writev_zero_copy() and +the actual sending of the buffer, because this change will dirty the page and +cause it to be re-sent on a next iteration anyway. + +A lot of locked memory may be needed in order to use multifd migration +with zero-copy enabled, so disabling the feature should be necessary for +low-privileged users trying to perform multifd migrations. + +Signed-off-by: Leonardo Bras +Reviewed-by: Peter Xu +Reviewed-by: Daniel P. Berrangé +Message-Id: <20220513062836.965425-9-leobras@redhat.com> +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit 5b1d9bab2da4fca3a3caee97c430e5709cb32b7b) +Signed-off-by: Leonardo Bras +--- + migration/migration.c | 11 ++++++++++- + migration/multifd.c | 37 +++++++++++++++++++++++++++++++++++-- + migration/multifd.h | 2 ++ + migration/socket.c | 5 +++-- + 4 files changed, 50 insertions(+), 5 deletions(-) + +diff --git a/migration/migration.c b/migration/migration.c +index 8e28f2ee41..5357efd348 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -1471,7 +1471,16 @@ static bool migrate_params_check(MigrationParameters *params, Error **errp) + error_prepend(errp, "Invalid mapping given for block-bitmap-mapping: "); + return false; + } +- ++#ifdef CONFIG_LINUX ++ if (params->zero_copy_send && ++ (!migrate_use_multifd() || ++ params->multifd_compression != MULTIFD_COMPRESSION_NONE || ++ (params->tls_creds && *params->tls_creds))) { ++ error_setg(errp, ++ "Zero copy only available for non-compressed non-TLS multifd migration"); ++ return false; ++ } ++#endif + return true; + } + +diff --git a/migration/multifd.c b/migration/multifd.c +index 193f70cdba..90ab4c4346 100644 +--- a/migration/multifd.c ++++ b/migration/multifd.c +@@ -576,6 +576,7 @@ void multifd_save_cleanup(void) + int multifd_send_sync_main(QEMUFile *f) + { + int i; ++ bool flush_zero_copy; + + if (!migrate_use_multifd()) { + return 0; +@@ -586,6 +587,20 @@ int multifd_send_sync_main(QEMUFile *f) + return -1; + } + } ++ ++ /* ++ * When using zero-copy, it's necessary to flush the pages before any of ++ * the pages can be sent again, so we'll make sure the new version of the ++ * pages will always arrive _later_ than the old pages. ++ * ++ * Currently we achieve this by flushing the zero-page requested writes ++ * per ram iteration, but in the future we could potentially optimize it ++ * to be less frequent, e.g. only after we finished one whole scanning of ++ * all the dirty bitmaps. ++ */ ++ ++ flush_zero_copy = migrate_use_zero_copy_send(); ++ + for (i = 0; i < migrate_multifd_channels(); i++) { + MultiFDSendParams *p = &multifd_send_state->params[i]; + +@@ -607,6 +622,17 @@ int multifd_send_sync_main(QEMUFile *f) + ram_counters.transferred += p->packet_len; + qemu_mutex_unlock(&p->mutex); + qemu_sem_post(&p->sem); ++ ++ if (flush_zero_copy && p->c) { ++ int ret; ++ Error *err = NULL; ++ ++ ret = qio_channel_flush(p->c, &err); ++ if (ret < 0) { ++ error_report_err(err); ++ return -1; ++ } ++ } + } + for (i = 0; i < migrate_multifd_channels(); i++) { + MultiFDSendParams *p = &multifd_send_state->params[i]; +@@ -691,8 +717,8 @@ static void *multifd_send_thread(void *opaque) + p->iov[0].iov_base = p->packet; + } + +- ret = qio_channel_writev_all(p->c, p->iov, p->iovs_num, +- &local_err); ++ ret = qio_channel_writev_full_all(p->c, p->iov, p->iovs_num, NULL, ++ 0, p->write_flags, &local_err); + if (ret != 0) { + break; + } +@@ -933,6 +959,13 @@ int multifd_save_setup(Error **errp) + /* We need one extra place for the packet header */ + p->iov = g_new0(struct iovec, page_count + 1); + p->normal = g_new0(ram_addr_t, page_count); ++ ++ if (migrate_use_zero_copy_send()) { ++ p->write_flags = QIO_CHANNEL_WRITE_FLAG_ZERO_COPY; ++ } else { ++ p->write_flags = 0; ++ } ++ + socket_send_channel_create(multifd_new_send_channel_async, p); + } + +diff --git a/migration/multifd.h b/migration/multifd.h +index 92de878155..11d5e273e6 100644 +--- a/migration/multifd.h ++++ b/migration/multifd.h +@@ -95,6 +95,8 @@ typedef struct { + uint32_t packet_len; + /* pointer to the packet */ + MultiFDPacket_t *packet; ++ /* multifd flags for sending ram */ ++ int write_flags; + /* multifd flags for each packet */ + uint32_t flags; + /* size of the next packet that contains pages */ +diff --git a/migration/socket.c b/migration/socket.c +index 3754d8f72c..4fd5e85f50 100644 +--- a/migration/socket.c ++++ b/migration/socket.c +@@ -79,8 +79,9 @@ static void socket_outgoing_migration(QIOTask *task, + + trace_migration_socket_outgoing_connected(data->hostname); + +- if (migrate_use_zero_copy_send()) { +- error_setg(&err, "Zero copy send not available in migration"); ++ if (migrate_use_zero_copy_send() && ++ !qio_channel_has_feature(sioc, QIO_CHANNEL_FEATURE_WRITE_ZERO_COPY)) { ++ error_setg(&err, "Zero copy send feature not detected in host kernel"); + } + + out: +-- +2.35.3 + diff --git a/SOURCES/kvm-multifd-Make-zlib-compression-method-not-use-iovs.patch b/SOURCES/kvm-multifd-Make-zlib-compression-method-not-use-iovs.patch new file mode 100644 index 0000000..817f0de --- /dev/null +++ b/SOURCES/kvm-multifd-Make-zlib-compression-method-not-use-iovs.patch @@ -0,0 +1,98 @@ +From e74b927853e84b44f8047718020593939ad125ec Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Wed, 18 May 2022 02:52:23 -0300 +Subject: [PATCH 09/34] multifd: Make zlib compression method not use iovs +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Leonardo Brás +RH-MergeRequest: 185: MSG_ZEROCOPY + Multifd @ rhel8.6 +RH-Commit: [9/34] 37d70e2163ed982e2d8343c4ec1061fc59677688 +RH-Bugzilla: 2117252 +RH-Acked-by: quintela1 +RH-Acked-by: Dr. David Alan Gilbert +RH-Acked-by: Peter Xu + +Signed-off-by: Juan Quintela +Reviewed-by: Dr. David Alan Gilbert +(cherry picked from commit a5ed22948873b50fcf1415d1ce15c71d61a9388d) +Signed-off-by: Leonardo Bras +--- + migration/multifd-zlib.c | 17 +++++++++-------- + 1 file changed, 9 insertions(+), 8 deletions(-) + +diff --git a/migration/multifd-zlib.c b/migration/multifd-zlib.c +index 330fc021c5..a1950a4588 100644 +--- a/migration/multifd-zlib.c ++++ b/migration/multifd-zlib.c +@@ -13,6 +13,7 @@ + #include "qemu/osdep.h" + #include + #include "qemu/rcu.h" ++#include "exec/ramblock.h" + #include "exec/target_page.h" + #include "qapi/error.h" + #include "migration.h" +@@ -100,8 +101,8 @@ static void zlib_send_cleanup(MultiFDSendParams *p, Error **errp) + */ + static int zlib_send_prepare(MultiFDSendParams *p, Error **errp) + { +- struct iovec *iov = p->pages->iov; + struct zlib_data *z = p->data; ++ size_t page_size = qemu_target_page_size(); + z_stream *zs = &z->zs; + uint32_t out_size = 0; + int ret; +@@ -115,8 +116,8 @@ static int zlib_send_prepare(MultiFDSendParams *p, Error **errp) + flush = Z_SYNC_FLUSH; + } + +- zs->avail_in = iov[i].iov_len; +- zs->next_in = iov[i].iov_base; ++ zs->avail_in = page_size; ++ zs->next_in = p->pages->block->host + p->pages->offset[i]; + + zs->avail_out = available; + zs->next_out = z->zbuff + out_size; +@@ -240,6 +241,7 @@ static void zlib_recv_cleanup(MultiFDRecvParams *p) + static int zlib_recv_pages(MultiFDRecvParams *p, Error **errp) + { + struct zlib_data *z = p->data; ++ size_t page_size = qemu_target_page_size(); + z_stream *zs = &z->zs; + uint32_t in_size = p->next_packet_size; + /* we measure the change of total_out */ +@@ -264,7 +266,6 @@ static int zlib_recv_pages(MultiFDRecvParams *p, Error **errp) + zs->next_in = z->zbuff; + + for (i = 0; i < p->pages->num; i++) { +- struct iovec *iov = &p->pages->iov[i]; + int flush = Z_NO_FLUSH; + unsigned long start = zs->total_out; + +@@ -272,8 +273,8 @@ static int zlib_recv_pages(MultiFDRecvParams *p, Error **errp) + flush = Z_SYNC_FLUSH; + } + +- zs->avail_out = iov->iov_len; +- zs->next_out = iov->iov_base; ++ zs->avail_out = page_size; ++ zs->next_out = p->pages->block->host + p->pages->offset[i]; + + /* + * Welcome to inflate semantics +@@ -286,8 +287,8 @@ static int zlib_recv_pages(MultiFDRecvParams *p, Error **errp) + do { + ret = inflate(zs, flush); + } while (ret == Z_OK && zs->avail_in +- && (zs->total_out - start) < iov->iov_len); +- if (ret == Z_OK && (zs->total_out - start) < iov->iov_len) { ++ && (zs->total_out - start) < page_size); ++ if (ret == Z_OK && (zs->total_out - start) < page_size) { + error_setg(errp, "multifd %d: inflate generated too few output", + p->id); + return -1; +-- +2.35.3 + diff --git a/SOURCES/kvm-multifd-Make-zlib-use-iov-s.patch b/SOURCES/kvm-multifd-Make-zlib-use-iov-s.patch new file mode 100644 index 0000000..7c0cc8e --- /dev/null +++ b/SOURCES/kvm-multifd-Make-zlib-use-iov-s.patch @@ -0,0 +1,53 @@ +From 9cc84ed4e52807912598f3cdef3ad08e9166cdea Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Wed, 18 May 2022 02:52:24 -0300 +Subject: [PATCH 12/34] multifd: Make zlib use iov's +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Leonardo Brás +RH-MergeRequest: 185: MSG_ZEROCOPY + Multifd @ rhel8.6 +RH-Commit: [12/34] 7cca02fb1706db0b1336d43ef4b1b6a16acf21a1 +RH-Bugzilla: 2117252 +RH-Acked-by: quintela1 +RH-Acked-by: Dr. David Alan Gilbert +RH-Acked-by: Peter Xu + +Signed-off-by: Juan Quintela +Reviewed-by: Dr. David Alan Gilbert +(cherry picked from commit 48a4a44c1cde382c6b8e7792d01fe7d9b0a59c69) +Signed-off-by: Leonardo Bras +--- + migration/multifd-zlib.c | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +diff --git a/migration/multifd-zlib.c b/migration/multifd-zlib.c +index a987e4a26c..96475e096e 100644 +--- a/migration/multifd-zlib.c ++++ b/migration/multifd-zlib.c +@@ -145,6 +145,9 @@ static int zlib_send_prepare(MultiFDSendParams *p, Error **errp) + } + out_size += available - zs->avail_out; + } ++ p->iov[p->iovs_num].iov_base = z->zbuff; ++ p->iov[p->iovs_num].iov_len = out_size; ++ p->iovs_num++; + p->next_packet_size = out_size; + p->flags |= MULTIFD_FLAG_ZLIB; + +@@ -164,10 +167,7 @@ static int zlib_send_prepare(MultiFDSendParams *p, Error **errp) + */ + static int zlib_send_write(MultiFDSendParams *p, uint32_t used, Error **errp) + { +- struct zlib_data *z = p->data; +- +- return qio_channel_write_all(p->c, (void *)z->zbuff, p->next_packet_size, +- errp); ++ return qio_channel_writev_all(p->c, p->iov, p->iovs_num, errp); + } + + /** +-- +2.35.3 + diff --git a/SOURCES/kvm-multifd-Make-zstd-compression-method-not-use-iovs.patch b/SOURCES/kvm-multifd-Make-zstd-compression-method-not-use-iovs.patch new file mode 100644 index 0000000..12d56a7 --- /dev/null +++ b/SOURCES/kvm-multifd-Make-zstd-compression-method-not-use-iovs.patch @@ -0,0 +1,94 @@ +From dc083c2407de0a668573e549b8357f451554e376 Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Wed, 18 May 2022 02:52:23 -0300 +Subject: [PATCH 08/34] multifd: Make zstd compression method not use iovs +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Leonardo Brás +RH-MergeRequest: 185: MSG_ZEROCOPY + Multifd @ rhel8.6 +RH-Commit: [8/34] 953059f89f3b79f4c515c16877052522c3104753 +RH-Bugzilla: 2117252 +RH-Acked-by: quintela1 +RH-Acked-by: Dr. David Alan Gilbert +RH-Acked-by: Peter Xu + +Signed-off-by: Juan Quintela +Reviewed-by: Dr. David Alan Gilbert +(cherry picked from commit f5ff548774c22b34a0c0e2fef85f1be11160d774) +Signed-off-by: Leonardo Bras +--- + migration/multifd-zstd.c | 20 ++++++++++---------- + 1 file changed, 10 insertions(+), 10 deletions(-) + +diff --git a/migration/multifd-zstd.c b/migration/multifd-zstd.c +index f0d1105792..d9ed42622b 100644 +--- a/migration/multifd-zstd.c ++++ b/migration/multifd-zstd.c +@@ -13,6 +13,7 @@ + #include "qemu/osdep.h" + #include + #include "qemu/rcu.h" ++#include "exec/ramblock.h" + #include "exec/target_page.h" + #include "qapi/error.h" + #include "migration.h" +@@ -113,8 +114,8 @@ static void zstd_send_cleanup(MultiFDSendParams *p, Error **errp) + */ + static int zstd_send_prepare(MultiFDSendParams *p, Error **errp) + { +- struct iovec *iov = p->pages->iov; + struct zstd_data *z = p->data; ++ size_t page_size = qemu_target_page_size(); + int ret; + uint32_t i; + +@@ -128,8 +129,8 @@ static int zstd_send_prepare(MultiFDSendParams *p, Error **errp) + if (i == p->pages->num - 1) { + flush = ZSTD_e_flush; + } +- z->in.src = iov[i].iov_base; +- z->in.size = iov[i].iov_len; ++ z->in.src = p->pages->block->host + p->pages->offset[i]; ++ z->in.size = page_size; + z->in.pos = 0; + + /* +@@ -261,7 +262,8 @@ static int zstd_recv_pages(MultiFDRecvParams *p, Error **errp) + { + uint32_t in_size = p->next_packet_size; + uint32_t out_size = 0; +- uint32_t expected_size = p->pages->num * qemu_target_page_size(); ++ size_t page_size = qemu_target_page_size(); ++ uint32_t expected_size = p->pages->num * page_size; + uint32_t flags = p->flags & MULTIFD_FLAG_COMPRESSION_MASK; + struct zstd_data *z = p->data; + int ret; +@@ -283,10 +285,8 @@ static int zstd_recv_pages(MultiFDRecvParams *p, Error **errp) + z->in.pos = 0; + + for (i = 0; i < p->pages->num; i++) { +- struct iovec *iov = &p->pages->iov[i]; +- +- z->out.dst = iov->iov_base; +- z->out.size = iov->iov_len; ++ z->out.dst = p->pages->block->host + p->pages->offset[i]; ++ z->out.size = page_size; + z->out.pos = 0; + + /* +@@ -300,8 +300,8 @@ static int zstd_recv_pages(MultiFDRecvParams *p, Error **errp) + do { + ret = ZSTD_decompressStream(z->zds, &z->out, &z->in); + } while (ret > 0 && (z->in.size - z->in.pos > 0) +- && (z->out.pos < iov->iov_len)); +- if (ret > 0 && (z->out.pos < iov->iov_len)) { ++ && (z->out.pos < page_size)); ++ if (ret > 0 && (z->out.pos < page_size)) { + error_setg(errp, "multifd %d: decompressStream buffer too small", + p->id); + return -1; +-- +2.35.3 + diff --git a/SOURCES/kvm-multifd-Make-zstd-use-iov-s.patch b/SOURCES/kvm-multifd-Make-zstd-use-iov-s.patch new file mode 100644 index 0000000..16dece0 --- /dev/null +++ b/SOURCES/kvm-multifd-Make-zstd-use-iov-s.patch @@ -0,0 +1,53 @@ +From a451644d96f572f5845d3ee523e54486da55d9ae Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Wed, 18 May 2022 02:52:24 -0300 +Subject: [PATCH 13/34] multifd: Make zstd use iov's +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Leonardo Brás +RH-MergeRequest: 185: MSG_ZEROCOPY + Multifd @ rhel8.6 +RH-Commit: [13/34] 37929ac695c7bdfe6e2f798d4f5e43a5e7525acb +RH-Bugzilla: 2117252 +RH-Acked-by: quintela1 +RH-Acked-by: Dr. David Alan Gilbert +RH-Acked-by: Peter Xu + +Signed-off-by: Juan Quintela +Reviewed-by: Dr. David Alan Gilbert +(cherry picked from commit 0a818b89eb8eaf79ae651405907d8110a0935cfd) +Signed-off-by: Leonardo Bras +--- + migration/multifd-zstd.c | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +diff --git a/migration/multifd-zstd.c b/migration/multifd-zstd.c +index 2185a83eac..4e60cdbc54 100644 +--- a/migration/multifd-zstd.c ++++ b/migration/multifd-zstd.c +@@ -156,6 +156,9 @@ static int zstd_send_prepare(MultiFDSendParams *p, Error **errp) + return -1; + } + } ++ p->iov[p->iovs_num].iov_base = z->zbuff; ++ p->iov[p->iovs_num].iov_len = z->out.pos; ++ p->iovs_num++; + p->next_packet_size = z->out.pos; + p->flags |= MULTIFD_FLAG_ZSTD; + +@@ -175,10 +178,7 @@ static int zstd_send_prepare(MultiFDSendParams *p, Error **errp) + */ + static int zstd_send_write(MultiFDSendParams *p, uint32_t used, Error **errp) + { +- struct zstd_data *z = p->data; +- +- return qio_channel_write_all(p->c, (void *)z->zbuff, p->next_packet_size, +- errp); ++ return qio_channel_writev_all(p->c, p->iov, p->iovs_num, errp); + } + + /** +-- +2.35.3 + diff --git a/SOURCES/kvm-multifd-Move-iov-from-pages-to-params.patch b/SOURCES/kvm-multifd-Move-iov-from-pages-to-params.patch new file mode 100644 index 0000000..8c014a3 --- /dev/null +++ b/SOURCES/kvm-multifd-Move-iov-from-pages-to-params.patch @@ -0,0 +1,190 @@ +From faae5f3dd29a25607f34466b9cd11d17ff6a0db6 Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Wed, 18 May 2022 02:52:24 -0300 +Subject: [PATCH 11/34] multifd: Move iov from pages to params +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Leonardo Brás +RH-MergeRequest: 185: MSG_ZEROCOPY + Multifd @ rhel8.6 +RH-Commit: [11/34] 382f1d5db714944bd12f264db9ad0e37ddb2cfeb +RH-Bugzilla: 2117252 +RH-Acked-by: quintela1 +RH-Acked-by: Dr. David Alan Gilbert +RH-Acked-by: Peter Xu + +This will allow us to reduce the number of system calls on the next patch. + +Signed-off-by: Juan Quintela +(cherry picked from commit 226468ba3dea950ab4bb0b729878dde25812da1c) +Signed-off-by: Leonardo Bras +--- + migration/multifd.c | 34 ++++++++++++++++++++++++---------- + migration/multifd.h | 8 ++++++-- + 2 files changed, 30 insertions(+), 12 deletions(-) + +diff --git a/migration/multifd.c b/migration/multifd.c +index d0d19470f9..5004f394aa 100644 +--- a/migration/multifd.c ++++ b/migration/multifd.c +@@ -86,7 +86,16 @@ static void nocomp_send_cleanup(MultiFDSendParams *p, Error **errp) + */ + static int nocomp_send_prepare(MultiFDSendParams *p, Error **errp) + { +- p->next_packet_size = p->pages->num * qemu_target_page_size(); ++ MultiFDPages_t *pages = p->pages; ++ size_t page_size = qemu_target_page_size(); ++ ++ for (int i = 0; i < p->pages->num; i++) { ++ p->iov[p->iovs_num].iov_base = pages->block->host + pages->offset[i]; ++ p->iov[p->iovs_num].iov_len = page_size; ++ p->iovs_num++; ++ } ++ ++ p->next_packet_size = p->pages->num * page_size; + p->flags |= MULTIFD_FLAG_NOCOMP; + return 0; + } +@@ -104,7 +113,7 @@ static int nocomp_send_prepare(MultiFDSendParams *p, Error **errp) + */ + static int nocomp_send_write(MultiFDSendParams *p, uint32_t used, Error **errp) + { +- return qio_channel_writev_all(p->c, p->pages->iov, used, errp); ++ return qio_channel_writev_all(p->c, p->iov, p->iovs_num, errp); + } + + /** +@@ -146,13 +155,18 @@ static void nocomp_recv_cleanup(MultiFDRecvParams *p) + static int nocomp_recv_pages(MultiFDRecvParams *p, Error **errp) + { + uint32_t flags = p->flags & MULTIFD_FLAG_COMPRESSION_MASK; ++ size_t page_size = qemu_target_page_size(); + + if (flags != MULTIFD_FLAG_NOCOMP) { + error_setg(errp, "multifd %u: flags received %x flags expected %x", + p->id, flags, MULTIFD_FLAG_NOCOMP); + return -1; + } +- return qio_channel_readv_all(p->c, p->pages->iov, p->pages->num, errp); ++ for (int i = 0; i < p->pages->num; i++) { ++ p->iov[i].iov_base = p->pages->block->host + p->pages->offset[i]; ++ p->iov[i].iov_len = page_size; ++ } ++ return qio_channel_readv_all(p->c, p->iov, p->pages->num, errp); + } + + static MultiFDMethods multifd_nocomp_ops = { +@@ -242,7 +256,6 @@ static MultiFDPages_t *multifd_pages_init(size_t size) + MultiFDPages_t *pages = g_new0(MultiFDPages_t, 1); + + pages->allocated = size; +- pages->iov = g_new0(struct iovec, size); + pages->offset = g_new0(ram_addr_t, size); + + return pages; +@@ -254,8 +267,6 @@ static void multifd_pages_clear(MultiFDPages_t *pages) + pages->allocated = 0; + pages->packet_num = 0; + pages->block = NULL; +- g_free(pages->iov); +- pages->iov = NULL; + g_free(pages->offset); + pages->offset = NULL; + g_free(pages); +@@ -365,8 +376,6 @@ static int multifd_recv_unfill_packet(MultiFDRecvParams *p, Error **errp) + return -1; + } + p->pages->offset[i] = offset; +- p->pages->iov[i].iov_base = block->host + offset; +- p->pages->iov[i].iov_len = page_size; + } + + return 0; +@@ -470,8 +479,6 @@ int multifd_queue_page(QEMUFile *f, RAMBlock *block, ram_addr_t offset) + + if (pages->block == block) { + pages->offset[pages->num] = offset; +- pages->iov[pages->num].iov_base = block->host + offset; +- pages->iov[pages->num].iov_len = qemu_target_page_size(); + pages->num++; + + if (pages->num < pages->allocated) { +@@ -564,6 +571,8 @@ void multifd_save_cleanup(void) + p->packet_len = 0; + g_free(p->packet); + p->packet = NULL; ++ g_free(p->iov); ++ p->iov = NULL; + multifd_send_state->ops->send_cleanup(p, &local_err); + if (local_err) { + migrate_set_error(migrate_get_current(), local_err); +@@ -651,6 +660,7 @@ static void *multifd_send_thread(void *opaque) + uint32_t used = p->pages->num; + uint64_t packet_num = p->packet_num; + uint32_t flags = p->flags; ++ p->iovs_num = 0; + + if (used) { + ret = multifd_send_state->ops->send_prepare(p, &local_err); +@@ -919,6 +929,7 @@ int multifd_save_setup(Error **errp) + p->packet->version = cpu_to_be32(MULTIFD_VERSION); + p->name = g_strdup_printf("multifdsend_%d", i); + p->tls_hostname = g_strdup(s->hostname); ++ p->iov = g_new0(struct iovec, page_count); + socket_send_channel_create(multifd_new_send_channel_async, p); + } + +@@ -1018,6 +1029,8 @@ int multifd_load_cleanup(Error **errp) + p->packet_len = 0; + g_free(p->packet); + p->packet = NULL; ++ g_free(p->iov); ++ p->iov = NULL; + multifd_recv_state->ops->recv_cleanup(p); + } + qemu_sem_destroy(&multifd_recv_state->sem_sync); +@@ -1158,6 +1171,7 @@ int multifd_load_setup(Error **errp) + + sizeof(uint64_t) * page_count; + p->packet = g_malloc0(p->packet_len); + p->name = g_strdup_printf("multifdrecv_%d", i); ++ p->iov = g_new0(struct iovec, page_count); + } + + for (i = 0; i < thread_count; i++) { +diff --git a/migration/multifd.h b/migration/multifd.h +index e57adc783b..c3f18af364 100644 +--- a/migration/multifd.h ++++ b/migration/multifd.h +@@ -62,8 +62,6 @@ typedef struct { + uint64_t packet_num; + /* offset of each page */ + ram_addr_t *offset; +- /* pointer to each page */ +- struct iovec *iov; + RAMBlock *block; + } MultiFDPages_t; + +@@ -110,6 +108,10 @@ typedef struct { + uint64_t num_pages; + /* syncs main thread and channels */ + QemuSemaphore sem_sync; ++ /* buffers to send */ ++ struct iovec *iov; ++ /* number of iovs used */ ++ uint32_t iovs_num; + /* used for compression methods */ + void *data; + } MultiFDSendParams; +@@ -149,6 +151,8 @@ typedef struct { + uint64_t num_pages; + /* syncs main thread and channels */ + QemuSemaphore sem_sync; ++ /* buffers to recv */ ++ struct iovec *iov; + /* used for de-compression methods */ + void *data; + } MultiFDRecvParams; +-- +2.35.3 + diff --git a/SOURCES/kvm-multifd-Remove-send_write-method.patch b/SOURCES/kvm-multifd-Remove-send_write-method.patch new file mode 100644 index 0000000..c37ef79 --- /dev/null +++ b/SOURCES/kvm-multifd-Remove-send_write-method.patch @@ -0,0 +1,160 @@ +From e93de1066dde56befa50d1466955c7b7432604d1 Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Wed, 18 May 2022 02:52:24 -0300 +Subject: [PATCH 14/34] multifd: Remove send_write() method +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Leonardo Brás +RH-MergeRequest: 185: MSG_ZEROCOPY + Multifd @ rhel8.6 +RH-Commit: [14/34] 483abd10c7cf11f27599ebcfb0586eb91b6519c1 +RH-Bugzilla: 2117252 +RH-Acked-by: quintela1 +RH-Acked-by: Dr. David Alan Gilbert +RH-Acked-by: Peter Xu + +Everything use now iov's. + +Signed-off-by: Juan Quintela +Reviewed-by: Dr. David Alan Gilbert +(cherry picked from commit 468fcb5dd0c965e1af0da9efab09b1462631da18) +Signed-off-by: Leonardo Bras +--- + migration/multifd-zlib.c | 17 ----------------- + migration/multifd-zstd.c | 17 ----------------- + migration/multifd.c | 20 ++------------------ + migration/multifd.h | 2 -- + 4 files changed, 2 insertions(+), 54 deletions(-) + +diff --git a/migration/multifd-zlib.c b/migration/multifd-zlib.c +index 96475e096e..8ed29b9633 100644 +--- a/migration/multifd-zlib.c ++++ b/migration/multifd-zlib.c +@@ -154,22 +154,6 @@ static int zlib_send_prepare(MultiFDSendParams *p, Error **errp) + return 0; + } + +-/** +- * zlib_send_write: do the actual write of the data +- * +- * Do the actual write of the comprresed buffer. +- * +- * Returns 0 for success or -1 for error +- * +- * @p: Params for the channel that we are using +- * @used: number of pages used +- * @errp: pointer to an error +- */ +-static int zlib_send_write(MultiFDSendParams *p, uint32_t used, Error **errp) +-{ +- return qio_channel_writev_all(p->c, p->iov, p->iovs_num, errp); +-} +- + /** + * zlib_recv_setup: setup receive side + * +@@ -312,7 +296,6 @@ static MultiFDMethods multifd_zlib_ops = { + .send_setup = zlib_send_setup, + .send_cleanup = zlib_send_cleanup, + .send_prepare = zlib_send_prepare, +- .send_write = zlib_send_write, + .recv_setup = zlib_recv_setup, + .recv_cleanup = zlib_recv_cleanup, + .recv_pages = zlib_recv_pages +diff --git a/migration/multifd-zstd.c b/migration/multifd-zstd.c +index 4e60cdbc54..25e1f517b5 100644 +--- a/migration/multifd-zstd.c ++++ b/migration/multifd-zstd.c +@@ -165,22 +165,6 @@ static int zstd_send_prepare(MultiFDSendParams *p, Error **errp) + return 0; + } + +-/** +- * zstd_send_write: do the actual write of the data +- * +- * Do the actual write of the comprresed buffer. +- * +- * Returns 0 for success or -1 for error +- * +- * @p: Params for the channel that we are using +- * @used: number of pages used +- * @errp: pointer to an error +- */ +-static int zstd_send_write(MultiFDSendParams *p, uint32_t used, Error **errp) +-{ +- return qio_channel_writev_all(p->c, p->iov, p->iovs_num, errp); +-} +- + /** + * zstd_recv_setup: setup receive side + * +@@ -325,7 +309,6 @@ static MultiFDMethods multifd_zstd_ops = { + .send_setup = zstd_send_setup, + .send_cleanup = zstd_send_cleanup, + .send_prepare = zstd_send_prepare, +- .send_write = zstd_send_write, + .recv_setup = zstd_recv_setup, + .recv_cleanup = zstd_recv_cleanup, + .recv_pages = zstd_recv_pages +diff --git a/migration/multifd.c b/migration/multifd.c +index 5004f394aa..1e1551d78b 100644 +--- a/migration/multifd.c ++++ b/migration/multifd.c +@@ -100,22 +100,6 @@ static int nocomp_send_prepare(MultiFDSendParams *p, Error **errp) + return 0; + } + +-/** +- * nocomp_send_write: do the actual write of the data +- * +- * For no compression we just have to write the data. +- * +- * Returns 0 for success or -1 for error +- * +- * @p: Params for the channel that we are using +- * @used: number of pages used +- * @errp: pointer to an error +- */ +-static int nocomp_send_write(MultiFDSendParams *p, uint32_t used, Error **errp) +-{ +- return qio_channel_writev_all(p->c, p->iov, p->iovs_num, errp); +-} +- + /** + * nocomp_recv_setup: setup receive side + * +@@ -173,7 +157,6 @@ static MultiFDMethods multifd_nocomp_ops = { + .send_setup = nocomp_send_setup, + .send_cleanup = nocomp_send_cleanup, + .send_prepare = nocomp_send_prepare, +- .send_write = nocomp_send_write, + .recv_setup = nocomp_recv_setup, + .recv_cleanup = nocomp_recv_cleanup, + .recv_pages = nocomp_recv_pages +@@ -687,7 +670,8 @@ static void *multifd_send_thread(void *opaque) + } + + if (used) { +- ret = multifd_send_state->ops->send_write(p, used, &local_err); ++ ret = qio_channel_writev_all(p->c, p->iov, p->iovs_num, ++ &local_err); + if (ret != 0) { + break; + } +diff --git a/migration/multifd.h b/migration/multifd.h +index c3f18af364..7496f951a7 100644 +--- a/migration/multifd.h ++++ b/migration/multifd.h +@@ -164,8 +164,6 @@ typedef struct { + void (*send_cleanup)(MultiFDSendParams *p, Error **errp); + /* Prepare the send packet */ + int (*send_prepare)(MultiFDSendParams *p, Error **errp); +- /* Write the send packet */ +- int (*send_write)(MultiFDSendParams *p, uint32_t used, Error **errp); + /* Setup for receiving side */ + int (*recv_setup)(MultiFDRecvParams *p, Error **errp); + /* Cleanup for receiving side */ +-- +2.35.3 + diff --git a/SOURCES/kvm-multifd-Rename-used-field-to-num.patch b/SOURCES/kvm-multifd-Rename-used-field-to-num.patch new file mode 100644 index 0000000..51bbfbe --- /dev/null +++ b/SOURCES/kvm-multifd-Rename-used-field-to-num.patch @@ -0,0 +1,177 @@ +From 030004b805604114aeaf8b9344b496332f433f71 Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Wed, 18 May 2022 02:52:22 -0300 +Subject: [PATCH 02/34] multifd: Rename used field to num +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Leonardo Brás +RH-MergeRequest: 185: MSG_ZEROCOPY + Multifd @ rhel8.6 +RH-Commit: [2/34] 5e411060b289cdabdf66c0774a55e109c0ef2906 +RH-Bugzilla: 2117252 +RH-Acked-by: quintela1 +RH-Acked-by: Dr. David Alan Gilbert +RH-Acked-by: Peter Xu + +We will need to split it later in zero_num (number of zero pages) and +normal_num (number of normal pages). This name is better. + +Signed-off-by: Juan Quintela +Reviewed-by: Dr. David Alan Gilbert +(cherry picked from commit 90a3d2f9d5f729147b2827c177932603ae6e2d55) +Signed-off-by: Leonardo Bras +--- + migration/multifd.c | 38 +++++++++++++++++++------------------- + migration/multifd.h | 2 +- + 2 files changed, 20 insertions(+), 20 deletions(-) + +diff --git a/migration/multifd.c b/migration/multifd.c +index 8125d0015c..8ea86d81dc 100644 +--- a/migration/multifd.c ++++ b/migration/multifd.c +@@ -252,7 +252,7 @@ static MultiFDPages_t *multifd_pages_init(size_t size) + + static void multifd_pages_clear(MultiFDPages_t *pages) + { +- pages->used = 0; ++ pages->num = 0; + pages->allocated = 0; + pages->packet_num = 0; + pages->block = NULL; +@@ -270,7 +270,7 @@ static void multifd_send_fill_packet(MultiFDSendParams *p) + + packet->flags = cpu_to_be32(p->flags); + packet->pages_alloc = cpu_to_be32(p->pages->allocated); +- packet->pages_used = cpu_to_be32(p->pages->used); ++ packet->pages_used = cpu_to_be32(p->pages->num); + packet->next_packet_size = cpu_to_be32(p->next_packet_size); + packet->packet_num = cpu_to_be64(p->packet_num); + +@@ -278,7 +278,7 @@ static void multifd_send_fill_packet(MultiFDSendParams *p) + strncpy(packet->ramblock, p->pages->block->idstr, 256); + } + +- for (i = 0; i < p->pages->used; i++) { ++ for (i = 0; i < p->pages->num; i++) { + /* there are architectures where ram_addr_t is 32 bit */ + uint64_t temp = p->pages->offset[i]; + +@@ -332,18 +332,18 @@ static int multifd_recv_unfill_packet(MultiFDRecvParams *p, Error **errp) + p->pages = multifd_pages_init(packet->pages_alloc); + } + +- p->pages->used = be32_to_cpu(packet->pages_used); +- if (p->pages->used > packet->pages_alloc) { ++ p->pages->num = be32_to_cpu(packet->pages_used); ++ if (p->pages->num > packet->pages_alloc) { + error_setg(errp, "multifd: received packet " + "with %d pages and expected maximum pages are %d", +- p->pages->used, packet->pages_alloc) ; ++ p->pages->num, packet->pages_alloc) ; + return -1; + } + + p->next_packet_size = be32_to_cpu(packet->next_packet_size); + p->packet_num = be64_to_cpu(packet->packet_num); + +- if (p->pages->used == 0) { ++ if (p->pages->num == 0) { + return 0; + } + +@@ -356,7 +356,7 @@ static int multifd_recv_unfill_packet(MultiFDRecvParams *p, Error **errp) + return -1; + } + +- for (i = 0; i < p->pages->used; i++) { ++ for (i = 0; i < p->pages->num; i++) { + uint64_t offset = be64_to_cpu(packet->offset[i]); + + if (offset > (block->used_length - page_size)) { +@@ -443,13 +443,13 @@ static int multifd_send_pages(QEMUFile *f) + } + qemu_mutex_unlock(&p->mutex); + } +- assert(!p->pages->used); ++ assert(!p->pages->num); + assert(!p->pages->block); + + p->packet_num = multifd_send_state->packet_num++; + multifd_send_state->pages = p->pages; + p->pages = pages; +- transferred = ((uint64_t) pages->used) * qemu_target_page_size() ++ transferred = ((uint64_t) pages->num) * qemu_target_page_size() + + p->packet_len; + qemu_file_update_transfer(f, transferred); + ram_counters.multifd_bytes += transferred; +@@ -469,12 +469,12 @@ int multifd_queue_page(QEMUFile *f, RAMBlock *block, ram_addr_t offset) + } + + if (pages->block == block) { +- pages->offset[pages->used] = offset; +- pages->iov[pages->used].iov_base = block->host + offset; +- pages->iov[pages->used].iov_len = qemu_target_page_size(); +- pages->used++; ++ pages->offset[pages->num] = offset; ++ pages->iov[pages->num].iov_base = block->host + offset; ++ pages->iov[pages->num].iov_len = qemu_target_page_size(); ++ pages->num++; + +- if (pages->used < pages->allocated) { ++ if (pages->num < pages->allocated) { + return 1; + } + } +@@ -586,7 +586,7 @@ void multifd_send_sync_main(QEMUFile *f) + if (!migrate_use_multifd()) { + return; + } +- if (multifd_send_state->pages->used) { ++ if (multifd_send_state->pages->num) { + if (multifd_send_pages(f) < 0) { + error_report("%s: multifd_send_pages fail", __func__); + return; +@@ -649,7 +649,7 @@ static void *multifd_send_thread(void *opaque) + qemu_mutex_lock(&p->mutex); + + if (p->pending_job) { +- uint32_t used = p->pages->used; ++ uint32_t used = p->pages->num; + uint64_t packet_num = p->packet_num; + flags = p->flags; + +@@ -665,7 +665,7 @@ static void *multifd_send_thread(void *opaque) + p->flags = 0; + p->num_packets++; + p->num_pages += used; +- p->pages->used = 0; ++ p->pages->num = 0; + p->pages->block = NULL; + qemu_mutex_unlock(&p->mutex); + +@@ -1091,7 +1091,7 @@ static void *multifd_recv_thread(void *opaque) + break; + } + +- used = p->pages->used; ++ used = p->pages->num; + flags = p->flags; + /* recv methods don't know how to handle the SYNC flag */ + p->flags &= ~MULTIFD_FLAG_SYNC; +diff --git a/migration/multifd.h b/migration/multifd.h +index 15c50ca0b2..86820dd028 100644 +--- a/migration/multifd.h ++++ b/migration/multifd.h +@@ -55,7 +55,7 @@ typedef struct { + + typedef struct { + /* number of used pages */ +- uint32_t used; ++ uint32_t num; + /* number of allocated pages */ + uint32_t allocated; + /* global number of generated multifd packets */ +-- +2.35.3 + diff --git a/SOURCES/kvm-multifd-Send-header-packet-without-flags-if-zero-cop.patch b/SOURCES/kvm-multifd-Send-header-packet-without-flags-if-zero-cop.patch new file mode 100644 index 0000000..cb12219 --- /dev/null +++ b/SOURCES/kvm-multifd-Send-header-packet-without-flags-if-zero-cop.patch @@ -0,0 +1,102 @@ +From 8a1b74503b17a1f48283eeec547579aad5bdb8f9 Mon Sep 17 00:00:00 2001 +From: Leonardo Bras +Date: Wed, 18 May 2022 02:52:25 -0300 +Subject: [PATCH 22/34] multifd: Send header packet without flags if + zero-copy-send is enabled +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Leonardo Brás +RH-MergeRequest: 185: MSG_ZEROCOPY + Multifd @ rhel8.6 +RH-Commit: [22/34] f8ea6e11134afe5291b6f404dc9b59557fbf1030 +RH-Bugzilla: 2117252 +RH-Acked-by: quintela1 +RH-Acked-by: Dr. David Alan Gilbert +RH-Acked-by: Peter Xu + +Since d48c3a0445 ("multifd: Use a single writev on the send side"), +sending the header packet and the memory pages happens in the same +writev, which can potentially make the migration faster. + +Using channel-socket as example, this works well with the default copying +mechanism of sendmsg(), but with zero-copy-send=true, it will cause +the migration to often break. + +This happens because the header packet buffer gets reused quite often, +and there is a high chance that by the time the MSG_ZEROCOPY mechanism get +to send the buffer, it has already changed, sending the wrong data and +causing the migration to abort. + +It means that, as it is, the buffer for the header packet is not suitable +for sending with MSG_ZEROCOPY. + +In order to enable zero copy for multifd, send the header packet on an +individual write(), without any flags, and the remanining pages with a +writev(), as it was happening before. This only changes how a migration +with zero-copy-send=true works, not changing any current behavior for +migrations with zero-copy-send=false. + +Signed-off-by: Leonardo Bras +Reviewed-by: Peter Xu +Reviewed-by: Daniel P. Berrangé +Message-Id: <20220513062836.965425-8-leobras@redhat.com> +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit b7dbdd8e76cd03453c234dbb9578d20969859d74) +Signed-off-by: Leonardo Bras +--- + migration/multifd.c | 22 +++++++++++++++++++--- + 1 file changed, 19 insertions(+), 3 deletions(-) + +diff --git a/migration/multifd.c b/migration/multifd.c +index 1e34e01ebc..193f70cdba 100644 +--- a/migration/multifd.c ++++ b/migration/multifd.c +@@ -624,6 +624,7 @@ static void *multifd_send_thread(void *opaque) + MultiFDSendParams *p = opaque; + Error *local_err = NULL; + int ret = 0; ++ bool use_zero_copy_send = migrate_use_zero_copy_send(); + + trace_multifd_send_thread_start(p->id); + rcu_register_thread(); +@@ -646,9 +647,14 @@ static void *multifd_send_thread(void *opaque) + if (p->pending_job) { + uint64_t packet_num = p->packet_num; + uint32_t flags = p->flags; +- p->iovs_num = 1; + p->normal_num = 0; + ++ if (use_zero_copy_send) { ++ p->iovs_num = 0; ++ } else { ++ p->iovs_num = 1; ++ } ++ + for (int i = 0; i < p->pages->num; i++) { + p->normal[p->normal_num] = p->pages->offset[i]; + p->normal_num++; +@@ -672,8 +678,18 @@ static void *multifd_send_thread(void *opaque) + trace_multifd_send(p->id, packet_num, p->normal_num, flags, + p->next_packet_size); + +- p->iov[0].iov_len = p->packet_len; +- p->iov[0].iov_base = p->packet; ++ if (use_zero_copy_send) { ++ /* Send header first, without zerocopy */ ++ ret = qio_channel_write_all(p->c, (void *)p->packet, ++ p->packet_len, &local_err); ++ if (ret != 0) { ++ break; ++ } ++ } else { ++ /* Send header using the same writev call */ ++ p->iov[0].iov_len = p->packet_len; ++ p->iov[0].iov_base = p->packet; ++ } + + ret = qio_channel_writev_all(p->c, p->iov, p->iovs_num, + &local_err); +-- +2.35.3 + diff --git a/SOURCES/kvm-multifd-The-variable-is-only-used-inside-the-loop.patch b/SOURCES/kvm-multifd-The-variable-is-only-used-inside-the-loop.patch new file mode 100644 index 0000000..5f03ced --- /dev/null +++ b/SOURCES/kvm-multifd-The-variable-is-only-used-inside-the-loop.patch @@ -0,0 +1,48 @@ +From 8814fdd9eba7a69c2b5f7df8a8db49d7c2ac8378 Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Wed, 18 May 2022 02:52:23 -0300 +Subject: [PATCH 04/34] multifd: The variable is only used inside the loop +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Leonardo Brás +RH-MergeRequest: 185: MSG_ZEROCOPY + Multifd @ rhel8.6 +RH-Commit: [4/34] ea2b915fa5e795d88edaa3e0cf39a9f2c3cc2050 +RH-Bugzilla: 2117252 +RH-Acked-by: quintela1 +RH-Acked-by: Dr. David Alan Gilbert +RH-Acked-by: Peter Xu + +Signed-off-by: Juan Quintela +Reviewed-by: Dr. David Alan Gilbert +(cherry picked from commit 1943c11a62bd0741e5d9fbba78404fe47ebea820) +Signed-off-by: Leonardo Bras +--- + migration/multifd.c | 3 +-- + 1 file changed, 1 insertion(+), 2 deletions(-) + +diff --git a/migration/multifd.c b/migration/multifd.c +index cdeffdc4c5..ce7101cf9d 100644 +--- a/migration/multifd.c ++++ b/migration/multifd.c +@@ -629,7 +629,6 @@ static void *multifd_send_thread(void *opaque) + MultiFDSendParams *p = opaque; + Error *local_err = NULL; + int ret = 0; +- uint32_t flags = 0; + + trace_multifd_send_thread_start(p->id); + rcu_register_thread(); +@@ -652,7 +651,7 @@ static void *multifd_send_thread(void *opaque) + if (p->pending_job) { + uint32_t used = p->pages->num; + uint64_t packet_num = p->packet_num; +- flags = p->flags; ++ uint32_t flags = p->flags; + + if (used) { + ret = multifd_send_state->ops->send_prepare(p, used, +-- +2.35.3 + diff --git a/SOURCES/kvm-multifd-Use-a-single-writev-on-the-send-side.patch b/SOURCES/kvm-multifd-Use-a-single-writev-on-the-send-side.patch new file mode 100644 index 0000000..495e201 --- /dev/null +++ b/SOURCES/kvm-multifd-Use-a-single-writev-on-the-send-side.patch @@ -0,0 +1,80 @@ +From ef864989dbe480e952bb3c5ea3cd9dfcf97bd455 Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Wed, 18 May 2022 02:52:24 -0300 +Subject: [PATCH 15/34] multifd: Use a single writev on the send side +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Leonardo Brás +RH-MergeRequest: 185: MSG_ZEROCOPY + Multifd @ rhel8.6 +RH-Commit: [15/34] 3e421f67a5f69231280ed748318c0317da31ae95 +RH-Bugzilla: 2117252 +RH-Acked-by: quintela1 +RH-Acked-by: Dr. David Alan Gilbert +RH-Acked-by: Peter Xu + +Until now, we wrote the packet header with write(), and the rest of the +pages with writev(). Just increase the size of the iovec and do a +single writev(). + +Signed-off-by: Juan Quintela +Reviewed-by: Dr. David Alan Gilbert +(cherry picked from commit d48c3a044537689866fe44e65d24c7d39a68868a) +Signed-off-by: Leonardo Bras +--- + migration/multifd.c | 20 ++++++++------------ + 1 file changed, 8 insertions(+), 12 deletions(-) + +diff --git a/migration/multifd.c b/migration/multifd.c +index 1e1551d78b..d0f86542b1 100644 +--- a/migration/multifd.c ++++ b/migration/multifd.c +@@ -643,7 +643,7 @@ static void *multifd_send_thread(void *opaque) + uint32_t used = p->pages->num; + uint64_t packet_num = p->packet_num; + uint32_t flags = p->flags; +- p->iovs_num = 0; ++ p->iovs_num = 1; + + if (used) { + ret = multifd_send_state->ops->send_prepare(p, &local_err); +@@ -663,20 +663,15 @@ static void *multifd_send_thread(void *opaque) + trace_multifd_send(p->id, packet_num, used, flags, + p->next_packet_size); + +- ret = qio_channel_write_all(p->c, (void *)p->packet, +- p->packet_len, &local_err); ++ p->iov[0].iov_len = p->packet_len; ++ p->iov[0].iov_base = p->packet; ++ ++ ret = qio_channel_writev_all(p->c, p->iov, p->iovs_num, ++ &local_err); + if (ret != 0) { + break; + } + +- if (used) { +- ret = qio_channel_writev_all(p->c, p->iov, p->iovs_num, +- &local_err); +- if (ret != 0) { +- break; +- } +- } +- + qemu_mutex_lock(&p->mutex); + p->pending_job--; + qemu_mutex_unlock(&p->mutex); +@@ -913,7 +908,8 @@ int multifd_save_setup(Error **errp) + p->packet->version = cpu_to_be32(MULTIFD_VERSION); + p->name = g_strdup_printf("multifdsend_%d", i); + p->tls_hostname = g_strdup(s->hostname); +- p->iov = g_new0(struct iovec, page_count); ++ /* We need one extra place for the packet header */ ++ p->iov = g_new0(struct iovec, page_count + 1); + socket_send_channel_create(multifd_new_send_channel_async, p); + } + +-- +2.35.3 + diff --git a/SOURCES/kvm-multifd-Use-normal-pages-array-on-the-send-side.patch b/SOURCES/kvm-multifd-Use-normal-pages-array-on-the-send-side.patch new file mode 100644 index 0000000..2bd055b --- /dev/null +++ b/SOURCES/kvm-multifd-Use-normal-pages-array-on-the-send-side.patch @@ -0,0 +1,261 @@ +From c70c97cd59fd22de0957ea1c0a655fb5ef270f1e Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Wed, 18 May 2022 02:52:24 -0300 +Subject: [PATCH 16/34] multifd: Use normal pages array on the send side +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Leonardo Brás +RH-MergeRequest: 185: MSG_ZEROCOPY + Multifd @ rhel8.6 +RH-Commit: [16/34] 24f4ea3248f6ce883d57344600c8adbf51bd7d8c +RH-Bugzilla: 2117252 +RH-Acked-by: quintela1 +RH-Acked-by: Dr. David Alan Gilbert +RH-Acked-by: Peter Xu + +We are only sending normal pages through multifd channels. +Later on this series, we are going to also send zero pages. +We are going to detect if a page is zero or non zero in the multifd +channel thread, not on the main thread. + +So we receive an array of pages page->offset[N] + +And we will end with: + +p->normal[N - zero_pages] +p->zero[zero_pages]. + +In this patch, we just copy all the pages in offset to normal. + +for (i = 0; i < pages->num; i++) { + p->narmal[p->normal_num] = pages->offset[i]; + p->normal_num++: +} + +Later in the series this becomes: + +for (i = 0; i < pages->num; i++) { + if (buffer_is_zero(page->offset[i])) { + p->zerol[p->zero_num] = pages->offset[i]; + p->zero_num++: + } else { + p->narmal[p->normal_num] = pages->offset[i]; + p->normal_num++: + } +} + +Signed-off-by: Juan Quintela +Reviewed-by: Dr. David Alan Gilbert + +--- + +Improving comment (dave) +Renaming num_normal_pages to total_normal_pages (peter) + +(cherry picked from commit 815956f03902980c771da64b17f7f791c1cb57b0) +Signed-off-by: Leonardo Bras +--- + migration/multifd-zlib.c | 6 +++--- + migration/multifd-zstd.c | 6 +++--- + migration/multifd.c | 30 +++++++++++++++++++----------- + migration/multifd.h | 8 ++++++-- + migration/trace-events | 4 ++-- + 5 files changed, 33 insertions(+), 21 deletions(-) + +diff --git a/migration/multifd-zlib.c b/migration/multifd-zlib.c +index 8ed29b9633..8508f26adf 100644 +--- a/migration/multifd-zlib.c ++++ b/migration/multifd-zlib.c +@@ -108,16 +108,16 @@ static int zlib_send_prepare(MultiFDSendParams *p, Error **errp) + int ret; + uint32_t i; + +- for (i = 0; i < p->pages->num; i++) { ++ for (i = 0; i < p->normal_num; i++) { + uint32_t available = z->zbuff_len - out_size; + int flush = Z_NO_FLUSH; + +- if (i == p->pages->num - 1) { ++ if (i == p->normal_num - 1) { + flush = Z_SYNC_FLUSH; + } + + zs->avail_in = page_size; +- zs->next_in = p->pages->block->host + p->pages->offset[i]; ++ zs->next_in = p->pages->block->host + p->normal[i]; + + zs->avail_out = available; + zs->next_out = z->zbuff + out_size; +diff --git a/migration/multifd-zstd.c b/migration/multifd-zstd.c +index 25e1f517b5..693af3a140 100644 +--- a/migration/multifd-zstd.c ++++ b/migration/multifd-zstd.c +@@ -123,13 +123,13 @@ static int zstd_send_prepare(MultiFDSendParams *p, Error **errp) + z->out.size = z->zbuff_len; + z->out.pos = 0; + +- for (i = 0; i < p->pages->num; i++) { ++ for (i = 0; i < p->normal_num; i++) { + ZSTD_EndDirective flush = ZSTD_e_continue; + +- if (i == p->pages->num - 1) { ++ if (i == p->normal_num - 1) { + flush = ZSTD_e_flush; + } +- z->in.src = p->pages->block->host + p->pages->offset[i]; ++ z->in.src = p->pages->block->host + p->normal[i]; + z->in.size = page_size; + z->in.pos = 0; + +diff --git a/migration/multifd.c b/migration/multifd.c +index d0f86542b1..3725226400 100644 +--- a/migration/multifd.c ++++ b/migration/multifd.c +@@ -89,13 +89,13 @@ static int nocomp_send_prepare(MultiFDSendParams *p, Error **errp) + MultiFDPages_t *pages = p->pages; + size_t page_size = qemu_target_page_size(); + +- for (int i = 0; i < p->pages->num; i++) { +- p->iov[p->iovs_num].iov_base = pages->block->host + pages->offset[i]; ++ for (int i = 0; i < p->normal_num; i++) { ++ p->iov[p->iovs_num].iov_base = pages->block->host + p->normal[i]; + p->iov[p->iovs_num].iov_len = page_size; + p->iovs_num++; + } + +- p->next_packet_size = p->pages->num * page_size; ++ p->next_packet_size = p->normal_num * page_size; + p->flags |= MULTIFD_FLAG_NOCOMP; + return 0; + } +@@ -262,7 +262,7 @@ static void multifd_send_fill_packet(MultiFDSendParams *p) + + packet->flags = cpu_to_be32(p->flags); + packet->pages_alloc = cpu_to_be32(p->pages->allocated); +- packet->pages_used = cpu_to_be32(p->pages->num); ++ packet->pages_used = cpu_to_be32(p->normal_num); + packet->next_packet_size = cpu_to_be32(p->next_packet_size); + packet->packet_num = cpu_to_be64(p->packet_num); + +@@ -270,9 +270,9 @@ static void multifd_send_fill_packet(MultiFDSendParams *p) + strncpy(packet->ramblock, p->pages->block->idstr, 256); + } + +- for (i = 0; i < p->pages->num; i++) { ++ for (i = 0; i < p->normal_num; i++) { + /* there are architectures where ram_addr_t is 32 bit */ +- uint64_t temp = p->pages->offset[i]; ++ uint64_t temp = p->normal[i]; + + packet->offset[i] = cpu_to_be64(temp); + } +@@ -556,6 +556,8 @@ void multifd_save_cleanup(void) + p->packet = NULL; + g_free(p->iov); + p->iov = NULL; ++ g_free(p->normal); ++ p->normal = NULL; + multifd_send_state->ops->send_cleanup(p, &local_err); + if (local_err) { + migrate_set_error(migrate_get_current(), local_err); +@@ -640,12 +642,17 @@ static void *multifd_send_thread(void *opaque) + qemu_mutex_lock(&p->mutex); + + if (p->pending_job) { +- uint32_t used = p->pages->num; + uint64_t packet_num = p->packet_num; + uint32_t flags = p->flags; + p->iovs_num = 1; ++ p->normal_num = 0; ++ ++ for (int i = 0; i < p->pages->num; i++) { ++ p->normal[p->normal_num] = p->pages->offset[i]; ++ p->normal_num++; ++ } + +- if (used) { ++ if (p->normal_num) { + ret = multifd_send_state->ops->send_prepare(p, &local_err); + if (ret != 0) { + qemu_mutex_unlock(&p->mutex); +@@ -655,12 +662,12 @@ static void *multifd_send_thread(void *opaque) + multifd_send_fill_packet(p); + p->flags = 0; + p->num_packets++; +- p->num_pages += used; ++ p->total_normal_pages += p->normal_num; + p->pages->num = 0; + p->pages->block = NULL; + qemu_mutex_unlock(&p->mutex); + +- trace_multifd_send(p->id, packet_num, used, flags, ++ trace_multifd_send(p->id, packet_num, p->normal_num, flags, + p->next_packet_size); + + p->iov[0].iov_len = p->packet_len; +@@ -710,7 +717,7 @@ out: + qemu_mutex_unlock(&p->mutex); + + rcu_unregister_thread(); +- trace_multifd_send_thread_end(p->id, p->num_packets, p->num_pages); ++ trace_multifd_send_thread_end(p->id, p->num_packets, p->total_normal_pages); + + return NULL; + } +@@ -910,6 +917,7 @@ int multifd_save_setup(Error **errp) + p->tls_hostname = g_strdup(s->hostname); + /* We need one extra place for the packet header */ + p->iov = g_new0(struct iovec, page_count + 1); ++ p->normal = g_new0(ram_addr_t, page_count); + socket_send_channel_create(multifd_new_send_channel_async, p); + } + +diff --git a/migration/multifd.h b/migration/multifd.h +index 7496f951a7..7823199dbe 100644 +--- a/migration/multifd.h ++++ b/migration/multifd.h +@@ -104,14 +104,18 @@ typedef struct { + /* thread local variables */ + /* packets sent through this channel */ + uint64_t num_packets; +- /* pages sent through this channel */ +- uint64_t num_pages; ++ /* non zero pages sent through this channel */ ++ uint64_t total_normal_pages; + /* syncs main thread and channels */ + QemuSemaphore sem_sync; + /* buffers to send */ + struct iovec *iov; + /* number of iovs used */ + uint32_t iovs_num; ++ /* Pages that are not zero */ ++ ram_addr_t *normal; ++ /* num of non zero pages */ ++ uint32_t normal_num; + /* used for compression methods */ + void *data; + } MultiFDSendParams; +diff --git a/migration/trace-events b/migration/trace-events +index 5172cb3b3d..171a83a55d 100644 +--- a/migration/trace-events ++++ b/migration/trace-events +@@ -124,13 +124,13 @@ multifd_recv_sync_main_wait(uint8_t id) "channel %u" + multifd_recv_terminate_threads(bool error) "error %d" + multifd_recv_thread_end(uint8_t id, uint64_t packets, uint64_t pages) "channel %u packets %" PRIu64 " pages %" PRIu64 + multifd_recv_thread_start(uint8_t id) "%u" +-multifd_send(uint8_t id, uint64_t packet_num, uint32_t used, uint32_t flags, uint32_t next_packet_size) "channel %u packet_num %" PRIu64 " pages %u flags 0x%x next packet size %u" ++multifd_send(uint8_t id, uint64_t packet_num, uint32_t normal, uint32_t flags, uint32_t next_packet_size) "channel %u packet_num %" PRIu64 " normal pages %u flags 0x%x next packet size %u" + multifd_send_error(uint8_t id) "channel %u" + multifd_send_sync_main(long packet_num) "packet num %ld" + multifd_send_sync_main_signal(uint8_t id) "channel %u" + multifd_send_sync_main_wait(uint8_t id) "channel %u" + multifd_send_terminate_threads(bool error) "error %d" +-multifd_send_thread_end(uint8_t id, uint64_t packets, uint64_t pages) "channel %u packets %" PRIu64 " pages %" PRIu64 ++multifd_send_thread_end(uint8_t id, uint64_t packets, uint64_t normal_pages) "channel %u packets %" PRIu64 " normal pages %" PRIu64 + multifd_send_thread_start(uint8_t id) "%u" + multifd_tls_outgoing_handshake_start(void *ioc, void *tioc, const char *hostname) "ioc=%p tioc=%p hostname=%s" + multifd_tls_outgoing_handshake_error(void *ioc, const char *err) "ioc=%p err=%s" +-- +2.35.3 + diff --git a/SOURCES/kvm-multifd-multifd_send_sync_main-now-returns-negative-.patch b/SOURCES/kvm-multifd-multifd_send_sync_main-now-returns-negative-.patch new file mode 100644 index 0000000..a5b3747 --- /dev/null +++ b/SOURCES/kvm-multifd-multifd_send_sync_main-now-returns-negative-.patch @@ -0,0 +1,163 @@ +From 9d8968ae10e1724198c493646e2bdcd55f205e5e Mon Sep 17 00:00:00 2001 +From: Leonardo Bras +Date: Wed, 18 May 2022 02:52:25 -0300 +Subject: [PATCH 21/34] multifd: multifd_send_sync_main now returns negative on + error +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Leonardo Brás +RH-MergeRequest: 185: MSG_ZEROCOPY + Multifd @ rhel8.6 +RH-Commit: [21/34] bef7acbe751fc4871a9ece72ab58b195658d6328 +RH-Bugzilla: 2117252 +RH-Acked-by: quintela1 +RH-Acked-by: Dr. David Alan Gilbert +RH-Acked-by: Peter Xu + +Even though multifd_send_sync_main() currently emits error_reports, it's +callers don't really check it before continuing. + +Change multifd_send_sync_main() to return -1 on error and 0 on success. +Also change all it's callers to make use of this change and possibly fail +earlier. + +(This change is important to next patch on multifd zero copy +implementation, to make it sure an error in zero-copy flush does not go +unnoticed. + +Signed-off-by: Leonardo Bras +Reviewed-by: Daniel P. Berrangé +Reviewed-by: Peter Xu +Message-Id: <20220513062836.965425-7-leobras@redhat.com> +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit 33d70973a3a6e8c6b62bcbc64d9e488961981007) +Signed-off-by: Leonardo Bras +--- + migration/multifd.c | 10 ++++++---- + migration/multifd.h | 2 +- + migration/ram.c | 29 ++++++++++++++++++++++------- + 3 files changed, 29 insertions(+), 12 deletions(-) + +diff --git a/migration/multifd.c b/migration/multifd.c +index e53811f04a..1e34e01ebc 100644 +--- a/migration/multifd.c ++++ b/migration/multifd.c +@@ -573,17 +573,17 @@ void multifd_save_cleanup(void) + multifd_send_state = NULL; + } + +-void multifd_send_sync_main(QEMUFile *f) ++int multifd_send_sync_main(QEMUFile *f) + { + int i; + + if (!migrate_use_multifd()) { +- return; ++ return 0; + } + if (multifd_send_state->pages->num) { + if (multifd_send_pages(f) < 0) { + error_report("%s: multifd_send_pages fail", __func__); +- return; ++ return -1; + } + } + for (i = 0; i < migrate_multifd_channels(); i++) { +@@ -596,7 +596,7 @@ void multifd_send_sync_main(QEMUFile *f) + if (p->quit) { + error_report("%s: channel %d has already quit", __func__, i); + qemu_mutex_unlock(&p->mutex); +- return; ++ return -1; + } + + p->packet_num = multifd_send_state->packet_num++; +@@ -615,6 +615,8 @@ void multifd_send_sync_main(QEMUFile *f) + qemu_sem_wait(&p->sem_sync); + } + trace_multifd_send_sync_main(multifd_send_state->packet_num); ++ ++ return 0; + } + + static void *multifd_send_thread(void *opaque) +diff --git a/migration/multifd.h b/migration/multifd.h +index 7823199dbe..92de878155 100644 +--- a/migration/multifd.h ++++ b/migration/multifd.h +@@ -22,7 +22,7 @@ int multifd_load_cleanup(Error **errp); + bool multifd_recv_all_channels_created(void); + bool multifd_recv_new_channel(QIOChannel *ioc, Error **errp); + void multifd_recv_sync_main(void); +-void multifd_send_sync_main(QEMUFile *f); ++int multifd_send_sync_main(QEMUFile *f); + int multifd_queue_page(QEMUFile *f, RAMBlock *block, ram_addr_t offset); + + /* Multifd Compression flags */ +diff --git a/migration/ram.c b/migration/ram.c +index 863035d235..3e208efca7 100644 +--- a/migration/ram.c ++++ b/migration/ram.c +@@ -2992,6 +2992,7 @@ static int ram_save_setup(QEMUFile *f, void *opaque) + { + RAMState **rsp = opaque; + RAMBlock *block; ++ int ret; + + if (compress_threads_save_setup()) { + return -1; +@@ -3026,7 +3027,11 @@ static int ram_save_setup(QEMUFile *f, void *opaque) + ram_control_before_iterate(f, RAM_CONTROL_SETUP); + ram_control_after_iterate(f, RAM_CONTROL_SETUP); + +- multifd_send_sync_main(f); ++ ret = multifd_send_sync_main(f); ++ if (ret < 0) { ++ return ret; ++ } ++ + qemu_put_be64(f, RAM_SAVE_FLAG_EOS); + qemu_fflush(f); + +@@ -3135,7 +3140,11 @@ static int ram_save_iterate(QEMUFile *f, void *opaque) + out: + if (ret >= 0 + && migration_is_setup_or_active(migrate_get_current()->state)) { +- multifd_send_sync_main(rs->f); ++ ret = multifd_send_sync_main(rs->f); ++ if (ret < 0) { ++ return ret; ++ } ++ + qemu_put_be64(f, RAM_SAVE_FLAG_EOS); + qemu_fflush(f); + ram_counters.transferred += 8; +@@ -3193,13 +3202,19 @@ static int ram_save_complete(QEMUFile *f, void *opaque) + ram_control_after_iterate(f, RAM_CONTROL_FINISH); + } + +- if (ret >= 0) { +- multifd_send_sync_main(rs->f); +- qemu_put_be64(f, RAM_SAVE_FLAG_EOS); +- qemu_fflush(f); ++ if (ret < 0) { ++ return ret; + } + +- return ret; ++ ret = multifd_send_sync_main(rs->f); ++ if (ret < 0) { ++ return ret; ++ } ++ ++ qemu_put_be64(f, RAM_SAVE_FLAG_EOS); ++ qemu_fflush(f); ++ ++ return 0; + } + + static void ram_save_pending(QEMUFile *f, void *opaque, uint64_t max_size, +-- +2.35.3 + diff --git a/SOURCES/kvm-multifd-remove-used-parameter-from-send_prepare-meth.patch b/SOURCES/kvm-multifd-remove-used-parameter-from-send_prepare-meth.patch new file mode 100644 index 0000000..a3b973d --- /dev/null +++ b/SOURCES/kvm-multifd-remove-used-parameter-from-send_prepare-meth.patch @@ -0,0 +1,135 @@ +From 46c902b006cbf52341804e85f9246bdc8afc8611 Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Wed, 18 May 2022 02:52:23 -0300 +Subject: [PATCH 05/34] multifd: remove used parameter from send_prepare() + method +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Leonardo Brás +RH-MergeRequest: 185: MSG_ZEROCOPY + Multifd @ rhel8.6 +RH-Commit: [5/34] b4f1c9540bddd137756cab4bde4ba5d8eac09ab9 +RH-Bugzilla: 2117252 +RH-Acked-by: quintela1 +RH-Acked-by: Dr. David Alan Gilbert +RH-Acked-by: Peter Xu + +It is already there as p->pages->num. + +Signed-off-by: Juan Quintela +Reviewed-by: Dr. David Alan Gilbert +(cherry picked from commit 02fb81043ecee338e4aeb8f5be09a46325dc5e43) +Signed-off-by: Leonardo Bras +--- + migration/multifd-zlib.c | 7 +++---- + migration/multifd-zstd.c | 7 +++---- + migration/multifd.c | 9 +++------ + migration/multifd.h | 2 +- + 4 files changed, 10 insertions(+), 15 deletions(-) + +diff --git a/migration/multifd-zlib.c b/migration/multifd-zlib.c +index f403d2f031..0c70a2dc78 100644 +--- a/migration/multifd-zlib.c ++++ b/migration/multifd-zlib.c +@@ -96,10 +96,9 @@ static void zlib_send_cleanup(MultiFDSendParams *p, Error **errp) + * Returns 0 for success or -1 for error + * + * @p: Params for the channel that we are using +- * @used: number of pages used + * @errp: pointer to an error + */ +-static int zlib_send_prepare(MultiFDSendParams *p, uint32_t used, Error **errp) ++static int zlib_send_prepare(MultiFDSendParams *p, Error **errp) + { + struct iovec *iov = p->pages->iov; + struct zlib_data *z = p->data; +@@ -108,11 +107,11 @@ static int zlib_send_prepare(MultiFDSendParams *p, uint32_t used, Error **errp) + int ret; + uint32_t i; + +- for (i = 0; i < used; i++) { ++ for (i = 0; i < p->pages->num; i++) { + uint32_t available = z->zbuff_len - out_size; + int flush = Z_NO_FLUSH; + +- if (i == used - 1) { ++ if (i == p->pages->num - 1) { + flush = Z_SYNC_FLUSH; + } + +diff --git a/migration/multifd-zstd.c b/migration/multifd-zstd.c +index 8d657f8860..466b370cad 100644 +--- a/migration/multifd-zstd.c ++++ b/migration/multifd-zstd.c +@@ -109,10 +109,9 @@ static void zstd_send_cleanup(MultiFDSendParams *p, Error **errp) + * Returns 0 for success or -1 for error + * + * @p: Params for the channel that we are using +- * @used: number of pages used + * @errp: pointer to an error + */ +-static int zstd_send_prepare(MultiFDSendParams *p, uint32_t used, Error **errp) ++static int zstd_send_prepare(MultiFDSendParams *p, Error **errp) + { + struct iovec *iov = p->pages->iov; + struct zstd_data *z = p->data; +@@ -123,10 +122,10 @@ static int zstd_send_prepare(MultiFDSendParams *p, uint32_t used, Error **errp) + z->out.size = z->zbuff_len; + z->out.pos = 0; + +- for (i = 0; i < used; i++) { ++ for (i = 0; i < p->pages->num; i++) { + ZSTD_EndDirective flush = ZSTD_e_continue; + +- if (i == used - 1) { ++ if (i == p->pages->num - 1) { + flush = ZSTD_e_flush; + } + z->in.src = iov[i].iov_base; +diff --git a/migration/multifd.c b/migration/multifd.c +index ce7101cf9d..098ef8842c 100644 +--- a/migration/multifd.c ++++ b/migration/multifd.c +@@ -82,13 +82,11 @@ static void nocomp_send_cleanup(MultiFDSendParams *p, Error **errp) + * Returns 0 for success or -1 for error + * + * @p: Params for the channel that we are using +- * @used: number of pages used + * @errp: pointer to an error + */ +-static int nocomp_send_prepare(MultiFDSendParams *p, uint32_t used, +- Error **errp) ++static int nocomp_send_prepare(MultiFDSendParams *p, Error **errp) + { +- p->next_packet_size = used * qemu_target_page_size(); ++ p->next_packet_size = p->pages->num * qemu_target_page_size(); + p->flags |= MULTIFD_FLAG_NOCOMP; + return 0; + } +@@ -654,8 +652,7 @@ static void *multifd_send_thread(void *opaque) + uint32_t flags = p->flags; + + if (used) { +- ret = multifd_send_state->ops->send_prepare(p, used, +- &local_err); ++ ret = multifd_send_state->ops->send_prepare(p, &local_err); + if (ret != 0) { + qemu_mutex_unlock(&p->mutex); + break; +diff --git a/migration/multifd.h b/migration/multifd.h +index 86820dd028..7968cc5c20 100644 +--- a/migration/multifd.h ++++ b/migration/multifd.h +@@ -159,7 +159,7 @@ typedef struct { + /* Cleanup for sending side */ + void (*send_cleanup)(MultiFDSendParams *p, Error **errp); + /* Prepare the send packet */ +- int (*send_prepare)(MultiFDSendParams *p, uint32_t used, Error **errp); ++ int (*send_prepare)(MultiFDSendParams *p, Error **errp); + /* Write the send packet */ + int (*send_write)(MultiFDSendParams *p, uint32_t used, Error **errp); + /* Setup for receiving side */ +-- +2.35.3 + diff --git a/SOURCES/kvm-multifd-remove-used-parameter-from-send_recv_pages-m.patch b/SOURCES/kvm-multifd-remove-used-parameter-from-send_recv_pages-m.patch new file mode 100644 index 0000000..d019cf5 --- /dev/null +++ b/SOURCES/kvm-multifd-remove-used-parameter-from-send_recv_pages-m.patch @@ -0,0 +1,149 @@ +From 75ef79011b75d1024dc50a999d03ab90570e6533 Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Wed, 18 May 2022 02:52:23 -0300 +Subject: [PATCH 06/34] multifd: remove used parameter from send_recv_pages() + method +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Leonardo Brás +RH-MergeRequest: 185: MSG_ZEROCOPY + Multifd @ rhel8.6 +RH-Commit: [6/34] fe59a7c794dd707cf71b2dc6104a0a89e2ac7d50 +RH-Bugzilla: 2117252 +RH-Acked-by: quintela1 +RH-Acked-by: Dr. David Alan Gilbert +RH-Acked-by: Peter Xu + +It is already there as p->pages->num. + +Signed-off-by: Juan Quintela +Reviewed-by: Dr. David Alan Gilbert +(cherry picked from commit 40a4bfe9d3f8ad35a9c3ffb4cbf7367e2777054b) +Signed-off-by: Leonardo Bras +--- + migration/multifd-zlib.c | 9 ++++----- + migration/multifd-zstd.c | 7 +++---- + migration/multifd.c | 7 +++---- + migration/multifd.h | 2 +- + 4 files changed, 11 insertions(+), 14 deletions(-) + +diff --git a/migration/multifd-zlib.c b/migration/multifd-zlib.c +index 0c70a2dc78..330fc021c5 100644 +--- a/migration/multifd-zlib.c ++++ b/migration/multifd-zlib.c +@@ -235,17 +235,16 @@ static void zlib_recv_cleanup(MultiFDRecvParams *p) + * Returns 0 for success or -1 for error + * + * @p: Params for the channel that we are using +- * @used: number of pages used + * @errp: pointer to an error + */ +-static int zlib_recv_pages(MultiFDRecvParams *p, uint32_t used, Error **errp) ++static int zlib_recv_pages(MultiFDRecvParams *p, Error **errp) + { + struct zlib_data *z = p->data; + z_stream *zs = &z->zs; + uint32_t in_size = p->next_packet_size; + /* we measure the change of total_out */ + uint32_t out_size = zs->total_out; +- uint32_t expected_size = used * qemu_target_page_size(); ++ uint32_t expected_size = p->pages->num * qemu_target_page_size(); + uint32_t flags = p->flags & MULTIFD_FLAG_COMPRESSION_MASK; + int ret; + int i; +@@ -264,12 +263,12 @@ static int zlib_recv_pages(MultiFDRecvParams *p, uint32_t used, Error **errp) + zs->avail_in = in_size; + zs->next_in = z->zbuff; + +- for (i = 0; i < used; i++) { ++ for (i = 0; i < p->pages->num; i++) { + struct iovec *iov = &p->pages->iov[i]; + int flush = Z_NO_FLUSH; + unsigned long start = zs->total_out; + +- if (i == used - 1) { ++ if (i == p->pages->num - 1) { + flush = Z_SYNC_FLUSH; + } + +diff --git a/migration/multifd-zstd.c b/migration/multifd-zstd.c +index 466b370cad..f0d1105792 100644 +--- a/migration/multifd-zstd.c ++++ b/migration/multifd-zstd.c +@@ -255,14 +255,13 @@ static void zstd_recv_cleanup(MultiFDRecvParams *p) + * Returns 0 for success or -1 for error + * + * @p: Params for the channel that we are using +- * @used: number of pages used + * @errp: pointer to an error + */ +-static int zstd_recv_pages(MultiFDRecvParams *p, uint32_t used, Error **errp) ++static int zstd_recv_pages(MultiFDRecvParams *p, Error **errp) + { + uint32_t in_size = p->next_packet_size; + uint32_t out_size = 0; +- uint32_t expected_size = used * qemu_target_page_size(); ++ uint32_t expected_size = p->pages->num * qemu_target_page_size(); + uint32_t flags = p->flags & MULTIFD_FLAG_COMPRESSION_MASK; + struct zstd_data *z = p->data; + int ret; +@@ -283,7 +282,7 @@ static int zstd_recv_pages(MultiFDRecvParams *p, uint32_t used, Error **errp) + z->in.size = in_size; + z->in.pos = 0; + +- for (i = 0; i < used; i++) { ++ for (i = 0; i < p->pages->num; i++) { + struct iovec *iov = &p->pages->iov[i]; + + z->out.dst = iov->iov_base; +diff --git a/migration/multifd.c b/migration/multifd.c +index 098ef8842c..55d99a8232 100644 +--- a/migration/multifd.c ++++ b/migration/multifd.c +@@ -141,10 +141,9 @@ static void nocomp_recv_cleanup(MultiFDRecvParams *p) + * Returns 0 for success or -1 for error + * + * @p: Params for the channel that we are using +- * @used: number of pages used + * @errp: pointer to an error + */ +-static int nocomp_recv_pages(MultiFDRecvParams *p, uint32_t used, Error **errp) ++static int nocomp_recv_pages(MultiFDRecvParams *p, Error **errp) + { + uint32_t flags = p->flags & MULTIFD_FLAG_COMPRESSION_MASK; + +@@ -153,7 +152,7 @@ static int nocomp_recv_pages(MultiFDRecvParams *p, uint32_t used, Error **errp) + p->id, flags, MULTIFD_FLAG_NOCOMP); + return -1; + } +- return qio_channel_readv_all(p->c, p->pages->iov, used, errp); ++ return qio_channel_readv_all(p->c, p->pages->iov, p->pages->num, errp); + } + + static MultiFDMethods multifd_nocomp_ops = { +@@ -1099,7 +1098,7 @@ static void *multifd_recv_thread(void *opaque) + qemu_mutex_unlock(&p->mutex); + + if (used) { +- ret = multifd_recv_state->ops->recv_pages(p, used, &local_err); ++ ret = multifd_recv_state->ops->recv_pages(p, &local_err); + if (ret != 0) { + break; + } +diff --git a/migration/multifd.h b/migration/multifd.h +index 7968cc5c20..e57adc783b 100644 +--- a/migration/multifd.h ++++ b/migration/multifd.h +@@ -167,7 +167,7 @@ typedef struct { + /* Cleanup for receiving side */ + void (*recv_cleanup)(MultiFDRecvParams *p); + /* Read all pages */ +- int (*recv_pages)(MultiFDRecvParams *p, uint32_t used, Error **errp); ++ int (*recv_pages)(MultiFDRecvParams *p, Error **errp); + } MultiFDMethods; + + void multifd_register_ops(int method, MultiFDMethods *ops); +-- +2.35.3 + diff --git a/SOURCES/kvm-vhost-net-fix-improper-cleanup-in-vhost_net_start.patch b/SOURCES/kvm-vhost-net-fix-improper-cleanup-in-vhost_net_start.patch new file mode 100644 index 0000000..e736ac4 --- /dev/null +++ b/SOURCES/kvm-vhost-net-fix-improper-cleanup-in-vhost_net_start.patch @@ -0,0 +1,56 @@ +From 2206114457f3c575b2cf148ff643f78d5e67e8d2 Mon Sep 17 00:00:00 2001 +From: Si-Wei Liu +Date: Fri, 6 May 2022 19:28:15 -0700 +Subject: [PATCH 4/7] vhost-net: fix improper cleanup in vhost_net_start +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cindy Lu +RH-MergeRequest: 203: vdpa :sync the Multiqueue fixes for vhost-vDPA +RH-Commit: [4/7] b09e0785b6e1c46da3ff59a8e4f4dea7d86e4f0b +RH-Bugzilla: 2095794 +RH-Acked-by: MST +RH-Acked-by: Eugenio Pérez +RH-Acked-by: Jason Wang + +vhost_net_start() missed a corresponding stop_one() upon error from +vhost_set_vring_enable(). While at it, make the error handling for +err_start more robust. No real issue was found due to this though. + +Signed-off-by: Si-Wei Liu +Acked-by: Jason Wang +Message-Id: <1651890498-24478-5-git-send-email-si-wei.liu@oracle.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +(cherry picked from commit 6f3910b5eee00b8cc959e94659c0d524c482a418) +Signed-off-by: Cindy Lu +--- + hw/net/vhost_net.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c +index 30379d2ca4..d6d7c51f62 100644 +--- a/hw/net/vhost_net.c ++++ b/hw/net/vhost_net.c +@@ -381,6 +381,7 @@ int vhost_net_start(VirtIODevice *dev, NetClientState *ncs, + r = vhost_set_vring_enable(peer, peer->vring_enable); + + if (r < 0) { ++ vhost_net_stop_one(get_vhost_net(peer), dev); + goto err_start; + } + } +@@ -390,7 +391,8 @@ int vhost_net_start(VirtIODevice *dev, NetClientState *ncs, + + err_start: + while (--i >= 0) { +- peer = qemu_get_peer(ncs , i); ++ peer = qemu_get_peer(ncs, i < data_queue_pairs ? ++ i : n->max_queue_pairs); + vhost_net_stop_one(get_vhost_net(peer), dev); + } + e = k->set_guest_notifiers(qbus->parent, total_notifiers, false); +-- +2.31.1 + diff --git a/SOURCES/kvm-vhost-vdpa-backend-feature-should-set-only-once.patch b/SOURCES/kvm-vhost-vdpa-backend-feature-should-set-only-once.patch new file mode 100644 index 0000000..b4e1fd6 --- /dev/null +++ b/SOURCES/kvm-vhost-vdpa-backend-feature-should-set-only-once.patch @@ -0,0 +1,58 @@ +From 975046c44909eef91e8b3cfafe1c0dd55151937b Mon Sep 17 00:00:00 2001 +From: Si-Wei Liu +Date: Fri, 6 May 2022 19:28:16 -0700 +Subject: [PATCH 5/7] vhost-vdpa: backend feature should set only once +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cindy Lu +RH-MergeRequest: 203: vdpa :sync the Multiqueue fixes for vhost-vDPA +RH-Commit: [5/7] 8db716cb0cec794861c7a63c0b33ab7f8367232d +RH-Bugzilla: 2095794 +RH-Acked-by: MST +RH-Acked-by: Eugenio Pérez +RH-Acked-by: Jason Wang + +The vhost_vdpa_one_time_request() branch in +vhost_vdpa_set_backend_cap() incorrectly sends down +ioctls on vhost_dev with non-zero index. This may +end up with multiple VHOST_SET_BACKEND_FEATURES +ioctl calls sent down on the vhost-vdpa fd that is +shared between all these vhost_dev's. + +To fix it, send down ioctl only once via the first +vhost_dev with index 0. Toggle the polarity of the +vhost_vdpa_one_time_request() test should do the +trick. + +Fixes: 4d191cfdc7de ("vhost-vdpa: classify one time request") +Signed-off-by: Si-Wei Liu +Reviewed-by: Stefano Garzarella +Acked-by: Jason Wang +Acked-by: Eugenio Pérez +Message-Id: <1651890498-24478-6-git-send-email-si-wei.liu@oracle.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +(cherry picked from commit 6aee7e4233f6467f69531fcd352adff028f3f5ea) +Signed-off-by: Cindy Lu +--- + hw/virtio/vhost-vdpa.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c +index 78da48a333..a9be24776a 100644 +--- a/hw/virtio/vhost-vdpa.c ++++ b/hw/virtio/vhost-vdpa.c +@@ -525,7 +525,7 @@ static int vhost_vdpa_set_backend_cap(struct vhost_dev *dev) + + features &= f; + +- if (vhost_vdpa_one_time_request(dev)) { ++ if (!vhost_vdpa_one_time_request(dev)) { + r = vhost_vdpa_call(dev, VHOST_SET_BACKEND_FEATURES, &features); + if (r) { + return -EFAULT; +-- +2.31.1 + diff --git a/SOURCES/kvm-vhost-vdpa-change-name-and-polarity-for-vhost_vdpa_o.patch b/SOURCES/kvm-vhost-vdpa-change-name-and-polarity-for-vhost_vdpa_o.patch new file mode 100644 index 0000000..9912a5f --- /dev/null +++ b/SOURCES/kvm-vhost-vdpa-change-name-and-polarity-for-vhost_vdpa_o.patch @@ -0,0 +1,123 @@ +From 073595e503c4f13a7e598bdb83b2ea724d169cbf Mon Sep 17 00:00:00 2001 +From: Si-Wei Liu +Date: Fri, 6 May 2022 19:28:17 -0700 +Subject: [PATCH 6/7] vhost-vdpa: change name and polarity for + vhost_vdpa_one_time_request() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cindy Lu +RH-MergeRequest: 203: vdpa :sync the Multiqueue fixes for vhost-vDPA +RH-Commit: [6/7] 9ab7ee150e1876a170b56db36e94a5b3bbf09535 +RH-Bugzilla: 2095794 +RH-Acked-by: MST +RH-Acked-by: Eugenio Pérez +RH-Acked-by: Jason Wang + +The name vhost_vdpa_one_time_request() was confusing. No +matter whatever it returns, its typical occurrence had +always been at requests that only need to be applied once. +And the name didn't suggest what it actually checks for. +Change it to vhost_vdpa_first_dev() with polarity flipped +for better readibility of code. That way it is able to +reflect what the check is really about. + +This call is applicable to request which performs operation +only once, before queues are set up, and usually at the beginning +of the caller function. Document the requirement for it in place. + +Signed-off-by: Si-Wei Liu +Message-Id: <1651890498-24478-7-git-send-email-si-wei.liu@oracle.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +Reviewed-by: Stefano Garzarella +Acked-by: Jason Wang +(cherry picked from commit d71b0609fc04217e28d17009f04d74b08be6f466) +Signed-off-by: Cindy Lu +--- + hw/virtio/vhost-vdpa.c | 23 +++++++++++++++-------- + 1 file changed, 15 insertions(+), 8 deletions(-) + +diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c +index a9be24776a..38bbcb3c18 100644 +--- a/hw/virtio/vhost-vdpa.c ++++ b/hw/virtio/vhost-vdpa.c +@@ -319,11 +319,18 @@ static void vhost_vdpa_get_iova_range(struct vhost_vdpa *v) + v->iova_range.last); + } + +-static bool vhost_vdpa_one_time_request(struct vhost_dev *dev) ++/* ++ * The use of this function is for requests that only need to be ++ * applied once. Typically such request occurs at the beginning ++ * of operation, and before setting up queues. It should not be ++ * used for request that performs operation until all queues are ++ * set, which would need to check dev->vq_index_end instead. ++ */ ++static bool vhost_vdpa_first_dev(struct vhost_dev *dev) + { + struct vhost_vdpa *v = dev->opaque; + +- return v->index != 0; ++ return v->index == 0; + } + + static int vhost_vdpa_init(struct vhost_dev *dev, void *opaque, Error **errp) +@@ -351,7 +358,7 @@ static int vhost_vdpa_init(struct vhost_dev *dev, void *opaque, Error **errp) + + vhost_vdpa_get_iova_range(v); + +- if (vhost_vdpa_one_time_request(dev)) { ++ if (!vhost_vdpa_first_dev(dev)) { + return 0; + } + +@@ -468,7 +475,7 @@ static int vhost_vdpa_memslots_limit(struct vhost_dev *dev) + static int vhost_vdpa_set_mem_table(struct vhost_dev *dev, + struct vhost_memory *mem) + { +- if (vhost_vdpa_one_time_request(dev)) { ++ if (!vhost_vdpa_first_dev(dev)) { + return 0; + } + +@@ -496,7 +503,7 @@ static int vhost_vdpa_set_features(struct vhost_dev *dev, + { + int ret; + +- if (vhost_vdpa_one_time_request(dev)) { ++ if (!vhost_vdpa_first_dev(dev)) { + return 0; + } + +@@ -525,7 +532,7 @@ static int vhost_vdpa_set_backend_cap(struct vhost_dev *dev) + + features &= f; + +- if (!vhost_vdpa_one_time_request(dev)) { ++ if (vhost_vdpa_first_dev(dev)) { + r = vhost_vdpa_call(dev, VHOST_SET_BACKEND_FEATURES, &features); + if (r) { + return -EFAULT; +@@ -670,7 +677,7 @@ static int vhost_vdpa_dev_start(struct vhost_dev *dev, bool started) + static int vhost_vdpa_set_log_base(struct vhost_dev *dev, uint64_t base, + struct vhost_log *log) + { +- if (vhost_vdpa_one_time_request(dev)) { ++ if (!vhost_vdpa_first_dev(dev)) { + return 0; + } + +@@ -739,7 +746,7 @@ static int vhost_vdpa_get_features(struct vhost_dev *dev, + + static int vhost_vdpa_set_owner(struct vhost_dev *dev) + { +- if (vhost_vdpa_one_time_request(dev)) { ++ if (!vhost_vdpa_first_dev(dev)) { + return 0; + } + +-- +2.31.1 + diff --git a/SOURCES/kvm-vhost-vdpa-fix-improper-cleanup-in-net_init_vhost_vd.patch b/SOURCES/kvm-vhost-vdpa-fix-improper-cleanup-in-net_init_vhost_vd.patch new file mode 100644 index 0000000..f258121 --- /dev/null +++ b/SOURCES/kvm-vhost-vdpa-fix-improper-cleanup-in-net_init_vhost_vd.patch @@ -0,0 +1,48 @@ +From 004f7409312c45f17eaaceac697637d3da257964 Mon Sep 17 00:00:00 2001 +From: Si-Wei Liu +Date: Fri, 6 May 2022 19:28:14 -0700 +Subject: [PATCH 3/7] vhost-vdpa: fix improper cleanup in net_init_vhost_vdpa +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cindy Lu +RH-MergeRequest: 203: vdpa :sync the Multiqueue fixes for vhost-vDPA +RH-Commit: [3/7] 5f5e7577818744305f811667461e530acd9977d1 +RH-Bugzilla: 2095794 +RH-Acked-by: MST +RH-Acked-by: Eugenio Pérez +RH-Acked-by: Jason Wang + +... such that no memory leaks on dangling net clients in case of +error. + +Signed-off-by: Si-Wei Liu +Acked-by: Jason Wang +Message-Id: <1651890498-24478-4-git-send-email-si-wei.liu@oracle.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +(cherry picked from commit 9bd055073e375c8a0d7ebce925e05d914d69fc7f) +Signed-off-by: Cindy Lu +--- + net/vhost-vdpa.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c +index 25dd6dd975..814f704687 100644 +--- a/net/vhost-vdpa.c ++++ b/net/vhost-vdpa.c +@@ -306,7 +306,9 @@ int net_init_vhost_vdpa(const Netdev *netdev, const char *name, + + err: + if (i) { +- qemu_del_net_client(ncs[0]); ++ for (i--; i >= 0; i--) { ++ qemu_del_net_client(ncs[i]); ++ } + } + qemu_close(vdpa_device_fd); + g_free(ncs); +-- +2.31.1 + diff --git a/SOURCES/kvm-virtio-net-align-ctrl_vq-index-for-non-mq-guest-for-.patch b/SOURCES/kvm-virtio-net-align-ctrl_vq-index-for-non-mq-guest-for-.patch new file mode 100644 index 0000000..d454622 --- /dev/null +++ b/SOURCES/kvm-virtio-net-align-ctrl_vq-index-for-non-mq-guest-for-.patch @@ -0,0 +1,143 @@ +From e1ff655fb1ca5812d0456e2054a5a9ab755d399b Mon Sep 17 00:00:00 2001 +From: Si-Wei Liu +Date: Fri, 6 May 2022 19:28:13 -0700 +Subject: [PATCH 2/7] virtio-net: align ctrl_vq index for non-mq guest for + vhost_vdpa +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cindy Lu +RH-MergeRequest: 203: vdpa :sync the Multiqueue fixes for vhost-vDPA +RH-Commit: [2/7] bc60c58d6e36ea0698a72d806ac930e5cca199c7 +RH-Bugzilla: 2095794 +RH-Acked-by: MST +RH-Acked-by: Eugenio Pérez +RH-Acked-by: Jason Wang + +With MQ enabled vdpa device and non-MQ supporting guest e.g. +booting vdpa with mq=on over OVMF of single vqp, below assert +failure is seen: + +../hw/virtio/vhost-vdpa.c:560: vhost_vdpa_get_vq_index: Assertion `idx >= dev->vq_index && idx < dev->vq_index + dev->nvqs' failed. + +0 0x00007f8ce3ff3387 in raise () at /lib64/libc.so.6 +1 0x00007f8ce3ff4a78 in abort () at /lib64/libc.so.6 +2 0x00007f8ce3fec1a6 in __assert_fail_base () at /lib64/libc.so.6 +3 0x00007f8ce3fec252 in () at /lib64/libc.so.6 +4 0x0000558f52d79421 in vhost_vdpa_get_vq_index (dev=, idx=) at ../hw/virtio/vhost-vdpa.c:563 +5 0x0000558f52d79421 in vhost_vdpa_get_vq_index (dev=, idx=) at ../hw/virtio/vhost-vdpa.c:558 +6 0x0000558f52d7329a in vhost_virtqueue_mask (hdev=0x558f55c01800, vdev=0x558f568f91f0, n=2, mask=) at ../hw/virtio/vhost.c:1557 +7 0x0000558f52c6b89a in virtio_pci_set_guest_notifier (d=d@entry=0x558f568f0f60, n=n@entry=2, assign=assign@entry=true, with_irqfd=with_irqfd@entry=false) + at ../hw/virtio/virtio-pci.c:974 +8 0x0000558f52c6c0d8 in virtio_pci_set_guest_notifiers (d=0x558f568f0f60, nvqs=3, assign=true) at ../hw/virtio/virtio-pci.c:1019 +9 0x0000558f52bf091d in vhost_net_start (dev=dev@entry=0x558f568f91f0, ncs=0x558f56937cd0, data_queue_pairs=data_queue_pairs@entry=1, cvq=cvq@entry=1) + at ../hw/net/vhost_net.c:361 +10 0x0000558f52d4e5e7 in virtio_net_set_status (status=, n=0x558f568f91f0) at ../hw/net/virtio-net.c:289 +11 0x0000558f52d4e5e7 in virtio_net_set_status (vdev=0x558f568f91f0, status=15 '\017') at ../hw/net/virtio-net.c:370 +12 0x0000558f52d6c4b2 in virtio_set_status (vdev=vdev@entry=0x558f568f91f0, val=val@entry=15 '\017') at ../hw/virtio/virtio.c:1945 +13 0x0000558f52c69eff in virtio_pci_common_write (opaque=0x558f568f0f60, addr=, val=, size=) at ../hw/virtio/virtio-pci.c:1292 +14 0x0000558f52d15d6e in memory_region_write_accessor (mr=0x558f568f19d0, addr=20, value=, size=1, shift=, mask=, attrs=...) + at ../softmmu/memory.c:492 +15 0x0000558f52d127de in access_with_adjusted_size (addr=addr@entry=20, value=value@entry=0x7f8cdbffe748, size=size@entry=1, access_size_min=, access_size_max=, access_fn=0x558f52d15cf0 , mr=0x558f568f19d0, attrs=...) at ../softmmu/memory.c:554 +16 0x0000558f52d157ef in memory_region_dispatch_write (mr=mr@entry=0x558f568f19d0, addr=20, data=, op=, attrs=attrs@entry=...) + at ../softmmu/memory.c:1504 +17 0x0000558f52d078e7 in flatview_write_continue (fv=fv@entry=0x7f8accbc3b90, addr=addr@entry=103079215124, attrs=..., ptr=ptr@entry=0x7f8ce6300028, len=len@entry=1, addr1=, l=, mr=0x558f568f19d0) at /home/opc/qemu-upstream/include/qemu/host-utils.h:165 +18 0x0000558f52d07b06 in flatview_write (fv=0x7f8accbc3b90, addr=103079215124, attrs=..., buf=0x7f8ce6300028, len=1) at ../softmmu/physmem.c:2822 +19 0x0000558f52d0b36b in address_space_write (as=, addr=, attrs=..., buf=buf@entry=0x7f8ce6300028, len=) + at ../softmmu/physmem.c:2914 +20 0x0000558f52d0b3da in address_space_rw (as=, addr=, attrs=..., + attrs@entry=..., buf=buf@entry=0x7f8ce6300028, len=, is_write=) at ../softmmu/physmem.c:2924 +21 0x0000558f52dced09 in kvm_cpu_exec (cpu=cpu@entry=0x558f55c2da60) at ../accel/kvm/kvm-all.c:2903 +22 0x0000558f52dcfabd in kvm_vcpu_thread_fn (arg=arg@entry=0x558f55c2da60) at ../accel/kvm/kvm-accel-ops.c:49 +23 0x0000558f52f9f04a in qemu_thread_start (args=) at ../util/qemu-thread-posix.c:556 +24 0x00007f8ce4392ea5 in start_thread () at /lib64/libpthread.so.0 +25 0x00007f8ce40bb9fd in clone () at /lib64/libc.so.6 + +The cause for the assert failure is due to that the vhost_dev index +for the ctrl vq was not aligned with actual one in use by the guest. +Upon multiqueue feature negotiation in virtio_net_set_multiqueue(), +if guest doesn't support multiqueue, the guest vq layout would shrink +to a single queue pair, consisting of 3 vqs in total (rx, tx and ctrl). +This results in ctrl_vq taking a different vhost_dev group index than +the default. We can map vq to the correct vhost_dev group by checking +if MQ is supported by guest and successfully negotiated. Since the +MQ feature is only present along with CTRL_VQ, we ensure the index +2 is only meant for the control vq while MQ is not supported by guest. + +Fixes: 22288fe ("virtio-net: vhost control virtqueue support") +Suggested-by: Jason Wang +Signed-off-by: Si-Wei Liu +Acked-by: Jason Wang +Message-Id: <1651890498-24478-3-git-send-email-si-wei.liu@oracle.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +(cherry picked from commit 68b0a6395f36a8f48f56f46d05f30be2067598b0) +Signed-off-by: Cindy Lu +--- + hw/net/virtio-net.c | 33 +++++++++++++++++++++++++++++++-- + 1 file changed, 31 insertions(+), 2 deletions(-) + +diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c +index ec045c3f41..f118379bb4 100644 +--- a/hw/net/virtio-net.c ++++ b/hw/net/virtio-net.c +@@ -14,6 +14,7 @@ + #include "qemu/osdep.h" + #include "qemu/atomic.h" + #include "qemu/iov.h" ++#include "qemu/log.h" + #include "qemu/main-loop.h" + #include "qemu/module.h" + #include "hw/virtio/virtio.h" +@@ -3163,8 +3164,22 @@ static NetClientInfo net_virtio_info = { + static bool virtio_net_guest_notifier_pending(VirtIODevice *vdev, int idx) + { + VirtIONet *n = VIRTIO_NET(vdev); +- NetClientState *nc = qemu_get_subqueue(n->nic, vq2q(idx)); ++ NetClientState *nc; + assert(n->vhost_started); ++ if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_MQ) && idx == 2) { ++ /* Must guard against invalid features and bogus queue index ++ * from being set by malicious guest, or penetrated through ++ * buggy migration stream. ++ */ ++ if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) { ++ qemu_log_mask(LOG_GUEST_ERROR, ++ "%s: bogus vq index ignored\n", __func__); ++ return false; ++ } ++ nc = qemu_get_subqueue(n->nic, n->max_queue_pairs); ++ } else { ++ nc = qemu_get_subqueue(n->nic, vq2q(idx)); ++ } + return vhost_net_virtqueue_pending(get_vhost_net(nc->peer), idx); + } + +@@ -3172,8 +3187,22 @@ static void virtio_net_guest_notifier_mask(VirtIODevice *vdev, int idx, + bool mask) + { + VirtIONet *n = VIRTIO_NET(vdev); +- NetClientState *nc = qemu_get_subqueue(n->nic, vq2q(idx)); ++ NetClientState *nc; + assert(n->vhost_started); ++ if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_MQ) && idx == 2) { ++ /* Must guard against invalid features and bogus queue index ++ * from being set by malicious guest, or penetrated through ++ * buggy migration stream. ++ */ ++ if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) { ++ qemu_log_mask(LOG_GUEST_ERROR, ++ "%s: bogus vq index ignored\n", __func__); ++ return; ++ } ++ nc = qemu_get_subqueue(n->nic, n->max_queue_pairs); ++ } else { ++ nc = qemu_get_subqueue(n->nic, vq2q(idx)); ++ } + vhost_net_virtqueue_mask(get_vhost_net(nc->peer), + vdev, idx, mask); + } +-- +2.31.1 + diff --git a/SOURCES/kvm-virtio-net-don-t-handle-mq-request-in-userspace-hand.patch b/SOURCES/kvm-virtio-net-don-t-handle-mq-request-in-userspace-hand.patch new file mode 100644 index 0000000..42d8939 --- /dev/null +++ b/SOURCES/kvm-virtio-net-don-t-handle-mq-request-in-userspace-hand.patch @@ -0,0 +1,109 @@ +From 3cd90463553c2e0915940b17d9955b4b0cf5ed11 Mon Sep 17 00:00:00 2001 +From: Si-Wei Liu +Date: Fri, 6 May 2022 19:28:18 -0700 +Subject: [PATCH 7/7] virtio-net: don't handle mq request in userspace handler + for vhost-vdpa +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cindy Lu +RH-MergeRequest: 203: vdpa :sync the Multiqueue fixes for vhost-vDPA +RH-Commit: [7/7] fc6f4e145f2d6abf96e75dd0d76ed04660a1f20a +RH-Bugzilla: 2095794 +RH-Acked-by: MST +RH-Acked-by: Eugenio Pérez +RH-Acked-by: Jason Wang + +virtio_queue_host_notifier_read() tends to read pending event +left behind on ioeventfd in the vhost_net_stop() path, and +attempts to handle outstanding kicks from userspace vq handler. +However, in the ctrl_vq handler, virtio_net_handle_mq() has a +recursive call into virtio_net_set_status(), which may lead to +segmentation fault as shown in below stack trace: + +0 0x000055f800df1780 in qdev_get_parent_bus (dev=0x0) at ../hw/core/qdev.c:376 +1 0x000055f800c68ad8 in virtio_bus_device_iommu_enabled (vdev=vdev@entry=0x0) at ../hw/virtio/virtio-bus.c:331 +2 0x000055f800d70d7f in vhost_memory_unmap (dev=) at ../hw/virtio/vhost.c:318 +3 0x000055f800d70d7f in vhost_memory_unmap (dev=, buffer=0x7fc19bec5240, len=2052, is_write=1, access_len=2052) at ../hw/virtio/vhost.c:336 +4 0x000055f800d71867 in vhost_virtqueue_stop (dev=dev@entry=0x55f8037ccc30, vdev=vdev@entry=0x55f8044ec590, vq=0x55f8037cceb0, idx=0) at ../hw/virtio/vhost.c:1241 +5 0x000055f800d7406c in vhost_dev_stop (hdev=hdev@entry=0x55f8037ccc30, vdev=vdev@entry=0x55f8044ec590) at ../hw/virtio/vhost.c:1839 +6 0x000055f800bf00a7 in vhost_net_stop_one (net=0x55f8037ccc30, dev=0x55f8044ec590) at ../hw/net/vhost_net.c:315 +7 0x000055f800bf0678 in vhost_net_stop (dev=dev@entry=0x55f8044ec590, ncs=0x55f80452bae0, data_queue_pairs=data_queue_pairs@entry=7, cvq=cvq@entry=1) + at ../hw/net/vhost_net.c:423 +8 0x000055f800d4e628 in virtio_net_set_status (status=, n=0x55f8044ec590) at ../hw/net/virtio-net.c:296 +9 0x000055f800d4e628 in virtio_net_set_status (vdev=vdev@entry=0x55f8044ec590, status=15 '\017') at ../hw/net/virtio-net.c:370 +10 0x000055f800d534d8 in virtio_net_handle_ctrl (iov_cnt=, iov=, cmd=0 '\000', n=0x55f8044ec590) at ../hw/net/virtio-net.c:1408 +11 0x000055f800d534d8 in virtio_net_handle_ctrl (vdev=0x55f8044ec590, vq=0x7fc1a7e888d0) at ../hw/net/virtio-net.c:1452 +12 0x000055f800d69f37 in virtio_queue_host_notifier_read (vq=0x7fc1a7e888d0) at ../hw/virtio/virtio.c:2331 +13 0x000055f800d69f37 in virtio_queue_host_notifier_read (n=n@entry=0x7fc1a7e8894c) at ../hw/virtio/virtio.c:3575 +14 0x000055f800c688e6 in virtio_bus_cleanup_host_notifier (bus=, n=n@entry=14) at ../hw/virtio/virtio-bus.c:312 +15 0x000055f800d73106 in vhost_dev_disable_notifiers (hdev=hdev@entry=0x55f8035b51b0, vdev=vdev@entry=0x55f8044ec590) + at ../../../include/hw/virtio/virtio-bus.h:35 +16 0x000055f800bf00b2 in vhost_net_stop_one (net=0x55f8035b51b0, dev=0x55f8044ec590) at ../hw/net/vhost_net.c:316 +17 0x000055f800bf0678 in vhost_net_stop (dev=dev@entry=0x55f8044ec590, ncs=0x55f80452bae0, data_queue_pairs=data_queue_pairs@entry=7, cvq=cvq@entry=1) + at ../hw/net/vhost_net.c:423 +18 0x000055f800d4e628 in virtio_net_set_status (status=, n=0x55f8044ec590) at ../hw/net/virtio-net.c:296 +19 0x000055f800d4e628 in virtio_net_set_status (vdev=0x55f8044ec590, status=15 '\017') at ../hw/net/virtio-net.c:370 +20 0x000055f800d6c4b2 in virtio_set_status (vdev=0x55f8044ec590, val=) at ../hw/virtio/virtio.c:1945 +21 0x000055f800d11d9d in vm_state_notify (running=running@entry=false, state=state@entry=RUN_STATE_SHUTDOWN) at ../softmmu/runstate.c:333 +22 0x000055f800d04e7a in do_vm_stop (state=state@entry=RUN_STATE_SHUTDOWN, send_stop=send_stop@entry=false) at ../softmmu/cpus.c:262 +23 0x000055f800d04e99 in vm_shutdown () at ../softmmu/cpus.c:280 +24 0x000055f800d126af in qemu_cleanup () at ../softmmu/runstate.c:812 +25 0x000055f800ad5b13 in main (argc=, argv=, envp=) at ../softmmu/main.c:51 + +For now, temporarily disable handling MQ request from the ctrl_vq +userspace hanlder to avoid the recursive virtio_net_set_status() +call. Some rework is needed to allow changing the number of +queues without going through a full virtio_net_set_status cycle, +particularly for vhost-vdpa backend. + +This patch will need to be reverted as soon as future patches of +having the change of #queues handled in userspace is merged. + +Fixes: 402378407db ("vhost-vdpa: multiqueue support") +Signed-off-by: Si-Wei Liu +Acked-by: Jason Wang +Message-Id: <1651890498-24478-8-git-send-email-si-wei.liu@oracle.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +(cherry picked from commit 2a7888cc3aa31faee839fa5dddad354ff8941f4c) +Signed-off-by: Cindy Lu +--- + hw/net/virtio-net.c | 13 +++++++++++++ + 1 file changed, 13 insertions(+) + +diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c +index f118379bb4..7e172ef829 100644 +--- a/hw/net/virtio-net.c ++++ b/hw/net/virtio-net.c +@@ -1373,6 +1373,7 @@ static int virtio_net_handle_mq(VirtIONet *n, uint8_t cmd, + { + VirtIODevice *vdev = VIRTIO_DEVICE(n); + uint16_t queue_pairs; ++ NetClientState *nc = qemu_get_queue(n->nic); + + virtio_net_disable_rss(n); + if (cmd == VIRTIO_NET_CTRL_MQ_HASH_CONFIG) { +@@ -1404,6 +1405,18 @@ static int virtio_net_handle_mq(VirtIONet *n, uint8_t cmd, + return VIRTIO_NET_ERR; + } + ++ /* Avoid changing the number of queue_pairs for vdpa device in ++ * userspace handler. A future fix is needed to handle the mq ++ * change in userspace handler with vhost-vdpa. Let's disable ++ * the mq handling from userspace for now and only allow get ++ * done through the kernel. Ripples may be seen when falling ++ * back to userspace, but without doing it qemu process would ++ * crash on a recursive entry to virtio_net_set_status(). ++ */ ++ if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) { ++ return VIRTIO_NET_ERR; ++ } ++ + n->curr_queue_pairs = queue_pairs; + /* stop the backend before changing the number of queue_pairs to avoid handling a + * disabled queue */ +-- +2.31.1 + diff --git a/SOURCES/kvm-virtio-net-setup-vhost_dev-and-notifiers-for-cvq-onl.patch b/SOURCES/kvm-virtio-net-setup-vhost_dev-and-notifiers-for-cvq-onl.patch new file mode 100644 index 0000000..12b119f --- /dev/null +++ b/SOURCES/kvm-virtio-net-setup-vhost_dev-and-notifiers-for-cvq-onl.patch @@ -0,0 +1,52 @@ +From 60224ca528f32d3f8e7a5ec65c1851c9b698dfb6 Mon Sep 17 00:00:00 2001 +From: Si-Wei Liu +Date: Fri, 6 May 2022 19:28:12 -0700 +Subject: [PATCH 1/7] virtio-net: setup vhost_dev and notifiers for cvq only + when feature is negotiated +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cindy Lu +RH-MergeRequest: 203: vdpa :sync the Multiqueue fixes for vhost-vDPA +RH-Commit: [1/7] fe2e27958344b5d5089b383f36c5d9e08472a591 +RH-Bugzilla: 2095794 +RH-Acked-by: MST +RH-Acked-by: Eugenio Pérez +RH-Acked-by: Jason Wang + +When the control virtqueue feature is absent or not negotiated, +vhost_net_start() still tries to set up vhost_dev and install +vhost notifiers for the control virtqueue, which results in +erroneous ioctl calls with incorrect queue index sending down +to driver. Do that only when needed. + +Fixes: 22288fe ("virtio-net: vhost control virtqueue support") +Signed-off-by: Si-Wei Liu +Acked-by: Jason Wang +Message-Id: <1651890498-24478-2-git-send-email-si-wei.liu@oracle.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +(cherry picked from commit aa8581945a13712ff3eed0ad3ba7a9664fc1604b) +Signed-off-by: Cindy Lu +--- + hw/net/virtio-net.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c +index e1f4748831..ec045c3f41 100644 +--- a/hw/net/virtio-net.c ++++ b/hw/net/virtio-net.c +@@ -244,7 +244,8 @@ static void virtio_net_vhost_status(VirtIONet *n, uint8_t status) + VirtIODevice *vdev = VIRTIO_DEVICE(n); + NetClientState *nc = qemu_get_queue(n->nic); + int queue_pairs = n->multiqueue ? n->max_queue_pairs : 1; +- int cvq = n->max_ncs - n->max_queue_pairs; ++ int cvq = virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ) ? ++ n->max_ncs - n->max_queue_pairs : 0; + + if (!get_vhost_net(nc->peer)) { + return; +-- +2.31.1 + diff --git a/SPECS/qemu-kvm.spec b/SPECS/qemu-kvm.spec index ca923fd..91afbe2 100644 --- a/SPECS/qemu-kvm.spec +++ b/SPECS/qemu-kvm.spec @@ -83,7 +83,7 @@ Obsoletes: %1-rhev <= %{epoch}:%{version}-%{release} Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm Version: 6.2.0 -Release: 11%{?rcrel}%{?dist}.2 +Release: 11%{?rcrel}%{?dist}.5 # Epoch because we pushed a qemu-1.0 package. AIUI this can't ever be dropped Epoch: 15 License: GPLv2 and GPLv2+ and CC-BY @@ -268,6 +268,92 @@ Patch83: kvm-iotests-108-Fix-when-missing-user_allow_other.patch Patch84: kvm-Revert-redhat-Add-some-devices-for-exporting-upstrea.patch # For bz#2082622 - CVE-2021-4206 virt:rhel/qemu-kvm: QEMU: QXL: integer overflow in cursor_alloc() can lead to heap buffer overflow [rhel-8.6.0.z] Patch85: kvm-ui-cursor-fix-integer-overflow-in-cursor_alloc-CVE-2.patch +# For bz#2095794 - PXE boot crash qemu when using multiqueue vDPA [rhel-8.6.0.z] +Patch86: kvm-virtio-net-setup-vhost_dev-and-notifiers-for-cvq-onl.patch +# For bz#2095794 - PXE boot crash qemu when using multiqueue vDPA [rhel-8.6.0.z] +Patch87: kvm-virtio-net-align-ctrl_vq-index-for-non-mq-guest-for-.patch +# For bz#2095794 - PXE boot crash qemu when using multiqueue vDPA [rhel-8.6.0.z] +Patch88: kvm-vhost-vdpa-fix-improper-cleanup-in-net_init_vhost_vd.patch +# For bz#2095794 - PXE boot crash qemu when using multiqueue vDPA [rhel-8.6.0.z] +Patch89: kvm-vhost-net-fix-improper-cleanup-in-vhost_net_start.patch +# For bz#2095794 - PXE boot crash qemu when using multiqueue vDPA [rhel-8.6.0.z] +Patch90: kvm-vhost-vdpa-backend-feature-should-set-only-once.patch +# For bz#2095794 - PXE boot crash qemu when using multiqueue vDPA [rhel-8.6.0.z] +Patch91: kvm-vhost-vdpa-change-name-and-polarity-for-vhost_vdpa_o.patch +# For bz#2095794 - PXE boot crash qemu when using multiqueue vDPA [rhel-8.6.0.z] +Patch92: kvm-virtio-net-don-t-handle-mq-request-in-userspace-hand.patch +# For bz#2117252 - Pull MSG_ZEROCOPY on QEMU Live Migration Patches into RHEL 8 [rhel-8.6.0.z] +Patch93: kvm-migration-Never-call-twice-qemu_target_page_size.patch +# For bz#2117252 - Pull MSG_ZEROCOPY on QEMU Live Migration Patches into RHEL 8 [rhel-8.6.0.z] +Patch94: kvm-multifd-Rename-used-field-to-num.patch +# For bz#2117252 - Pull MSG_ZEROCOPY on QEMU Live Migration Patches into RHEL 8 [rhel-8.6.0.z] +Patch95: kvm-multifd-Add-missing-documention.patch +# For bz#2117252 - Pull MSG_ZEROCOPY on QEMU Live Migration Patches into RHEL 8 [rhel-8.6.0.z] +Patch96: kvm-multifd-The-variable-is-only-used-inside-the-loop.patch +# For bz#2117252 - Pull MSG_ZEROCOPY on QEMU Live Migration Patches into RHEL 8 [rhel-8.6.0.z] +Patch97: kvm-multifd-remove-used-parameter-from-send_prepare-meth.patch +# For bz#2117252 - Pull MSG_ZEROCOPY on QEMU Live Migration Patches into RHEL 8 [rhel-8.6.0.z] +Patch98: kvm-multifd-remove-used-parameter-from-send_recv_pages-m.patch +# For bz#2117252 - Pull MSG_ZEROCOPY on QEMU Live Migration Patches into RHEL 8 [rhel-8.6.0.z] +Patch99: kvm-multifd-Fill-offset-and-block-for-reception.patch +# For bz#2117252 - Pull MSG_ZEROCOPY on QEMU Live Migration Patches into RHEL 8 [rhel-8.6.0.z] +Patch100: kvm-multifd-Make-zstd-compression-method-not-use-iovs.patch +# For bz#2117252 - Pull MSG_ZEROCOPY on QEMU Live Migration Patches into RHEL 8 [rhel-8.6.0.z] +Patch101: kvm-multifd-Make-zlib-compression-method-not-use-iovs.patch +# For bz#2117252 - Pull MSG_ZEROCOPY on QEMU Live Migration Patches into RHEL 8 [rhel-8.6.0.z] +Patch102: kvm-migration-All-this-fields-are-unsigned.patch +# For bz#2117252 - Pull MSG_ZEROCOPY on QEMU Live Migration Patches into RHEL 8 [rhel-8.6.0.z] +Patch103: kvm-multifd-Move-iov-from-pages-to-params.patch +# For bz#2117252 - Pull MSG_ZEROCOPY on QEMU Live Migration Patches into RHEL 8 [rhel-8.6.0.z] +Patch104: kvm-multifd-Make-zlib-use-iov-s.patch +# For bz#2117252 - Pull MSG_ZEROCOPY on QEMU Live Migration Patches into RHEL 8 [rhel-8.6.0.z] +Patch105: kvm-multifd-Make-zstd-use-iov-s.patch +# For bz#2117252 - Pull MSG_ZEROCOPY on QEMU Live Migration Patches into RHEL 8 [rhel-8.6.0.z] +Patch106: kvm-multifd-Remove-send_write-method.patch +# For bz#2117252 - Pull MSG_ZEROCOPY on QEMU Live Migration Patches into RHEL 8 [rhel-8.6.0.z] +Patch107: kvm-multifd-Use-a-single-writev-on-the-send-side.patch +# For bz#2117252 - Pull MSG_ZEROCOPY on QEMU Live Migration Patches into RHEL 8 [rhel-8.6.0.z] +Patch108: kvm-multifd-Use-normal-pages-array-on-the-send-side.patch +# For bz#2117252 - Pull MSG_ZEROCOPY on QEMU Live Migration Patches into RHEL 8 [rhel-8.6.0.z] +Patch109: kvm-QIOChannel-Add-flags-on-io_writev-and-introduce-io_f.patch +# For bz#2117252 - Pull MSG_ZEROCOPY on QEMU Live Migration Patches into RHEL 8 [rhel-8.6.0.z] +Patch110: kvm-QIOChannelSocket-Implement-io_writev-zero-copy-flag-.patch +# For bz#2117252 - Pull MSG_ZEROCOPY on QEMU Live Migration Patches into RHEL 8 [rhel-8.6.0.z] +Patch111: kvm-migration-Add-zero-copy-send-parameter-for-QMP-HMP-f.patch +# For bz#2117252 - Pull MSG_ZEROCOPY on QEMU Live Migration Patches into RHEL 8 [rhel-8.6.0.z] +Patch112: kvm-migration-Add-migrate_use_tls-helper.patch +# For bz#2117252 - Pull MSG_ZEROCOPY on QEMU Live Migration Patches into RHEL 8 [rhel-8.6.0.z] +Patch113: kvm-multifd-multifd_send_sync_main-now-returns-negative-.patch +# For bz#2117252 - Pull MSG_ZEROCOPY on QEMU Live Migration Patches into RHEL 8 [rhel-8.6.0.z] +Patch114: kvm-multifd-Send-header-packet-without-flags-if-zero-cop.patch +# For bz#2117252 - Pull MSG_ZEROCOPY on QEMU Live Migration Patches into RHEL 8 [rhel-8.6.0.z] +Patch115: kvm-multifd-Implement-zero-copy-write-in-multifd-migrati.patch +# For bz#2117252 - Pull MSG_ZEROCOPY on QEMU Live Migration Patches into RHEL 8 [rhel-8.6.0.z] +Patch116: kvm-QIOChannelSocket-Introduce-assert-and-reduce-ifdefs-.patch +# For bz#2117252 - Pull MSG_ZEROCOPY on QEMU Live Migration Patches into RHEL 8 [rhel-8.6.0.z] +Patch117: kvm-QIOChannelSocket-Fix-zero-copy-send-so-socket-flush-.patch +# For bz#2117252 - Pull MSG_ZEROCOPY on QEMU Live Migration Patches into RHEL 8 [rhel-8.6.0.z] +Patch118: kvm-migration-Change-zero_copy_send-from-migration-param.patch +# For bz#2117252 - Pull MSG_ZEROCOPY on QEMU Live Migration Patches into RHEL 8 [rhel-8.6.0.z] +Patch119: kvm-migration-Introduce-ram_transferred_add.patch +# For bz#2117252 - Pull MSG_ZEROCOPY on QEMU Live Migration Patches into RHEL 8 [rhel-8.6.0.z] +Patch120: kvm-migration-Tally-pre-copy-downtime-and-post-copy-byte.patch +# For bz#2117252 - Pull MSG_ZEROCOPY on QEMU Live Migration Patches into RHEL 8 [rhel-8.6.0.z] +Patch121: kvm-QIOChannelSocket-Fix-zero-copy-flush-returning-code-.patch +# For bz#2117252 - Pull MSG_ZEROCOPY on QEMU Live Migration Patches into RHEL 8 [rhel-8.6.0.z] +Patch122: kvm-Add-dirty-sync-missed-zero-copy-migration-stat.patch +# For bz#2117252 - Pull MSG_ZEROCOPY on QEMU Live Migration Patches into RHEL 8 [rhel-8.6.0.z] +Patch123: kvm-migration-multifd-Report-to-user-when-zerocopy-not-w.patch +# For bz#2117252 - Pull MSG_ZEROCOPY on QEMU Live Migration Patches into RHEL 8 [rhel-8.6.0.z] +Patch124: kvm-migration-Avoid-false-positive-on-non-supported-scen.patch +# For bz#2117252 - Pull MSG_ZEROCOPY on QEMU Live Migration Patches into RHEL 8 [rhel-8.6.0.z] +Patch125: kvm-migration-add-remaining-params-has_-true-in-migratio.patch +# For bz#2117252 - Pull MSG_ZEROCOPY on QEMU Live Migration Patches into RHEL 8 [rhel-8.6.0.z] +Patch126: kvm-QIOChannelSocket-Add-support-for-MSG_ZEROCOPY-IPV6.patch +# For bz#2109570 - Stalled IO Operations in VM [rhel-8.6.0.z] +Patch127: kvm-linux-aio-fix-unbalanced-plugged-counter-in-laio_io_.patch +# For bz#2109570 - Stalled IO Operations in VM [rhel-8.6.0.z] +Patch128: kvm-linux-aio-explain-why-max-batch-is-checked-in-laio_i.patch BuildRequires: wget @@ -1437,6 +1523,61 @@ sh %{_sysconfdir}/sysconfig/modules/kvm.modules &> /dev/null || : %changelog +* Mon Aug 29 2022 Jon Maloy - 6.2.0-11.el8_6.5 +- kvm-linux-aio-fix-unbalanced-plugged-counter-in-laio_io_.patch [bz#2109570] +- kvm-linux-aio-explain-why-max-batch-is-checked-in-laio_i.patch [bz#2109570] +- Resolves: bz#2109570 + (Stalled IO Operations in VM [rhel-8.6.0.z]) + +* Tue Aug 16 2022 Jon Maloy - 6.2.0-11.el8_6.4 +- kvm-migration-Never-call-twice-qemu_target_page_size.patch [bz#2117252] +- kvm-multifd-Rename-used-field-to-num.patch [bz#2117252] +- kvm-multifd-Add-missing-documention.patch [bz#2117252] +- kvm-multifd-The-variable-is-only-used-inside-the-loop.patch [bz#2117252] +- kvm-multifd-remove-used-parameter-from-send_prepare-meth.patch [bz#2117252] +- kvm-multifd-remove-used-parameter-from-send_recv_pages-m.patch [bz#2117252] +- kvm-multifd-Fill-offset-and-block-for-reception.patch [bz#2117252] +- kvm-multifd-Make-zstd-compression-method-not-use-iovs.patch [bz#2117252] +- kvm-multifd-Make-zlib-compression-method-not-use-iovs.patch [bz#2117252] +- kvm-migration-All-this-fields-are-unsigned.patch [bz#2117252] +- kvm-multifd-Move-iov-from-pages-to-params.patch [bz#2117252] +- kvm-multifd-Make-zlib-use-iov-s.patch [bz#2117252] +- kvm-multifd-Make-zstd-use-iov-s.patch [bz#2117252] +- kvm-multifd-Remove-send_write-method.patch [bz#2117252] +- kvm-multifd-Use-a-single-writev-on-the-send-side.patch [bz#2117252] +- kvm-multifd-Use-normal-pages-array-on-the-send-side.patch [bz#2117252] +- kvm-QIOChannel-Add-flags-on-io_writev-and-introduce-io_f.patch [bz#2117252] +- kvm-QIOChannelSocket-Implement-io_writev-zero-copy-flag-.patch [bz#2117252] +- kvm-migration-Add-zero-copy-send-parameter-for-QMP-HMP-f.patch [bz#2117252] +- kvm-migration-Add-migrate_use_tls-helper.patch [bz#2117252] +- kvm-multifd-multifd_send_sync_main-now-returns-negative-.patch [bz#2117252] +- kvm-multifd-Send-header-packet-without-flags-if-zero-cop.patch [bz#2117252] +- kvm-multifd-Implement-zero-copy-write-in-multifd-migrati.patch [bz#2117252] +- kvm-QIOChannelSocket-Introduce-assert-and-reduce-ifdefs-.patch [bz#2117252] +- kvm-QIOChannelSocket-Fix-zero-copy-send-so-socket-flush-.patch [bz#2117252] +- kvm-migration-Change-zero_copy_send-from-migration-param.patch [bz#2117252] +- kvm-migration-Introduce-ram_transferred_add.patch [bz#2117252] +- kvm-migration-Tally-pre-copy-downtime-and-post-copy-byte.patch [bz#2117252] +- kvm-QIOChannelSocket-Fix-zero-copy-flush-returning-code-.patch [bz#2117252] +- kvm-Add-dirty-sync-missed-zero-copy-migration-stat.patch [bz#2117252] +- kvm-migration-multifd-Report-to-user-when-zerocopy-not-w.patch [bz#2117252] +- kvm-migration-Avoid-false-positive-on-non-supported-scen.patch [bz#2117252] +- kvm-migration-add-remaining-params-has_-true-in-migratio.patch [bz#2117252] +- kvm-QIOChannelSocket-Add-support-for-MSG_ZEROCOPY-IPV6.patch [bz#2117252] +- Resolves: bz#2117252 + (Pull MSG_ZEROCOPY on QEMU Live Migration Patches into RHEL 8 [rhel-8.6.0.z]) + +* Mon Aug 08 2022 Miroslav Rezanina - 6.2.0-11.el8_6.3 +- kvm-virtio-net-setup-vhost_dev-and-notifiers-for-cvq-onl.patch [bz#2095794] +- kvm-virtio-net-align-ctrl_vq-index-for-non-mq-guest-for-.patch [bz#2095794] +- kvm-vhost-vdpa-fix-improper-cleanup-in-net_init_vhost_vd.patch [bz#2095794] +- kvm-vhost-net-fix-improper-cleanup-in-vhost_net_start.patch [bz#2095794] +- kvm-vhost-vdpa-backend-feature-should-set-only-once.patch [bz#2095794] +- kvm-vhost-vdpa-change-name-and-polarity-for-vhost_vdpa_o.patch [bz#2095794] +- kvm-virtio-net-don-t-handle-mq-request-in-userspace-hand.patch [bz#2095794] +- Resolves: bz#2095794 + (PXE boot crash qemu when using multiqueue vDPA [rhel-8.6.0.z]) + * Wed Jun 15 2022 Jon Maloy - 6.2.0-11.el8_6.2 - kvm-Revert-redhat-Add-some-devices-for-exporting-upstrea.patch [bz#2077928] - kvm-ui-cursor-fix-integer-overflow-in-cursor_alloc-CVE-2.patch [bz#2082622]