ed5979
From 3a29b50036b972caae5bca0e5dfc34d910b1d5e9 Mon Sep 17 00:00:00 2001
ed5979
From: "manish.mishra" <manish.mishra@nutanix.com>
ed5979
Date: Tue, 20 Dec 2022 18:44:17 +0000
ed5979
Subject: [PATCH 6/8] io: Add support for MSG_PEEK for socket channel
ed5979
MIME-Version: 1.0
ed5979
Content-Type: text/plain; charset=UTF-8
ed5979
Content-Transfer-Encoding: 8bit
ed5979
ed5979
RH-Author: Peter Xu <peterx@redhat.com>
ed5979
RH-MergeRequest: 150: migration: Fix multifd crash on channel disorders
ed5979
RH-Bugzilla: 2169732
ed5979
RH-Acked-by: quintela1 <quintela@redhat.com>
ed5979
RH-Acked-by: Leonardo Brás <leobras@redhat.com>
ed5979
RH-Acked-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
ed5979
RH-Commit: [1/2] 266563f3e387e97ec710d9bc179e5de26dfd09f1 (peterx/qemu-kvm)
ed5979
ed5979
MSG_PEEK peeks at the channel: the data is treated as unread and
ed5979
the next read shall still return this data. This support is
ed5979
currently added only for the socket class. An extra parameter 'flags'
ed5979
is added to io_readv calls to pass extra read flags like MSG_PEEK.
ed5979
ed5979
Reviewed-by: Peter Xu <peterx@redhat.com>
ed5979
Reviewed-by: Daniel P. Berrange <berrange@redhat.com>
ed5979
Reviewed-by: Juan Quintela <quintela@redhat.com>
ed5979
Suggested-by: Daniel P. Berrange <berrange@redhat.com>
ed5979
Signed-off-by: manish.mishra <manish.mishra@nutanix.com>
ed5979
Signed-off-by: Juan Quintela <quintela@redhat.com>
ed5979
(cherry picked from commit 84615a19ddf2bfb38d7b3a0d487d2397ee55e4f3)
ed5979
Signed-off-by: Peter Xu <peterx@redhat.com>
ed5979
---
ed5979
 chardev/char-socket.c               |  4 ++--
ed5979
 include/io/channel.h                |  6 ++++++
ed5979
 io/channel-buffer.c                 |  1 +
ed5979
 io/channel-command.c                |  1 +
ed5979
 io/channel-file.c                   |  1 +
ed5979
 io/channel-null.c                   |  1 +
ed5979
 io/channel-socket.c                 | 19 ++++++++++++++++++-
ed5979
 io/channel-tls.c                    |  1 +
ed5979
 io/channel-websock.c                |  1 +
ed5979
 io/channel.c                        | 16 ++++++++++++----
ed5979
 migration/channel-block.c           |  1 +
ed5979
 migration/rdma.c                    |  1 +
ed5979
 scsi/qemu-pr-helper.c               |  2 +-
ed5979
 tests/qtest/tpm-emu.c               |  2 +-
ed5979
 tests/unit/test-io-channel-socket.c |  1 +
ed5979
 util/vhost-user-server.c            |  2 +-
ed5979
 16 files changed, 50 insertions(+), 10 deletions(-)
ed5979
ed5979
diff --git a/chardev/char-socket.c b/chardev/char-socket.c
ed5979
index 879564aa8a..5afce9a464 100644
ed5979
--- a/chardev/char-socket.c
ed5979
+++ b/chardev/char-socket.c
ed5979
@@ -283,11 +283,11 @@ static ssize_t tcp_chr_recv(Chardev *chr, char *buf, size_t len)
ed5979
     if (qio_channel_has_feature(s->ioc, QIO_CHANNEL_FEATURE_FD_PASS)) {
ed5979
         ret = qio_channel_readv_full(s->ioc, &iov, 1,
ed5979
                                      &msgfds, &msgfds_num,
ed5979
-                                     NULL);
ed5979
+                                     0, NULL);
ed5979
     } else {
ed5979
         ret = qio_channel_readv_full(s->ioc, &iov, 1,
ed5979
                                      NULL, NULL,
ed5979
-                                     NULL);
ed5979
+                                     0, NULL);
ed5979
     }
ed5979
 
ed5979
     if (msgfds_num) {
ed5979
diff --git a/include/io/channel.h b/include/io/channel.h
ed5979
index c680ee7480..716235d496 100644
ed5979
--- a/include/io/channel.h
ed5979
+++ b/include/io/channel.h
ed5979
@@ -34,6 +34,8 @@ OBJECT_DECLARE_TYPE(QIOChannel, QIOChannelClass,
ed5979
 
ed5979
 #define QIO_CHANNEL_WRITE_FLAG_ZERO_COPY 0x1
ed5979
 
ed5979
+#define QIO_CHANNEL_READ_FLAG_MSG_PEEK 0x1
ed5979
+
ed5979
 typedef enum QIOChannelFeature QIOChannelFeature;
ed5979
 
ed5979
 enum QIOChannelFeature {
ed5979
@@ -41,6 +43,7 @@ enum QIOChannelFeature {
ed5979
     QIO_CHANNEL_FEATURE_SHUTDOWN,
ed5979
     QIO_CHANNEL_FEATURE_LISTEN,
ed5979
     QIO_CHANNEL_FEATURE_WRITE_ZERO_COPY,
ed5979
+    QIO_CHANNEL_FEATURE_READ_MSG_PEEK,
ed5979
 };
ed5979
 
ed5979
 
ed5979
@@ -114,6 +117,7 @@ struct QIOChannelClass {
ed5979
                         size_t niov,
ed5979
                         int **fds,
ed5979
                         size_t *nfds,
ed5979
+                        int flags,
ed5979
                         Error **errp);
ed5979
     int (*io_close)(QIOChannel *ioc,
ed5979
                     Error **errp);
ed5979
@@ -188,6 +192,7 @@ void qio_channel_set_name(QIOChannel *ioc,
ed5979
  * @niov: the length of the @iov array
ed5979
  * @fds: pointer to an array that will received file handles
ed5979
  * @nfds: pointer filled with number of elements in @fds on return
ed5979
+ * @flags: read flags (QIO_CHANNEL_READ_FLAG_*)
ed5979
  * @errp: pointer to a NULL-initialized error object
ed5979
  *
ed5979
  * Read data from the IO channel, storing it in the
ed5979
@@ -224,6 +229,7 @@ ssize_t qio_channel_readv_full(QIOChannel *ioc,
ed5979
                                size_t niov,
ed5979
                                int **fds,
ed5979
                                size_t *nfds,
ed5979
+                               int flags,
ed5979
                                Error **errp);
ed5979
 
ed5979
 
ed5979
diff --git a/io/channel-buffer.c b/io/channel-buffer.c
ed5979
index bf52011be2..8096180f85 100644
ed5979
--- a/io/channel-buffer.c
ed5979
+++ b/io/channel-buffer.c
ed5979
@@ -54,6 +54,7 @@ static ssize_t qio_channel_buffer_readv(QIOChannel *ioc,
ed5979
                                         size_t niov,
ed5979
                                         int **fds,
ed5979
                                         size_t *nfds,
ed5979
+                                        int flags,
ed5979
                                         Error **errp)
ed5979
 {
ed5979
     QIOChannelBuffer *bioc = QIO_CHANNEL_BUFFER(ioc);
ed5979
diff --git a/io/channel-command.c b/io/channel-command.c
ed5979
index 74516252ba..e7edd091af 100644
ed5979
--- a/io/channel-command.c
ed5979
+++ b/io/channel-command.c
ed5979
@@ -203,6 +203,7 @@ static ssize_t qio_channel_command_readv(QIOChannel *ioc,
ed5979
                                          size_t niov,
ed5979
                                          int **fds,
ed5979
                                          size_t *nfds,
ed5979
+                                         int flags,
ed5979
                                          Error **errp)
ed5979
 {
ed5979
     QIOChannelCommand *cioc = QIO_CHANNEL_COMMAND(ioc);
ed5979
diff --git a/io/channel-file.c b/io/channel-file.c
ed5979
index b67687c2aa..d76663e6ae 100644
ed5979
--- a/io/channel-file.c
ed5979
+++ b/io/channel-file.c
ed5979
@@ -86,6 +86,7 @@ static ssize_t qio_channel_file_readv(QIOChannel *ioc,
ed5979
                                       size_t niov,
ed5979
                                       int **fds,
ed5979
                                       size_t *nfds,
ed5979
+                                      int flags,
ed5979
                                       Error **errp)
ed5979
 {
ed5979
     QIOChannelFile *fioc = QIO_CHANNEL_FILE(ioc);
ed5979
diff --git a/io/channel-null.c b/io/channel-null.c
ed5979
index 75e3781507..4fafdb770d 100644
ed5979
--- a/io/channel-null.c
ed5979
+++ b/io/channel-null.c
ed5979
@@ -60,6 +60,7 @@ qio_channel_null_readv(QIOChannel *ioc,
ed5979
                        size_t niov,
ed5979
                        int **fds G_GNUC_UNUSED,
ed5979
                        size_t *nfds G_GNUC_UNUSED,
ed5979
+                       int flags,
ed5979
                        Error **errp)
ed5979
 {
ed5979
     QIOChannelNull *nioc = QIO_CHANNEL_NULL(ioc);
ed5979
diff --git a/io/channel-socket.c b/io/channel-socket.c
ed5979
index b76dca9cc1..7aca84f61a 100644
ed5979
--- a/io/channel-socket.c
ed5979
+++ b/io/channel-socket.c
ed5979
@@ -173,6 +173,9 @@ int qio_channel_socket_connect_sync(QIOChannelSocket *ioc,
ed5979
     }
ed5979
 #endif
ed5979
 
ed5979
+    qio_channel_set_feature(QIO_CHANNEL(ioc),
ed5979
+                            QIO_CHANNEL_FEATURE_READ_MSG_PEEK);
ed5979
+
ed5979
     return 0;
ed5979
 }
ed5979
 
ed5979
@@ -406,6 +409,9 @@ qio_channel_socket_accept(QIOChannelSocket *ioc,
ed5979
     }
ed5979
 #endif /* WIN32 */
ed5979
 
ed5979
+    qio_channel_set_feature(QIO_CHANNEL(cioc),
ed5979
+                            QIO_CHANNEL_FEATURE_READ_MSG_PEEK);
ed5979
+
ed5979
     trace_qio_channel_socket_accept_complete(ioc, cioc, cioc->fd);
ed5979
     return cioc;
ed5979
 
ed5979
@@ -496,6 +502,7 @@ static ssize_t qio_channel_socket_readv(QIOChannel *ioc,
ed5979
                                         size_t niov,
ed5979
                                         int **fds,
ed5979
                                         size_t *nfds,
ed5979
+                                        int flags,
ed5979
                                         Error **errp)
ed5979
 {
ed5979
     QIOChannelSocket *sioc = QIO_CHANNEL_SOCKET(ioc);
ed5979
@@ -517,6 +524,10 @@ static ssize_t qio_channel_socket_readv(QIOChannel *ioc,
ed5979
 
ed5979
     }
ed5979
 
ed5979
+    if (flags & QIO_CHANNEL_READ_FLAG_MSG_PEEK) {
ed5979
+        sflags |= MSG_PEEK;
ed5979
+    }
ed5979
+
ed5979
  retry:
ed5979
     ret = recvmsg(sioc->fd, &msg, sflags);
ed5979
     if (ret < 0) {
ed5979
@@ -624,11 +635,17 @@ static ssize_t qio_channel_socket_readv(QIOChannel *ioc,
ed5979
                                         size_t niov,
ed5979
                                         int **fds,
ed5979
                                         size_t *nfds,
ed5979
+                                        int flags,
ed5979
                                         Error **errp)
ed5979
 {
ed5979
     QIOChannelSocket *sioc = QIO_CHANNEL_SOCKET(ioc);
ed5979
     ssize_t done = 0;
ed5979
     ssize_t i;
ed5979
+    int sflags = 0;
ed5979
+
ed5979
+    if (flags & QIO_CHANNEL_READ_FLAG_MSG_PEEK) {
ed5979
+        sflags |= MSG_PEEK;
ed5979
+    }
ed5979
 
ed5979
     for (i = 0; i < niov; i++) {
ed5979
         ssize_t ret;
ed5979
@@ -636,7 +653,7 @@ static ssize_t qio_channel_socket_readv(QIOChannel *ioc,
ed5979
         ret = recv(sioc->fd,
ed5979
                    iov[i].iov_base,
ed5979
                    iov[i].iov_len,
ed5979
-                   0);
ed5979
+                   sflags);
ed5979
         if (ret < 0) {
ed5979
             if (errno == EAGAIN) {
ed5979
                 if (done) {
ed5979
diff --git a/io/channel-tls.c b/io/channel-tls.c
ed5979
index 4ce890a538..c730cb8ec5 100644
ed5979
--- a/io/channel-tls.c
ed5979
+++ b/io/channel-tls.c
ed5979
@@ -260,6 +260,7 @@ static ssize_t qio_channel_tls_readv(QIOChannel *ioc,
ed5979
                                      size_t niov,
ed5979
                                      int **fds,
ed5979
                                      size_t *nfds,
ed5979
+                                     int flags,
ed5979
                                      Error **errp)
ed5979
 {
ed5979
     QIOChannelTLS *tioc = QIO_CHANNEL_TLS(ioc);
ed5979
diff --git a/io/channel-websock.c b/io/channel-websock.c
ed5979
index fb4932ade7..a12acc27cf 100644
ed5979
--- a/io/channel-websock.c
ed5979
+++ b/io/channel-websock.c
ed5979
@@ -1081,6 +1081,7 @@ static ssize_t qio_channel_websock_readv(QIOChannel *ioc,
ed5979
                                          size_t niov,
ed5979
                                          int **fds,
ed5979
                                          size_t *nfds,
ed5979
+                                         int flags,
ed5979
                                          Error **errp)
ed5979
 {
ed5979
     QIOChannelWebsock *wioc = QIO_CHANNEL_WEBSOCK(ioc);
ed5979
diff --git a/io/channel.c b/io/channel.c
ed5979
index 0640941ac5..a8c7f11649 100644
ed5979
--- a/io/channel.c
ed5979
+++ b/io/channel.c
ed5979
@@ -52,6 +52,7 @@ ssize_t qio_channel_readv_full(QIOChannel *ioc,
ed5979
                                size_t niov,
ed5979
                                int **fds,
ed5979
                                size_t *nfds,
ed5979
+                               int flags,
ed5979
                                Error **errp)
ed5979
 {
ed5979
     QIOChannelClass *klass = QIO_CHANNEL_GET_CLASS(ioc);
ed5979
@@ -63,7 +64,14 @@ ssize_t qio_channel_readv_full(QIOChannel *ioc,
ed5979
         return -1;
ed5979
     }
ed5979
 
ed5979
-    return klass->io_readv(ioc, iov, niov, fds, nfds, errp);
ed5979
+    if ((flags & QIO_CHANNEL_READ_FLAG_MSG_PEEK) &&
ed5979
+        !qio_channel_has_feature(ioc, QIO_CHANNEL_FEATURE_READ_MSG_PEEK)) {
ed5979
+        error_setg_errno(errp, EINVAL,
ed5979
+                         "Channel does not support peek read");
ed5979
+        return -1;
ed5979
+    }
ed5979
+
ed5979
+    return klass->io_readv(ioc, iov, niov, fds, nfds, flags, errp);
ed5979
 }
ed5979
 
ed5979
 
ed5979
@@ -146,7 +154,7 @@ int qio_channel_readv_full_all_eof(QIOChannel *ioc,
ed5979
     while ((nlocal_iov > 0) || local_fds) {
ed5979
         ssize_t len;
ed5979
         len = qio_channel_readv_full(ioc, local_iov, nlocal_iov, local_fds,
ed5979
-                                     local_nfds, errp);
ed5979
+                                     local_nfds, 0, errp);
ed5979
         if (len == QIO_CHANNEL_ERR_BLOCK) {
ed5979
             if (qemu_in_coroutine()) {
ed5979
                 qio_channel_yield(ioc, G_IO_IN);
ed5979
@@ -284,7 +292,7 @@ ssize_t qio_channel_readv(QIOChannel *ioc,
ed5979
                           size_t niov,
ed5979
                           Error **errp)
ed5979
 {
ed5979
-    return qio_channel_readv_full(ioc, iov, niov, NULL, NULL, errp);
ed5979
+    return qio_channel_readv_full(ioc, iov, niov, NULL, NULL, 0, errp);
ed5979
 }
ed5979
 
ed5979
 
ed5979
@@ -303,7 +311,7 @@ ssize_t qio_channel_read(QIOChannel *ioc,
ed5979
                          Error **errp)
ed5979
 {
ed5979
     struct iovec iov = { .iov_base = buf, .iov_len = buflen };
ed5979
-    return qio_channel_readv_full(ioc, &iov, 1, NULL, NULL, errp);
ed5979
+    return qio_channel_readv_full(ioc, &iov, 1, NULL, NULL, 0, errp);
ed5979
 }
ed5979
 
ed5979
 
ed5979
diff --git a/migration/channel-block.c b/migration/channel-block.c
ed5979
index f4ab53acdb..b7374363c3 100644
ed5979
--- a/migration/channel-block.c
ed5979
+++ b/migration/channel-block.c
ed5979
@@ -53,6 +53,7 @@ qio_channel_block_readv(QIOChannel *ioc,
ed5979
                         size_t niov,
ed5979
                         int **fds,
ed5979
                         size_t *nfds,
ed5979
+                        int flags,
ed5979
                         Error **errp)
ed5979
 {
ed5979
     QIOChannelBlock *bioc = QIO_CHANNEL_BLOCK(ioc);
ed5979
diff --git a/migration/rdma.c b/migration/rdma.c
ed5979
index 94a55dd95b..d8b4632094 100644
ed5979
--- a/migration/rdma.c
ed5979
+++ b/migration/rdma.c
ed5979
@@ -2854,6 +2854,7 @@ static ssize_t qio_channel_rdma_readv(QIOChannel *ioc,
ed5979
                                       size_t niov,
ed5979
                                       int **fds,
ed5979
                                       size_t *nfds,
ed5979
+                                      int flags,
ed5979
                                       Error **errp)
ed5979
 {
ed5979
     QIOChannelRDMA *rioc = QIO_CHANNEL_RDMA(ioc);
ed5979
diff --git a/scsi/qemu-pr-helper.c b/scsi/qemu-pr-helper.c
ed5979
index 196b78c00d..199227a556 100644
ed5979
--- a/scsi/qemu-pr-helper.c
ed5979
+++ b/scsi/qemu-pr-helper.c
ed5979
@@ -614,7 +614,7 @@ static int coroutine_fn prh_read(PRHelperClient *client, void *buf, int sz,
ed5979
         iov.iov_base = buf;
ed5979
         iov.iov_len = sz;
ed5979
         n_read = qio_channel_readv_full(QIO_CHANNEL(client->ioc), &iov, 1,
ed5979
-                                        &fds, &nfds, errp);
ed5979
+                                        &fds, &nfds, 0, errp);
ed5979
 
ed5979
         if (n_read == QIO_CHANNEL_ERR_BLOCK) {
ed5979
             qio_channel_yield(QIO_CHANNEL(client->ioc), G_IO_IN);
ed5979
diff --git a/tests/qtest/tpm-emu.c b/tests/qtest/tpm-emu.c
ed5979
index 2994d1cf42..3cf1acaf7d 100644
ed5979
--- a/tests/qtest/tpm-emu.c
ed5979
+++ b/tests/qtest/tpm-emu.c
ed5979
@@ -106,7 +106,7 @@ void *tpm_emu_ctrl_thread(void *data)
ed5979
         int *pfd = NULL;
ed5979
         size_t nfd = 0;
ed5979
 
ed5979
-        qio_channel_readv_full(ioc, &iov, 1, &pfd, &nfd, &error_abort);
ed5979
+        qio_channel_readv_full(ioc, &iov, 1, &pfd, &nfd, 0, &error_abort);
ed5979
         cmd = be32_to_cpu(cmd);
ed5979
         g_assert_cmpint(cmd, ==, CMD_SET_DATAFD);
ed5979
         g_assert_cmpint(nfd, ==, 1);
ed5979
diff --git a/tests/unit/test-io-channel-socket.c b/tests/unit/test-io-channel-socket.c
ed5979
index b36a5d972a..b964bb202d 100644
ed5979
--- a/tests/unit/test-io-channel-socket.c
ed5979
+++ b/tests/unit/test-io-channel-socket.c
ed5979
@@ -460,6 +460,7 @@ static void test_io_channel_unix_fd_pass(void)
ed5979
                            G_N_ELEMENTS(iorecv),
ed5979
                            &fdrecv,
ed5979
                            &nfdrecv,
ed5979
+                           0,
ed5979
                            &error_abort);
ed5979
 
ed5979
     g_assert(nfdrecv == G_N_ELEMENTS(fdsend));
ed5979
diff --git a/util/vhost-user-server.c b/util/vhost-user-server.c
ed5979
index 232984ace6..145eb17c08 100644
ed5979
--- a/util/vhost-user-server.c
ed5979
+++ b/util/vhost-user-server.c
ed5979
@@ -116,7 +116,7 @@ vu_message_read(VuDev *vu_dev, int conn_fd, VhostUserMsg *vmsg)
ed5979
          * qio_channel_readv_full may have short reads, keeping calling it
ed5979
          * until getting VHOST_USER_HDR_SIZE or 0 bytes in total
ed5979
          */
ed5979
-        rc = qio_channel_readv_full(ioc, &iov, 1, &fds, &nfds, &local_err);
ed5979
+        rc = qio_channel_readv_full(ioc, &iov, 1, &fds, &nfds, 0, &local_err);
ed5979
         if (rc < 0) {
ed5979
             if (rc == QIO_CHANNEL_ERR_BLOCK) {
ed5979
                 assert(local_err == NULL);
ed5979
-- 
ed5979
2.31.1
ed5979