From d5986c804f05070a07dfe702f7c66357daaa1ab6 Mon Sep 17 00:00:00 2001
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
Date: Mon, 27 Jan 2020 19:01:20 +0100
Subject: [PATCH 049/116] virtiofsd: Pass write iov's all the way through
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

RH-Author: Dr. David Alan Gilbert <dgilbert@redhat.com>
Message-id: <20200127190227.40942-46-dgilbert@redhat.com>
Patchwork-id: 93497
O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 045/112] virtiofsd: Pass write iov's all the way through
Bugzilla: 1694164
RH-Acked-by: Philippe Mathieu-Daudé <philmd@redhat.com>
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
RH-Acked-by: Sergio Lopez Pascual <slp@redhat.com>

From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>

Pass the write iov pointing to guest RAM all the way through rather
than copying the data.

Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Reviewed-by: Xiao Yang <yangx.jy@cn.fujitsu.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
(cherry picked from commit e17f7a580e2c599330ad3a6946be615ca2fe97d9)
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
---
 tools/virtiofsd/fuse_virtio.c | 79 +++++++++++++++++++++++++++++++++++++++----
 1 file changed, 73 insertions(+), 6 deletions(-)

diff --git a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c
index fd588a4..872968f 100644
--- a/tools/virtiofsd/fuse_virtio.c
+++ b/tools/virtiofsd/fuse_virtio.c
@@ -454,6 +454,10 @@ static void *fv_queue_thread(void *opaque)
                  __func__, qi->qidx, (size_t)evalue, in_bytes, out_bytes);
 
         while (1) {
+            bool allocated_bufv = false;
+            struct fuse_bufvec bufv;
+            struct fuse_bufvec *pbufv;
+
             /*
              * An element contains one request and the space to send our
              * response They're spread over multiple descriptors in a
@@ -495,14 +499,76 @@ static void *fv_queue_thread(void *opaque)
                          __func__, elem->index);
                 assert(0); /* TODO */
             }
-            copy_from_iov(&fbuf, out_num, out_sg);
-            fbuf.size = out_len;
+            /* Copy just the first element and look at it */
+            copy_from_iov(&fbuf, 1, out_sg);
+
+            if (out_num > 2 &&
+                out_sg[0].iov_len == sizeof(struct fuse_in_header) &&
+                ((struct fuse_in_header *)fbuf.mem)->opcode == FUSE_WRITE &&
+                out_sg[1].iov_len == sizeof(struct fuse_write_in)) {
+                /*
+                 * For a write we don't actually need to copy the
+                 * data, we can just do it straight out of guest memory
+                 * but we must still copy the headers in case the guest
+                 * was nasty and changed them while we were using them.
+                 */
+                fuse_log(FUSE_LOG_DEBUG, "%s: Write special case\n", __func__);
+
+                /* copy the fuse_write_in header after the fuse_in_header */
+                fbuf.mem += out_sg->iov_len;
+                copy_from_iov(&fbuf, 1, out_sg + 1);
+                fbuf.mem -= out_sg->iov_len;
+                fbuf.size = out_sg[0].iov_len + out_sg[1].iov_len;
+
+                /* Allocate the bufv, with space for the rest of the iov */
+                allocated_bufv = true;
+                pbufv = malloc(sizeof(struct fuse_bufvec) +
+                               sizeof(struct fuse_buf) * (out_num - 2));
+                if (!pbufv) {
+                    vu_queue_unpop(dev, q, elem, 0);
+                    free(elem);
+                    fuse_log(FUSE_LOG_ERR, "%s: pbufv malloc failed\n",
+                             __func__);
+                    goto out;
+                }
+
+                pbufv->count = 1;
+                pbufv->buf[0] = fbuf;
+
+                size_t iovindex, pbufvindex;
+                iovindex = 2; /* 2 headers, separate iovs */
+                pbufvindex = 1; /* 2 headers, 1 fusebuf */
+
+                for (; iovindex < out_num; iovindex++, pbufvindex++) {
+                    pbufv->count++;
+                    pbufv->buf[pbufvindex].pos = ~0; /* Dummy */
+                    pbufv->buf[pbufvindex].flags = 0;
+                    pbufv->buf[pbufvindex].mem = out_sg[iovindex].iov_base;
+                    pbufv->buf[pbufvindex].size = out_sg[iovindex].iov_len;
+                }
+            } else {
+                /* Normal (non fast write) path */
+
+                /* Copy the rest of the buffer */
+                fbuf.mem += out_sg->iov_len;
+                copy_from_iov(&fbuf, out_num - 1, out_sg + 1);
+                fbuf.mem -= out_sg->iov_len;
+                fbuf.size = out_len;
 
-            /* TODO! Endianness of header */
+                /* TODO! Endianness of header */
 
-            /* TODO: Add checks for fuse_session_exited */
-            struct fuse_bufvec bufv = { .buf[0] = fbuf, .count = 1 };
-            fuse_session_process_buf_int(se, &bufv, &ch);
+                /* TODO: Add checks for fuse_session_exited */
+                bufv.buf[0] = fbuf;
+                bufv.count = 1;
+                pbufv = &bufv;
+            }
+            pbufv->idx = 0;
+            pbufv->off = 0;
+            fuse_session_process_buf_int(se, pbufv, &ch);
+
+            if (allocated_bufv) {
+                free(pbufv);
+            }
 
             if (!qi->reply_sent) {
                 fuse_log(FUSE_LOG_DEBUG, "%s: elem %d no reply sent\n",
@@ -516,6 +582,7 @@ static void *fv_queue_thread(void *opaque)
             elem = NULL;
         }
     }
+out:
     pthread_mutex_destroy(&ch.lock);
     free(fbuf.mem);
 
-- 
1.8.3.1