From 881945094c0e4d33614d40959bfc20e395f5a478 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= <eperezma@redhat.com>
Date: Thu, 21 Jul 2022 16:05:40 +0200
Subject: [PATCH 24/32] vdpa: Buffer CVQ support on shadow virtqueue
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

RH-Author: Eugenio Pérez <eperezma@redhat.com>
RH-MergeRequest: 108: Net Control Virtqueue shadow Support
RH-Commit: [24/27] 5486f80141a3ad968a32e782bdcdead32f417352 (eperezmartin/qemu-kvm)
RH-Bugzilla: 1939363
RH-Acked-by: Stefano Garzarella <sgarzare@redhat.com>
RH-Acked-by: Cindy Lu <lulu@redhat.com>
RH-Acked-by: Laurent Vivier <lvivier@redhat.com>

Bugzilla: https://bugzilla.redhat.com/1939363

Upstream Status: git://git.qemu.org/qemu.git

commit 2df4dd31e194c94da7d28c02e92449f4a989fca9
Author: Eugenio Pérez <eperezma@redhat.com>
Date:   Wed Jul 20 08:59:43 2022 +0200

    vdpa: Buffer CVQ support on shadow virtqueue

    Introduce the control virtqueue support for vDPA shadow virtqueue. This
    is needed for advanced networking features like rx filtering.

    Virtio-net control VQ copies the descriptors to qemu's VA, so we avoid
    TOCTOU with the guest's or device's memory every time there is a device
    model change.  Otherwise, the guest could change the memory content
    between the time qemu reads it and the time the device does.
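
    The ordering is what closes the race: the command is snapshotted into
    a qemu-owned buffer before anything validates or forwards it, so later
    guest writes cannot affect what was checked.  A minimal sketch of that
    order (validate() and forward_to_device() are hypothetical stand-ins
    for the helpers introduced below):

        /* Snapshot the guest command into qemu-owned memory first */
        len = iov_to_buf(elem->out_sg, elem->out_num, 0, qemu_buf, len);

        /* From here on the guest cannot change the bytes being checked */
        if (validate(qemu_buf, len)) {
            forward_to_device(qemu_buf, len);
        }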

    To demonstrate command handling, VIRTIO_NET_F_CTRL_MAC_ADDR is
    implemented.  If the virtio-net driver changes the MAC, the virtio-net
    device model will be updated with the new one, and an rx filtering
    change event will be raised.

    More cvq commands could be added here straightforwardly, but they have
    not been tested.
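
    For sizing, VIRTIO_NET_CTRL_MAC_TABLE_SET is the largest control
    command: a control header plus two virtio_net_ctrl_mac tables (unicast
    and multicast).  A back-of-the-envelope check, assuming QEMU's
    MAC_TABLE_ENTRIES of 64 and ETH_ALEN of 6:

        sizeof(struct virtio_net_ctrl_hdr)         /*   2 bytes */
        + 2 * sizeof(struct virtio_net_ctrl_mac)   /*   8 bytes */
        + MAC_TABLE_ENTRIES * ETH_ALEN             /* 384 bytes */
                                                   /* = 394     */

    which vhost_vdpa_net_cvq_cmd_page_len() rounds up to a whole host page
    before mapping.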

    Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
    Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
    Signed-off-by: Jason Wang <jasowang@redhat.com>

Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
---
 net/vhost-vdpa.c | 213 +++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 205 insertions(+), 8 deletions(-)

diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
index 2e3b6b10d8..df42822463 100644
--- a/net/vhost-vdpa.c
+++ b/net/vhost-vdpa.c
@@ -33,6 +33,9 @@ typedef struct VhostVDPAState {
     NetClientState nc;
     struct vhost_vdpa vhost_vdpa;
     VHostNetState *vhost_net;
+
+    /* Control commands shadow buffers */
+    void *cvq_cmd_out_buffer, *cvq_cmd_in_buffer;
     bool started;
 } VhostVDPAState;
 
@@ -131,6 +134,8 @@ static void vhost_vdpa_cleanup(NetClientState *nc)
 {
     VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);
 
+    qemu_vfree(s->cvq_cmd_out_buffer);
+    qemu_vfree(s->cvq_cmd_in_buffer);
     if (s->vhost_net) {
         vhost_net_cleanup(s->vhost_net);
         g_free(s->vhost_net);
@@ -190,24 +195,191 @@ static NetClientInfo net_vhost_vdpa_info = {
        .check_peer_type = vhost_vdpa_check_peer_type,
 };
 
+static void vhost_vdpa_cvq_unmap_buf(struct vhost_vdpa *v, void *addr)
+{
+    VhostIOVATree *tree = v->iova_tree;
+    DMAMap needle = {
+        /*
+         * No need to specify size or to look for more translations since
+         * this contiguous chunk was allocated by us.
+         */
+        .translated_addr = (hwaddr)(uintptr_t)addr,
+    };
+    const DMAMap *map = vhost_iova_tree_find_iova(tree, &needle);
+    int r;
+
+    if (unlikely(!map)) {
+        error_report("Cannot locate expected map");
+        return;
+    }
+
+    r = vhost_vdpa_dma_unmap(v, map->iova, map->size + 1);
+    if (unlikely(r != 0)) {
+        error_report("Device cannot unmap: %s(%d)", g_strerror(r), r);
+    }
+
+    vhost_iova_tree_remove(tree, map);
+}
+
+static size_t vhost_vdpa_net_cvq_cmd_len(void)
+{
+    /*
+     * MAC_TABLE_SET is the ctrl command that produces the longest out buffer.
+     * The in buffer is always 1 byte, so it always fits here.
+     */
+    return sizeof(struct virtio_net_ctrl_hdr) +
+           2 * sizeof(struct virtio_net_ctrl_mac) +
+           MAC_TABLE_ENTRIES * ETH_ALEN;
+}
+
+static size_t vhost_vdpa_net_cvq_cmd_page_len(void)
+{
+    return ROUND_UP(vhost_vdpa_net_cvq_cmd_len(), qemu_real_host_page_size);
+}
+
+/** Copy and map a guest buffer. */
+static bool vhost_vdpa_cvq_map_buf(struct vhost_vdpa *v,
+                                   const struct iovec *out_data,
+                                   size_t out_num, size_t data_len, void *buf,
+                                   size_t *written, bool write)
+{
+    DMAMap map = {};
+    int r;
+
+    if (unlikely(!data_len)) {
+        qemu_log_mask(LOG_GUEST_ERROR, "%s: invalid length of %s buffer\n",
+                      __func__, write ? "in" : "out");
+        return false;
+    }
+
+    *written = iov_to_buf(out_data, out_num, 0, buf, data_len);
+    map.translated_addr = (hwaddr)(uintptr_t)buf;
+    map.size = vhost_vdpa_net_cvq_cmd_page_len() - 1;
+    map.perm = write ? IOMMU_RW : IOMMU_RO;
+    r = vhost_iova_tree_map_alloc(v->iova_tree, &map);
+    if (unlikely(r != IOVA_OK)) {
+        error_report("Cannot map injected element");
+        return false;
+    }
+
+    r = vhost_vdpa_dma_map(v, map.iova, vhost_vdpa_net_cvq_cmd_page_len(), buf,
+                           !write);
+    if (unlikely(r < 0)) {
+        goto dma_map_err;
+    }
+
+    return true;
+
+dma_map_err:
+    vhost_iova_tree_remove(v->iova_tree, &map);
+    return false;
+}
+
 /**
- * Forward buffer for the moment.
+ * Copy the guest element into a dedicated buffer suitable to be sent to NIC
+ *
+ * @iov: [0] is the out buffer, [1] is the in one
+ */
+static bool vhost_vdpa_net_cvq_map_elem(VhostVDPAState *s,
+                                        VirtQueueElement *elem,
+                                        struct iovec *iov)
+{
+    size_t in_copied;
+    bool ok;
+
+    iov[0].iov_base = s->cvq_cmd_out_buffer;
+    ok = vhost_vdpa_cvq_map_buf(&s->vhost_vdpa, elem->out_sg, elem->out_num,
+                                vhost_vdpa_net_cvq_cmd_len(), iov[0].iov_base,
+                                &iov[0].iov_len, false);
+    if (unlikely(!ok)) {
+        return false;
+    }
+
+    iov[1].iov_base = s->cvq_cmd_in_buffer;
+    ok = vhost_vdpa_cvq_map_buf(&s->vhost_vdpa, NULL, 0,
+                                sizeof(virtio_net_ctrl_ack), iov[1].iov_base,
+                                &in_copied, true);
+    if (unlikely(!ok)) {
+        vhost_vdpa_cvq_unmap_buf(&s->vhost_vdpa, s->cvq_cmd_out_buffer);
+        return false;
+    }
+
+    iov[1].iov_len = sizeof(virtio_net_ctrl_ack);
+    return true;
+}
+
+/**
+ * Do not forward commands not supported by SVQ. Otherwise, the device could
+ * accept them and qemu would not know how to update the device model.
+ */
+static bool vhost_vdpa_net_cvq_validate_cmd(const struct iovec *out,
+                                            size_t out_num)
+{
+    struct virtio_net_ctrl_hdr ctrl;
+    size_t n;
+
+    n = iov_to_buf(out, out_num, 0, &ctrl, sizeof(ctrl));
+    if (unlikely(n < sizeof(ctrl))) {
+        qemu_log_mask(LOG_GUEST_ERROR,
+                      "%s: invalid length of out buffer %zu\n", __func__, n);
+        return false;
+    }
+
+    switch (ctrl.class) {
+    case VIRTIO_NET_CTRL_MAC:
+        switch (ctrl.cmd) {
+        case VIRTIO_NET_CTRL_MAC_ADDR_SET:
+            return true;
+        default:
+            qemu_log_mask(LOG_GUEST_ERROR, "%s: invalid mac cmd %u\n",
+                          __func__, ctrl.cmd);
+        }
+        break;
+    default:
+        qemu_log_mask(LOG_GUEST_ERROR, "%s: invalid control class %u\n",
+                      __func__, ctrl.class);
+    }
+
+    return false;
+}
+
+/**
+ * Validate and copy control virtqueue commands.
+ *
+ * Following QEMU guidelines, we offer a copy of the buffers to the device to
+ * prevent TOCTOU bugs.
 */
 static int vhost_vdpa_net_handle_ctrl_avail(VhostShadowVirtqueue *svq,
                                             VirtQueueElement *elem,
                                             void *opaque)
 {
-    unsigned int n = elem->out_num + elem->in_num;
-    g_autofree struct iovec *dev_buffers = g_new(struct iovec, n);
+    VhostVDPAState *s = opaque;
     size_t in_len, dev_written;
     virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
-    int r;
+    /* out and in buffers sent to the device */
+    struct iovec dev_buffers[2] = {
+        { .iov_base = s->cvq_cmd_out_buffer },
+        { .iov_base = s->cvq_cmd_in_buffer },
+    };
+    /* in buffer used for device model */
+    const struct iovec in = {
+        .iov_base = &status,
+        .iov_len = sizeof(status),
+    };
+    int r = -EINVAL;
+    bool ok;
+
+    ok = vhost_vdpa_net_cvq_map_elem(s, elem, dev_buffers);
+    if (unlikely(!ok)) {
+        goto out;
+    }
 
-    memcpy(dev_buffers, elem->out_sg, elem->out_num);
-    memcpy(dev_buffers + elem->out_num, elem->in_sg, elem->in_num);
+    ok = vhost_vdpa_net_cvq_validate_cmd(&dev_buffers[0], 1);
+    if (unlikely(!ok)) {
+        goto out;
+    }
 
-    r = vhost_svq_add(svq, &dev_buffers[0], elem->out_num, &dev_buffers[1],
-                      elem->in_num, elem);
+    r = vhost_svq_add(svq, &dev_buffers[0], 1, &dev_buffers[1], 1, elem);
     if (unlikely(r != 0)) {
         if (unlikely(r == -ENOSPC)) {
             qemu_log_mask(LOG_GUEST_ERROR, "%s: No space on device queue\n",
@@ -224,6 +396,18 @@ static int vhost_vdpa_net_handle_ctrl_avail(VhostShadowVirtqueue *svq,
     dev_written = vhost_svq_poll(svq);
     if (unlikely(dev_written < sizeof(status))) {
         error_report("Insufficient written data (%zu)", dev_written);
+        goto out;
+    }
+
+    memcpy(&status, dev_buffers[1].iov_base, sizeof(status));
+    if (status != VIRTIO_NET_OK) {
+        goto out;
+    }
+
+    status = VIRTIO_NET_ERR;
+    virtio_net_handle_ctrl_iov(svq->vdev, &in, 1, dev_buffers, 1);
+    if (status != VIRTIO_NET_OK) {
+        error_report("Bad CVQ processing in model");
     }
 
 out:
@@ -234,6 +418,12 @@ out:
     }
     vhost_svq_push_elem(svq, elem, MIN(in_len, sizeof(status)));
     g_free(elem);
+    if (dev_buffers[0].iov_base) {
+        vhost_vdpa_cvq_unmap_buf(&s->vhost_vdpa, dev_buffers[0].iov_base);
+    }
+    if (dev_buffers[1].iov_base) {
+        vhost_vdpa_cvq_unmap_buf(&s->vhost_vdpa, dev_buffers[1].iov_base);
+    }
     return r;
 }
 
@@ -266,6 +456,13 @@ static NetClientState *net_vhost_vdpa_init(NetClientState *peer,
     s->vhost_vdpa.device_fd = vdpa_device_fd;
     s->vhost_vdpa.index = queue_pair_index;
     if (!is_datapath) {
+        s->cvq_cmd_out_buffer = qemu_memalign(qemu_real_host_page_size,
+                                            vhost_vdpa_net_cvq_cmd_page_len());
+        memset(s->cvq_cmd_out_buffer, 0, vhost_vdpa_net_cvq_cmd_page_len());
+        s->cvq_cmd_in_buffer = qemu_memalign(qemu_real_host_page_size,
+                                            vhost_vdpa_net_cvq_cmd_page_len());
+        memset(s->cvq_cmd_in_buffer, 0, vhost_vdpa_net_cvq_cmd_page_len());
+
        s->vhost_vdpa.shadow_vq_ops = &vhost_vdpa_net_svq_ops;
        s->vhost_vdpa.shadow_vq_ops_opaque = s;
     }
-- 
2.31.1