|
|
29b115 |
From 881945094c0e4d33614d40959bfc20e395f5a478 Mon Sep 17 00:00:00 2001
|
|
|
29b115 |
From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= <eperezma@redhat.com>
|
|
|
29b115 |
Date: Thu, 21 Jul 2022 16:05:40 +0200
|
|
|
29b115 |
Subject: [PATCH 24/32] vdpa: Buffer CVQ support on shadow virtqueue
|
|
|
29b115 |
MIME-Version: 1.0
|
|
|
29b115 |
Content-Type: text/plain; charset=UTF-8
|
|
|
29b115 |
Content-Transfer-Encoding: 8bit
|
|
|
29b115 |
|
|
|
29b115 |
RH-Author: Eugenio Pérez <eperezma@redhat.com>
|
|
|
29b115 |
RH-MergeRequest: 108: Net Control Virtqueue shadow Support
|
|
|
29b115 |
RH-Commit: [24/27] 5486f80141a3ad968a32e782bdcdead32f417352 (eperezmartin/qemu-kvm)
|
|
|
29b115 |
RH-Bugzilla: 1939363
|
|
|
29b115 |
RH-Acked-by: Stefano Garzarella <sgarzare@redhat.com>
|
|
|
29b115 |
RH-Acked-by: Cindy Lu <lulu@redhat.com>
|
|
|
29b115 |
RH-Acked-by: Laurent Vivier <lvivier@redhat.com>
|
|
|
29b115 |
|
|
|
29b115 |
Bugzilla: https://bugzilla.redhat.com/1939363
|
|
|
29b115 |
|
|
|
29b115 |
Upstream Status: git://git.qemu.org/qemu.git
|
|
|
29b115 |
|
|
|
29b115 |
commit 2df4dd31e194c94da7d28c02e92449f4a989fca9
|
|
|
29b115 |
Author: Eugenio Pérez <eperezma@redhat.com>
|
|
|
29b115 |
Date: Wed Jul 20 08:59:43 2022 +0200
|
|
|
29b115 |
|
|
|
29b115 |
vdpa: Buffer CVQ support on shadow virtqueue
|
|
|
29b115 |
|
|
|
29b115 |
Introduce the control virtqueue support for vDPA shadow virtqueue. This
|
|
|
29b115 |
is needed for advanced networking features like rx filtering.
|
|
|
29b115 |
|
|
|
29b115 |
Virtio-net control VQ copies the descriptors to qemu's VA, so we avoid
|
|
|
29b115 |
TOCTOU with the guest's or device's memory every time there is a device
|
|
|
29b115 |
model change. Otherwise, the guest could change the memory content in
|
|
|
29b115 |
the time between qemu reading it and the device reading it.
|
|
|
29b115 |
|
|
|
29b115 |
To demonstrate command handling, VIRTIO_NET_F_CTRL_MACADDR is
|
|
|
29b115 |
implemented. If the virtio-net driver changes the MAC, the virtio-net device
|
|
|
29b115 |
model will be updated with the new one, and an rx filtering change event
|
|
|
29b115 |
will be raised.
|
|
|
29b115 |
|
|
|
29b115 |
More cvq commands could be added here straightforwardly but they have
|
|
|
29b115 |
not been tested.
|
|
|
29b115 |
|
|
|
29b115 |
Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
|
|
|
29b115 |
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
|
|
|
29b115 |
Signed-off-by: Jason Wang <jasowang@redhat.com>
|
|
|
29b115 |
|
|
|
29b115 |
Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
|
|
|
29b115 |
---
|
|
|
29b115 |
net/vhost-vdpa.c | 213 +++++++++++++++++++++++++++++++++++++++++++++--
|
|
|
29b115 |
1 file changed, 205 insertions(+), 8 deletions(-)
|
|
|
29b115 |
|
|
|
29b115 |
diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
|
|
|
29b115 |
index 2e3b6b10d8..df42822463 100644
|
|
|
29b115 |
--- a/net/vhost-vdpa.c
|
|
|
29b115 |
+++ b/net/vhost-vdpa.c
|
|
|
29b115 |
@@ -33,6 +33,9 @@ typedef struct VhostVDPAState {
|
|
|
29b115 |
NetClientState nc;
|
|
|
29b115 |
struct vhost_vdpa vhost_vdpa;
|
|
|
29b115 |
VHostNetState *vhost_net;
|
|
|
29b115 |
+
|
|
|
29b115 |
+ /* Control commands shadow buffers */
|
|
|
29b115 |
+ void *cvq_cmd_out_buffer, *cvq_cmd_in_buffer;
|
|
|
29b115 |
bool started;
|
|
|
29b115 |
} VhostVDPAState;
|
|
|
29b115 |
|
|
|
29b115 |
@@ -131,6 +134,8 @@ static void vhost_vdpa_cleanup(NetClientState *nc)
|
|
|
29b115 |
{
|
|
|
29b115 |
VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);
|
|
|
29b115 |
|
|
|
29b115 |
+ qemu_vfree(s->cvq_cmd_out_buffer);
|
|
|
29b115 |
+ qemu_vfree(s->cvq_cmd_in_buffer);
|
|
|
29b115 |
if (s->vhost_net) {
|
|
|
29b115 |
vhost_net_cleanup(s->vhost_net);
|
|
|
29b115 |
g_free(s->vhost_net);
|
|
|
29b115 |
@@ -190,24 +195,191 @@ static NetClientInfo net_vhost_vdpa_info = {
|
|
|
29b115 |
.check_peer_type = vhost_vdpa_check_peer_type,
|
|
|
29b115 |
};
|
|
|
29b115 |
|
|
|
29b115 |
+static void vhost_vdpa_cvq_unmap_buf(struct vhost_vdpa *v, void *addr)
|
|
|
29b115 |
+{
|
|
|
29b115 |
+ VhostIOVATree *tree = v->iova_tree;
|
|
|
29b115 |
+ DMAMap needle = {
|
|
|
29b115 |
+ /*
|
|
|
29b115 |
+ * No need to specify size or to look for more translations since
|
|
|
29b115 |
+ * this contiguous chunk was allocated by us.
|
|
|
29b115 |
+ */
|
|
|
29b115 |
+ .translated_addr = (hwaddr)(uintptr_t)addr,
|
|
|
29b115 |
+ };
|
|
|
29b115 |
+ const DMAMap *map = vhost_iova_tree_find_iova(tree, &needle);
|
|
|
29b115 |
+ int r;
|
|
|
29b115 |
+
|
|
|
29b115 |
+ if (unlikely(!map)) {
|
|
|
29b115 |
+ error_report("Cannot locate expected map");
|
|
|
29b115 |
+ return;
|
|
|
29b115 |
+ }
|
|
|
29b115 |
+
|
|
|
29b115 |
+ r = vhost_vdpa_dma_unmap(v, map->iova, map->size + 1);
|
|
|
29b115 |
+ if (unlikely(r != 0)) {
|
|
|
29b115 |
+ error_report("Device cannot unmap: %s(%d)", g_strerror(r), r);
|
|
|
29b115 |
+ }
|
|
|
29b115 |
+
|
|
|
29b115 |
+ vhost_iova_tree_remove(tree, map);
|
|
|
29b115 |
+}
|
|
|
29b115 |
+
|
|
|
29b115 |
+static size_t vhost_vdpa_net_cvq_cmd_len(void)
|
|
|
29b115 |
+{
|
|
|
29b115 |
+ /*
|
|
|
29b115 |
+ * MAC_TABLE_SET is the ctrl command that produces the longest out buffer.
|
|
|
29b115 |
+ * In buffer is always 1 byte, so it should fit here
|
|
|
29b115 |
+ */
|
|
|
29b115 |
+ return sizeof(struct virtio_net_ctrl_hdr) +
|
|
|
29b115 |
+ 2 * sizeof(struct virtio_net_ctrl_mac) +
|
|
|
29b115 |
+ MAC_TABLE_ENTRIES * ETH_ALEN;
|
|
|
29b115 |
+}
|
|
|
29b115 |
+
|
|
|
29b115 |
+static size_t vhost_vdpa_net_cvq_cmd_page_len(void)
|
|
|
29b115 |
+{
|
|
|
29b115 |
+ return ROUND_UP(vhost_vdpa_net_cvq_cmd_len(), qemu_real_host_page_size);
|
|
|
29b115 |
+}
|
|
|
29b115 |
+
|
|
|
29b115 |
+/** Copy and map a guest buffer. */
|
|
|
29b115 |
+static bool vhost_vdpa_cvq_map_buf(struct vhost_vdpa *v,
|
|
|
29b115 |
+ const struct iovec *out_data,
|
|
|
29b115 |
+ size_t out_num, size_t data_len, void *buf,
|
|
|
29b115 |
+ size_t *written, bool write)
|
|
|
29b115 |
+{
|
|
|
29b115 |
+ DMAMap map = {};
|
|
|
29b115 |
+ int r;
|
|
|
29b115 |
+
|
|
|
29b115 |
+ if (unlikely(!data_len)) {
|
|
|
29b115 |
+ qemu_log_mask(LOG_GUEST_ERROR, "%s: invalid legnth of %s buffer\n",
|
|
|
29b115 |
+ __func__, write ? "in" : "out");
|
|
|
29b115 |
+ return false;
|
|
|
29b115 |
+ }
|
|
|
29b115 |
+
|
|
|
29b115 |
+ *written = iov_to_buf(out_data, out_num, 0, buf, data_len);
|
|
|
29b115 |
+ map.translated_addr = (hwaddr)(uintptr_t)buf;
|
|
|
29b115 |
+ map.size = vhost_vdpa_net_cvq_cmd_page_len() - 1;
|
|
|
29b115 |
+ map.perm = write ? IOMMU_RW : IOMMU_RO,
|
|
|
29b115 |
+ r = vhost_iova_tree_map_alloc(v->iova_tree, &map);
|
|
|
29b115 |
+ if (unlikely(r != IOVA_OK)) {
|
|
|
29b115 |
+ error_report("Cannot map injected element");
|
|
|
29b115 |
+ return false;
|
|
|
29b115 |
+ }
|
|
|
29b115 |
+
|
|
|
29b115 |
+ r = vhost_vdpa_dma_map(v, map.iova, vhost_vdpa_net_cvq_cmd_page_len(), buf,
|
|
|
29b115 |
+ !write);
|
|
|
29b115 |
+ if (unlikely(r < 0)) {
|
|
|
29b115 |
+ goto dma_map_err;
|
|
|
29b115 |
+ }
|
|
|
29b115 |
+
|
|
|
29b115 |
+ return true;
|
|
|
29b115 |
+
|
|
|
29b115 |
+dma_map_err:
|
|
|
29b115 |
+ vhost_iova_tree_remove(v->iova_tree, &map);
|
|
|
29b115 |
+ return false;
|
|
|
29b115 |
+}
|
|
|
29b115 |
+
|
|
|
29b115 |
/**
|
|
|
29b115 |
- * Forward buffer for the moment.
|
|
|
29b115 |
+ * Copy the guest element into a dedicated buffer suitable to be sent to NIC
|
|
|
29b115 |
+ *
|
|
|
29b115 |
+ * @iov: [0] is the out buffer, [1] is the in one
|
|
|
29b115 |
+ */
|
|
|
29b115 |
+static bool vhost_vdpa_net_cvq_map_elem(VhostVDPAState *s,
|
|
|
29b115 |
+ VirtQueueElement *elem,
|
|
|
29b115 |
+ struct iovec *iov)
|
|
|
29b115 |
+{
|
|
|
29b115 |
+ size_t in_copied;
|
|
|
29b115 |
+ bool ok;
|
|
|
29b115 |
+
|
|
|
29b115 |
+ iov[0].iov_base = s->cvq_cmd_out_buffer;
|
|
|
29b115 |
+ ok = vhost_vdpa_cvq_map_buf(&s->vhost_vdpa, elem->out_sg, elem->out_num,
|
|
|
29b115 |
+ vhost_vdpa_net_cvq_cmd_len(), iov[0].iov_base,
|
|
|
29b115 |
+ &iov[0].iov_len, false);
|
|
|
29b115 |
+ if (unlikely(!ok)) {
|
|
|
29b115 |
+ return false;
|
|
|
29b115 |
+ }
|
|
|
29b115 |
+
|
|
|
29b115 |
+ iov[1].iov_base = s->cvq_cmd_in_buffer;
|
|
|
29b115 |
+ ok = vhost_vdpa_cvq_map_buf(&s->vhost_vdpa, NULL, 0,
|
|
|
29b115 |
+ sizeof(virtio_net_ctrl_ack), iov[1].iov_base,
|
|
|
29b115 |
+ &in_copied, true);
|
|
|
29b115 |
+ if (unlikely(!ok)) {
|
|
|
29b115 |
+ vhost_vdpa_cvq_unmap_buf(&s->vhost_vdpa, s->cvq_cmd_out_buffer);
|
|
|
29b115 |
+ return false;
|
|
|
29b115 |
+ }
|
|
|
29b115 |
+
|
|
|
29b115 |
+ iov[1].iov_len = sizeof(virtio_net_ctrl_ack);
|
|
|
29b115 |
+ return true;
|
|
|
29b115 |
+}
|
|
|
29b115 |
+
|
|
|
29b115 |
+/**
|
|
|
29b115 |
+ * Do not forward commands not supported by SVQ. Otherwise, the device could
|
|
|
29b115 |
+ * accept it and qemu would not know how to update the device model.
|
|
|
29b115 |
+ */
|
|
|
29b115 |
+static bool vhost_vdpa_net_cvq_validate_cmd(const struct iovec *out,
|
|
|
29b115 |
+ size_t out_num)
|
|
|
29b115 |
+{
|
|
|
29b115 |
+ struct virtio_net_ctrl_hdr ctrl;
|
|
|
29b115 |
+ size_t n;
|
|
|
29b115 |
+
|
|
|
29b115 |
+ n = iov_to_buf(out, out_num, 0, &ctrl, sizeof(ctrl));
|
|
|
29b115 |
+ if (unlikely(n < sizeof(ctrl))) {
|
|
|
29b115 |
+ qemu_log_mask(LOG_GUEST_ERROR,
|
|
|
29b115 |
+ "%s: invalid legnth of out buffer %zu\n", __func__, n);
|
|
|
29b115 |
+ return false;
|
|
|
29b115 |
+ }
|
|
|
29b115 |
+
|
|
|
29b115 |
+ switch (ctrl.class) {
|
|
|
29b115 |
+ case VIRTIO_NET_CTRL_MAC:
|
|
|
29b115 |
+ switch (ctrl.cmd) {
|
|
|
29b115 |
+ case VIRTIO_NET_CTRL_MAC_ADDR_SET:
|
|
|
29b115 |
+ return true;
|
|
|
29b115 |
+ default:
|
|
|
29b115 |
+ qemu_log_mask(LOG_GUEST_ERROR, "%s: invalid mac cmd %u\n",
|
|
|
29b115 |
+ __func__, ctrl.cmd);
|
|
|
29b115 |
+ };
|
|
|
29b115 |
+ break;
|
|
|
29b115 |
+ default:
|
|
|
29b115 |
+ qemu_log_mask(LOG_GUEST_ERROR, "%s: invalid control class %u\n",
|
|
|
29b115 |
+ __func__, ctrl.class);
|
|
|
29b115 |
+ };
|
|
|
29b115 |
+
|
|
|
29b115 |
+ return false;
|
|
|
29b115 |
+}
|
|
|
29b115 |
+
|
|
|
29b115 |
+/**
|
|
|
29b115 |
+ * Validate and copy control virtqueue commands.
|
|
|
29b115 |
+ *
|
|
|
29b115 |
+ * Following QEMU guidelines, we offer a copy of the buffers to the device to
|
|
|
29b115 |
+ * prevent TOCTOU bugs.
|
|
|
29b115 |
*/
|
|
|
29b115 |
static int vhost_vdpa_net_handle_ctrl_avail(VhostShadowVirtqueue *svq,
|
|
|
29b115 |
VirtQueueElement *elem,
|
|
|
29b115 |
void *opaque)
|
|
|
29b115 |
{
|
|
|
29b115 |
- unsigned int n = elem->out_num + elem->in_num;
|
|
|
29b115 |
- g_autofree struct iovec *dev_buffers = g_new(struct iovec, n);
|
|
|
29b115 |
+ VhostVDPAState *s = opaque;
|
|
|
29b115 |
size_t in_len, dev_written;
|
|
|
29b115 |
virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
|
|
|
29b115 |
- int r;
|
|
|
29b115 |
+ /* out and in buffers sent to the device */
|
|
|
29b115 |
+ struct iovec dev_buffers[2] = {
|
|
|
29b115 |
+ { .iov_base = s->cvq_cmd_out_buffer },
|
|
|
29b115 |
+ { .iov_base = s->cvq_cmd_in_buffer },
|
|
|
29b115 |
+ };
|
|
|
29b115 |
+ /* in buffer used for device model */
|
|
|
29b115 |
+ const struct iovec in = {
|
|
|
29b115 |
+ .iov_base = &status,
|
|
|
29b115 |
+ .iov_len = sizeof(status),
|
|
|
29b115 |
+ };
|
|
|
29b115 |
+ int r = -EINVAL;
|
|
|
29b115 |
+ bool ok;
|
|
|
29b115 |
+
|
|
|
29b115 |
+ ok = vhost_vdpa_net_cvq_map_elem(s, elem, dev_buffers);
|
|
|
29b115 |
+ if (unlikely(!ok)) {
|
|
|
29b115 |
+ goto out;
|
|
|
29b115 |
+ }
|
|
|
29b115 |
|
|
|
29b115 |
- memcpy(dev_buffers, elem->out_sg, elem->out_num);
|
|
|
29b115 |
- memcpy(dev_buffers + elem->out_num, elem->in_sg, elem->in_num);
|
|
|
29b115 |
+ ok = vhost_vdpa_net_cvq_validate_cmd(&dev_buffers[0], 1);
|
|
|
29b115 |
+ if (unlikely(!ok)) {
|
|
|
29b115 |
+ goto out;
|
|
|
29b115 |
+ }
|
|
|
29b115 |
|
|
|
29b115 |
- r = vhost_svq_add(svq, &dev_buffers[0], elem->out_num, &dev_buffers[1],
|
|
|
29b115 |
- elem->in_num, elem);
|
|
|
29b115 |
+ r = vhost_svq_add(svq, &dev_buffers[0], 1, &dev_buffers[1], 1, elem);
|
|
|
29b115 |
if (unlikely(r != 0)) {
|
|
|
29b115 |
if (unlikely(r == -ENOSPC)) {
|
|
|
29b115 |
qemu_log_mask(LOG_GUEST_ERROR, "%s: No space on device queue\n",
|
|
|
29b115 |
@@ -224,6 +396,18 @@ static int vhost_vdpa_net_handle_ctrl_avail(VhostShadowVirtqueue *svq,
|
|
|
29b115 |
dev_written = vhost_svq_poll(svq);
|
|
|
29b115 |
if (unlikely(dev_written < sizeof(status))) {
|
|
|
29b115 |
error_report("Insufficient written data (%zu)", dev_written);
|
|
|
29b115 |
+ goto out;
|
|
|
29b115 |
+ }
|
|
|
29b115 |
+
|
|
|
29b115 |
+ memcpy(&status, dev_buffers[1].iov_base, sizeof(status));
|
|
|
29b115 |
+ if (status != VIRTIO_NET_OK) {
|
|
|
29b115 |
+ goto out;
|
|
|
29b115 |
+ }
|
|
|
29b115 |
+
|
|
|
29b115 |
+ status = VIRTIO_NET_ERR;
|
|
|
29b115 |
+ virtio_net_handle_ctrl_iov(svq->vdev, &in, 1, dev_buffers, 1);
|
|
|
29b115 |
+ if (status != VIRTIO_NET_OK) {
|
|
|
29b115 |
+ error_report("Bad CVQ processing in model");
|
|
|
29b115 |
}
|
|
|
29b115 |
|
|
|
29b115 |
out:
|
|
|
29b115 |
@@ -234,6 +418,12 @@ out:
|
|
|
29b115 |
}
|
|
|
29b115 |
vhost_svq_push_elem(svq, elem, MIN(in_len, sizeof(status)));
|
|
|
29b115 |
g_free(elem);
|
|
|
29b115 |
+ if (dev_buffers[0].iov_base) {
|
|
|
29b115 |
+ vhost_vdpa_cvq_unmap_buf(&s->vhost_vdpa, dev_buffers[0].iov_base);
|
|
|
29b115 |
+ }
|
|
|
29b115 |
+ if (dev_buffers[1].iov_base) {
|
|
|
29b115 |
+ vhost_vdpa_cvq_unmap_buf(&s->vhost_vdpa, dev_buffers[1].iov_base);
|
|
|
29b115 |
+ }
|
|
|
29b115 |
return r;
|
|
|
29b115 |
}
|
|
|
29b115 |
|
|
|
29b115 |
@@ -266,6 +456,13 @@ static NetClientState *net_vhost_vdpa_init(NetClientState *peer,
|
|
|
29b115 |
s->vhost_vdpa.device_fd = vdpa_device_fd;
|
|
|
29b115 |
s->vhost_vdpa.index = queue_pair_index;
|
|
|
29b115 |
if (!is_datapath) {
|
|
|
29b115 |
+ s->cvq_cmd_out_buffer = qemu_memalign(qemu_real_host_page_size,
|
|
|
29b115 |
+ vhost_vdpa_net_cvq_cmd_page_len());
|
|
|
29b115 |
+ memset(s->cvq_cmd_out_buffer, 0, vhost_vdpa_net_cvq_cmd_page_len());
|
|
|
29b115 |
+ s->cvq_cmd_in_buffer = qemu_memalign(qemu_real_host_page_size,
|
|
|
29b115 |
+ vhost_vdpa_net_cvq_cmd_page_len());
|
|
|
29b115 |
+ memset(s->cvq_cmd_in_buffer, 0, vhost_vdpa_net_cvq_cmd_page_len());
|
|
|
29b115 |
+
|
|
|
29b115 |
s->vhost_vdpa.shadow_vq_ops = &vhost_vdpa_net_svq_ops;
|
|
|
29b115 |
s->vhost_vdpa.shadow_vq_ops_opaque = s;
|
|
|
29b115 |
}
|
|
|
29b115 |
--
|
|
|
29b115 |
2.31.1
|
|
|
29b115 |
|