Blame SOURCES/0001-vhost_user_protect_active_rings_from_async_ring_changes.patch

e49aad
From patchwork Wed Jan 17 13:49:25 2018
e49aad
Content-Type: text/plain; charset="utf-8"
e49aad
MIME-Version: 1.0
e49aad
Content-Transfer-Encoding: 7bit
e49aad
Subject: [dpdk-dev,
e49aad
 v5] vhost_user: protect active rings from async ring changes
e49aad
From: Victor Kaplansky <victork@redhat.com>
e49aad
X-Patchwork-Id: 33921
e49aad
X-Patchwork-Delegate: yuanhan.liu@linux.intel.com
e49aad
List-Id: dev.dpdk.org
e49aad
To: dev@dpdk.org
e49aad
Cc: stable@dpdk.org, Jens Freimann <jfreiman@redhat.com>,
e49aad
 Maxime Coquelin <maxime.coquelin@redhat.com>,
e49aad
 Yuanhan Liu <yliu@fridaylinux.org>, Tiwei Bie <tiwei.bie@intel.com>, 
e49aad
 "Tan, Jianfeng" <jianfeng.tan@intel.com>,
e49aad
 Stephen Hemminger <stephen@networkplumber.org>,
e49aad
 Victor Kaplansky <victork@redhat.com>
e49aad
Date: Wed, 17 Jan 2018 15:49:25 +0200
e49aad
e49aad
When performing live migration or memory hot-plugging,
e49aad
the changes to the device and vrings made by message handler
e49aad
are done independently of vring usage by the PMD threads.
e49aad
e49aad
This causes for example segfaults during live-migration
e49aad
with MQ enabled, but in general virtually any request
e49aad
sent by qemu changing the state of device can cause
e49aad
problems.
e49aad
e49aad
This patch fixes all of the above issues by adding a spinlock
e49aad
to every vring and requiring message handler to start operation
e49aad
only after ensuring that all PMD threads related to the device
e49aad
are out of critical section accessing the vring data.
e49aad
e49aad
Each vring has its own lock in order to not create contention
e49aad
between PMD threads of different vrings and to prevent
e49aad
performance degradation when scaling the queue pair number.
e49aad
e49aad
See https://bugzilla.redhat.com/show_bug.cgi?id=1450680
e49aad
e49aad
Signed-off-by: Victor Kaplansky <victork@redhat.com>
e49aad
Reviewed-by: Maxime Coquelin <maxime.coquelin@redhat.com>
e49aad
---
e49aad
v5:
e49aad
 o get rid of spinlock wrapping functions in vhost.h
e49aad
e49aad
v4:
e49aad
e49aad
 o moved access_lock before accessing enable flag and
e49aad
   access_unlock after iommu_unlock consistently.
e49aad
 o cosmetics: removed blank line.
e49aad
 o the access_lock variable moved to be in the same
e49aad
   cache line with enable and access_ok flags.
e49aad
 o dequeue path is now guarded with trylock and returning
e49aad
   zero if unsuccessful.
e49aad
 o GET_VRING_BASE operation is not guarded by access lock
e49aad
   to avoid deadlock with device_destroy. See the comment
e49aad
   in the code.
e49aad
 o Fixed error path exit from enqueue and dequeue carefully
e49aad
   unlocking access and iommu locks as appropriate.
e49aad
e49aad
v3:
e49aad
   o Added locking to enqueue flow.
e49aad
   o Enqueue path guarded as well as dequeue path.
e49aad
   o Changed name of active_lock.
e49aad
   o Added initialization of guarding spinlock.
e49aad
   o Reworked functions skimming over all virt-queues.
e49aad
   o Performance measurements done by Maxime Coquelin shows
e49aad
     no degradation in bandwidth and throughput.
e49aad
   o Spelling.
e49aad
   o Taking lock only on set operations.
e49aad
   o IOMMU messages are not guarded by access lock.
e49aad
e49aad
v2:
e49aad
   o Fixed checkpatch complaints.
e49aad
   o Added Signed-off-by.
e49aad
   o Refined placement of guard to exclude IOMMU messages.
e49aad
   o TODO: performance degradation measurement.
e49aad
e49aad
 dpdk-17.11/lib/librte_vhost/vhost.h      |  6 ++--
e49aad
 dpdk-17.11/lib/librte_vhost/vhost.c      |  1 +
e49aad
 dpdk-17.11/lib/librte_vhost/vhost_user.c | 70 ++++++++++++++++++++++++++++++++
e49aad
 dpdk-17.11/lib/librte_vhost/virtio_net.c | 28 ++++++++++++++---
e49aad
 4 files changed, 99 insertions(+), 6 deletions(-)
e49aad
e49aad
diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h
e49aad
index 1cc81c17..c8f2a817 100644
e49aad
--- a/lib/librte_vhost/vhost.h
e49aad
+++ b/lib/librte_vhost/vhost.h
e49aad
@@ -108,12 +108,14 @@ struct vhost_virtqueue {
e49aad
 
e49aad
 	/* Backend value to determine if device should started/stopped */
e49aad
 	int			backend;
e49aad
+	int			enabled;
e49aad
+	int			access_ok;
e49aad
+	rte_spinlock_t		access_lock;
e49aad
+
e49aad
 	/* Used to notify the guest (trigger interrupt) */
e49aad
 	int			callfd;
e49aad
 	/* Currently unused as polling mode is enabled */
e49aad
 	int			kickfd;
e49aad
-	int			enabled;
e49aad
-	int			access_ok;
e49aad
 
e49aad
 	/* Physical address of used ring, for logging */
e49aad
 	uint64_t		log_guest_addr;
e49aad
diff --git a/lib/librte_vhost/vhost.c b/lib/librte_vhost/vhost.c
e49aad
index 4f8b73a0..dcc42fc7 100644
e49aad
--- a/lib/librte_vhost/vhost.c
e49aad
+++ b/lib/librte_vhost/vhost.c
e49aad
@@ -259,6 +259,7 @@ alloc_vring_queue(struct virtio_net *dev, uint32_t vring_idx)
e49aad
 
e49aad
 	dev->virtqueue[vring_idx] = vq;
e49aad
 	init_vring_queue(dev, vring_idx);
e49aad
+	rte_spinlock_init(&vq->access_lock);
e49aad
 
e49aad
 	dev->nr_vring += 1;
e49aad
 
e49aad
diff --git a/lib/librte_vhost/vhost_user.c b/lib/librte_vhost/vhost_user.c
e49aad
index f4c7ce46..0685d4e7 100644
e49aad
--- a/lib/librte_vhost/vhost_user.c
e49aad
+++ b/lib/librte_vhost/vhost_user.c
e49aad
@@ -1190,12 +1190,47 @@ vhost_user_check_and_alloc_queue_pair(struct virtio_net *dev, VhostUserMsg *msg)
e49aad
 	return alloc_vring_queue(dev, vring_idx);
e49aad
 }
e49aad
 
e49aad
+static void
e49aad
+vhost_user_lock_all_queue_pairs(struct virtio_net *dev)
e49aad
+{
e49aad
+	unsigned int i = 0;
e49aad
+	unsigned int vq_num = 0;
e49aad
+
e49aad
+	while (vq_num < dev->nr_vring) {
e49aad
+		struct vhost_virtqueue *vq = dev->virtqueue[i];
e49aad
+
e49aad
+		if (vq) {
e49aad
+			rte_spinlock_lock(&vq->access_lock);
e49aad
+			vq_num++;
e49aad
+		}
e49aad
+		i++;
e49aad
+	}
e49aad
+}
e49aad
+
e49aad
+static void
e49aad
+vhost_user_unlock_all_queue_pairs(struct virtio_net *dev)
e49aad
+{
e49aad
+	unsigned int i = 0;
e49aad
+	unsigned int vq_num = 0;
e49aad
+
e49aad
+	while (vq_num < dev->nr_vring) {
e49aad
+		struct vhost_virtqueue *vq = dev->virtqueue[i];
e49aad
+
e49aad
+		if (vq) {
e49aad
+			rte_spinlock_unlock(&vq->access_lock);
e49aad
+			vq_num++;
e49aad
+		}
e49aad
+		i++;
e49aad
+	}
e49aad
+}
e49aad
+
e49aad
 int
e49aad
 vhost_user_msg_handler(int vid, int fd)
e49aad
 {
e49aad
 	struct virtio_net *dev;
e49aad
 	struct VhostUserMsg msg;
e49aad
 	int ret;
e49aad
+	int unlock_required = 0;
e49aad
 
e49aad
 	dev = get_device(vid);
e49aad
 	if (dev == NULL)
e49aad
@@ -1241,6 +1276,38 @@ vhost_user_msg_handler(int vid, int fd)
e49aad
 		return -1;
e49aad
 	}
e49aad
 
e49aad
+	/*
e49aad
+	 * Note: we don't lock all queues on VHOST_USER_GET_VRING_BASE,
e49aad
+	 * since it is sent when virtio stops and device is destroyed.
e49aad
+	 * destroy_device waits for queues to be inactive, so it is safe.
e49aad
+	 * Otherwise taking the access_lock would cause a deadlock.
e49aad
+	 */
e49aad
+	switch (msg.request.master) {
e49aad
+	case VHOST_USER_SET_FEATURES:
e49aad
+	case VHOST_USER_SET_PROTOCOL_FEATURES:
e49aad
+	case VHOST_USER_SET_OWNER:
e49aad
+	case VHOST_USER_RESET_OWNER:
e49aad
+	case VHOST_USER_SET_MEM_TABLE:
e49aad
+	case VHOST_USER_SET_LOG_BASE:
e49aad
+	case VHOST_USER_SET_LOG_FD:
e49aad
+	case VHOST_USER_SET_VRING_NUM:
e49aad
+	case VHOST_USER_SET_VRING_ADDR:
e49aad
+	case VHOST_USER_SET_VRING_BASE:
e49aad
+	case VHOST_USER_SET_VRING_KICK:
e49aad
+	case VHOST_USER_SET_VRING_CALL:
e49aad
+	case VHOST_USER_SET_VRING_ERR:
e49aad
+	case VHOST_USER_SET_VRING_ENABLE:
e49aad
+	case VHOST_USER_SEND_RARP:
e49aad
+	case VHOST_USER_NET_SET_MTU:
e49aad
+	case VHOST_USER_SET_SLAVE_REQ_FD:
e49aad
+		vhost_user_lock_all_queue_pairs(dev);
e49aad
+		unlock_required = 1;
e49aad
+		break;
e49aad
+	default:
e49aad
+		break;
e49aad
+
e49aad
+	}
e49aad
+
e49aad
 	switch (msg.request.master) {
e49aad
 	case VHOST_USER_GET_FEATURES:
e49aad
 		msg.payload.u64 = vhost_user_get_features(dev);
e49aad
@@ -1342,6 +1409,9 @@ vhost_user_msg_handler(int vid, int fd)
e49aad
 
e49aad
 	}
e49aad
 
e49aad
+	if (unlock_required)
e49aad
+		vhost_user_unlock_all_queue_pairs(dev);
e49aad
+
e49aad
 	if (msg.flags & VHOST_USER_NEED_REPLY) {
e49aad
 		msg.payload.u64 = !!ret;
e49aad
 		msg.size = sizeof(msg.payload.u64);
e49aad
diff --git a/lib/librte_vhost/virtio_net.c b/lib/librte_vhost/virtio_net.c
e49aad
index 6fee16e5..e09a927d 100644
e49aad
--- a/lib/librte_vhost/virtio_net.c
e49aad
+++ b/lib/librte_vhost/virtio_net.c
e49aad
@@ -44,6 +44,7 @@
e49aad
 #include <rte_udp.h>
e49aad
 #include <rte_sctp.h>
e49aad
 #include <rte_arp.h>
e49aad
+#include <rte_spinlock.h>
e49aad
 
e49aad
 #include "iotlb.h"
e49aad
 #include "vhost.h"
e49aad
@@ -326,8 +327,11 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
e49aad
 	}
e49aad
 
e49aad
 	vq = dev->virtqueue[queue_id];
e49aad
+
e49aad
+	rte_spinlock_lock(&vq->access_lock);
e49aad
+
e49aad
 	if (unlikely(vq->enabled == 0))
e49aad
-		return 0;
e49aad
+		goto out_access_unlock;
e49aad
 
e49aad
 	if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
e49aad
 		vhost_user_iotlb_rd_lock(vq);
e49aad
@@ -419,6 +423,9 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
e49aad
 	if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
e49aad
 		vhost_user_iotlb_rd_unlock(vq);
e49aad
 
e49aad
+out_access_unlock:
e49aad
+	rte_spinlock_unlock(&vq->access_lock);
e49aad
+
e49aad
 	return count;
e49aad
 }
e49aad
 
e49aad
@@ -651,8 +658,11 @@ virtio_dev_merge_rx(struct virtio_net *dev, uint16_t queue_id,
e49aad
 	}
e49aad
 
e49aad
 	vq = dev->virtqueue[queue_id];
e49aad
+
e49aad
+	rte_spinlock_lock(&vq->access_lock);
e49aad
+
e49aad
 	if (unlikely(vq->enabled == 0))
e49aad
-		return 0;
e49aad
+		goto out_access_unlock;
e49aad
 
e49aad
 	if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
e49aad
 		vhost_user_iotlb_rd_lock(vq);
e49aad
@@ -715,6 +725,9 @@ virtio_dev_merge_rx(struct virtio_net *dev, uint16_t queue_id,
e49aad
 	if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
e49aad
 		vhost_user_iotlb_rd_unlock(vq);
e49aad
 
e49aad
+out_access_unlock:
e49aad
+	rte_spinlock_unlock(&vq->access_lock);
e49aad
+
e49aad
 	return pkt_idx;
e49aad
 }
e49aad
 
e49aad
@@ -1180,9 +1193,13 @@ rte_vhost_dequeue_burst(int vid, uint16_t queue_id,
e49aad
 	}
e49aad
 
e49aad
 	vq = dev->virtqueue[queue_id];
e49aad
-	if (unlikely(vq->enabled == 0))
e49aad
+
e49aad
+	if (unlikely(rte_spinlock_trylock(&vq->access_lock) == 0))
e49aad
 		return 0;
e49aad
 
e49aad
+	if (unlikely(vq->enabled == 0))
e49aad
+		goto out_access_unlock;
e49aad
+
e49aad
 	vq->batch_copy_nb_elems = 0;
e49aad
 
e49aad
 	if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
e49aad
@@ -1240,7 +1257,7 @@ rte_vhost_dequeue_burst(int vid, uint16_t queue_id,
e49aad
 		if (rarp_mbuf == NULL) {
e49aad
 			RTE_LOG(ERR, VHOST_DATA,
e49aad
 				"Failed to allocate memory for mbuf.\n");
e49aad
-			return 0;
e49aad
+			goto out;
e49aad
 		}
e49aad
 
e49aad
 		if (make_rarp_packet(rarp_mbuf, &dev->mac)) {
e49aad
@@ -1356,6 +1373,9 @@ rte_vhost_dequeue_burst(int vid, uint16_t queue_id,
e49aad
 	if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
e49aad
 		vhost_user_iotlb_rd_unlock(vq);
e49aad
 
e49aad
+out_access_unlock:
e49aad
+	rte_spinlock_unlock(&vq->access_lock);
e49aad
+
e49aad
 	if (unlikely(rarp_mbuf != NULL)) {
e49aad
 		/*
e49aad
 		 * Inject it to the head of "pkts" array, so that switch's mac