From 4d8b1e6aa5d7ecfc1d2ee606b4bd838b4f1ac9d2 Mon Sep 17 00:00:00 2001
From: Maxime Coquelin <maxime.coquelin@redhat.com>
Date: Thu, 17 May 2018 13:44:47 +0200
Subject: [PATCH] vhost: improve dirty pages logging performance

[ upstream commit c16915b8710911a75f0fbdb1aa5243f4cdfaf26a ]

This patch caches all dirty pages logging until the used ring index
is updated.

The goal of this optimization is to fix a performance regression
introduced when the vhost library started to use atomic operations
to set bits in the shared dirty log map. While the fix was valid
as previous implementation wasn't safe against concurrent accesses,
contention was induced.

With this patch, during migration, we have:
1. Less atomic operations as only a single atomic OR operation
per 32 or 64 (depending on CPU) pages.
2. Less atomic operations as during a burst, the same page will
be marked dirty only once.
3. Less write memory barriers.

Fixes: 897f13a1f726 ("vhost: make page logging atomic")
Cc: stable@dpdk.org

Suggested-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Maxime Coquelin <maxime.coquelin@redhat.com>
Reviewed-by: Tiwei Bie <tiwei.bie@intel.com>
---
 lib/librte_vhost/vhost.h      | 120 +++++++++++++++++++++++++++++++++++++++++-
 lib/librte_vhost/virtio_net.c |  29 ++++----
 2 files changed, 138 insertions(+), 11 deletions(-)

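Note: the standalone sketch below is not part of the patch; it only
illustrates the batching idea under simplified assumptions (hypothetical
names, a single queue, a fixed-size shared bitmap). Dirty pages are first
merged into a small cache of (word offset, word value) pairs, and the shared
log is then updated with one atomic OR per cached word when the cache is
flushed, instead of one atomic OR per page.

/* Illustrative sketch only -- hypothetical names, not the DPDK code. */
#include <stdint.h>
#include <stdio.h>

#define LOG_PAGE_SIZE	4096
#define LOG_CACHE_NR	32
#define BITS_PER_WORD	(sizeof(unsigned long) * 8)

struct cache_entry {
	uint32_t offset;	/* word index in the shared bitmap */
	unsigned long val;	/* bits to OR into that word */
};

static struct cache_entry cache[LOG_CACHE_NR];
static unsigned int cache_nb;
static unsigned long shared_log[1024];	/* stands in for the shared log map */

static void cache_mark_page(uint64_t page)
{
	uint32_t offset = page / BITS_PER_WORD;
	uint32_t bit = page % BITS_PER_WORD;
	unsigned int i;

	/* A page touched several times in a burst is merged into the
	 * same cache entry, so it costs a single update at flush time. */
	for (i = 0; i < cache_nb; i++) {
		if (cache[i].offset == offset) {
			cache[i].val |= 1UL << bit;
			return;
		}
	}

	if (i >= LOG_CACHE_NR) {
		/* Cache full: fall back to an immediate atomic update. */
		__atomic_fetch_or(&shared_log[offset], 1UL << bit,
				__ATOMIC_RELAXED);
		return;
	}

	cache[i].offset = offset;
	cache[i].val = 1UL << bit;
	cache_nb++;
}

static void cache_sync(void)
{
	unsigned int i;

	/* One atomic OR per cached word instead of one per dirty page. */
	for (i = 0; i < cache_nb; i++)
		__atomic_fetch_or(&shared_log[cache[i].offset], cache[i].val,
				__ATOMIC_RELAXED);

	cache_nb = 0;
}

int main(void)
{
	uint64_t addr;

	/* Mark a burst of 512B writes dirty, then flush once. */
	for (addr = 0; addr < 16 * LOG_PAGE_SIZE; addr += 512)
		cache_mark_page(addr / LOG_PAGE_SIZE);
	cache_sync();

	printf("first log word: 0x%lx\n", shared_log[0]);
	return 0;
}

In the patch itself, the cache lives in struct vhost_virtqueue and is
flushed by vhost_log_cache_sync() right before the used index is exposed
to the guest.
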
diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h
index 16d6b8913..42c6a3a75 100644
--- a/lib/librte_vhost/vhost.h
+++ b/lib/librte_vhost/vhost.h
@@ -59,6 +59,8 @@
 
 #define BUF_VECTOR_MAX 256
 
+#define VHOST_LOG_CACHE_NR 32
+
 /**
  * Structure contains buffer address, length and descriptor index
  * from vring to do scatter RX.
@@ -92,6 +94,14 @@ struct batch_copy_elem {
 	uint64_t log_addr;
 };
 
+/*
+ * Structure that contains the info for batched dirty logging.
+ */
+struct log_cache_entry {
+	uint32_t offset;
+	unsigned long val;
+};
+
 /**
  * Structure contains variables relevant to RX/TX virtqueues.
  */
@@ -133,6 +143,9 @@ struct vhost_virtqueue {
 	struct batch_copy_elem	*batch_copy_elems;
 	uint16_t		batch_copy_nb_elems;
 
+	struct log_cache_entry log_cache[VHOST_LOG_CACHE_NR];
+	uint16_t log_cache_nb_elem;
+
 	rte_rwlock_t	iotlb_lock;
 	rte_rwlock_t	iotlb_pending_lock;
 	struct rte_mempool *iotlb_pool;
@@ -266,7 +279,15 @@ struct virtio_net {
 static __rte_always_inline void
 vhost_set_bit(unsigned int nr, volatile uint8_t *addr)
 {
-	__sync_fetch_and_or_8(addr, (1U << nr));
+#if defined(RTE_TOOLCHAIN_GCC) && (GCC_VERSION < 70100)
+	/*
+	 * __sync_ built-ins are deprecated, but __atomic_ ones
+	 * are sub-optimized in older GCC versions.
+	 */
+	__sync_fetch_and_or_1(addr, (1U << nr));
+#else
+	__atomic_fetch_or(addr, (1U << nr), __ATOMIC_RELAXED);
+#endif
 }
 
 static __rte_always_inline void
@@ -297,6 +318,103 @@ vhost_log_write(struct virtio_net *dev, uint64_t addr, uint64_t len)
 	}
 }
 
+static __rte_always_inline void
+vhost_log_cache_sync(struct virtio_net *dev, struct vhost_virtqueue *vq)
+{
+	unsigned long *log_base;
+	int i;
+
+	if (likely(((dev->features & (1ULL << VHOST_F_LOG_ALL)) == 0) ||
+		   !dev->log_base))
+		return;
+
+	log_base = (unsigned long *)(uintptr_t)dev->log_base;
+
+	/*
+	 * It is expected a write memory barrier has been issued
+	 * before this function is called.
+	 */
+
+	for (i = 0; i < vq->log_cache_nb_elem; i++) {
+		struct log_cache_entry *elem = vq->log_cache + i;
+
+#if defined(RTE_TOOLCHAIN_GCC) && (GCC_VERSION < 70100)
+		/*
+		 * '__sync' builtins are deprecated, but '__atomic' ones
+		 * are sub-optimized in older GCC versions.
+		 */
+		__sync_fetch_and_or(log_base + elem->offset, elem->val);
+#else
+		__atomic_fetch_or(log_base + elem->offset, elem->val,
+				__ATOMIC_RELAXED);
+#endif
+	}
+
+	rte_smp_wmb();
+
+	vq->log_cache_nb_elem = 0;
+}
+
+static __rte_always_inline void
+vhost_log_cache_page(struct virtio_net *dev, struct vhost_virtqueue *vq,
+			uint64_t page)
+{
+	uint32_t bit_nr = page % (sizeof(unsigned long) << 3);
+	uint32_t offset = page / (sizeof(unsigned long) << 3);
+	int i;
+
+	for (i = 0; i < vq->log_cache_nb_elem; i++) {
+		struct log_cache_entry *elem = vq->log_cache + i;
+
+		if (elem->offset == offset) {
+			elem->val |= (1UL << bit_nr);
+			return;
+		}
+	}
+
+	if (unlikely(i >= VHOST_LOG_CACHE_NR)) {
+		/*
+		 * No more room for a new log cache entry,
+		 * so write the dirty log map directly.
+		 */
+		rte_smp_wmb();
+		vhost_log_page((uint8_t *)(uintptr_t)dev->log_base, page);
+
+		return;
+	}
+
+	vq->log_cache[i].offset = offset;
+	vq->log_cache[i].val = (1UL << bit_nr);
+	vq->log_cache_nb_elem++;
+}
+
+static __rte_always_inline void
+vhost_log_cache_write(struct virtio_net *dev, struct vhost_virtqueue *vq,
+			uint64_t addr, uint64_t len)
+{
+	uint64_t page;
+
+	if (likely(((dev->features & (1ULL << VHOST_F_LOG_ALL)) == 0) ||
+		   !dev->log_base || !len))
+		return;
+
+	if (unlikely(dev->log_size <= ((addr + len - 1) / VHOST_LOG_PAGE / 8)))
+		return;
+
+	page = addr / VHOST_LOG_PAGE;
+	while (page * VHOST_LOG_PAGE < addr + len) {
+		vhost_log_cache_page(dev, vq, page);
+		page += 1;
+	}
+}
+
+static __rte_always_inline void
+vhost_log_cache_used_vring(struct virtio_net *dev, struct vhost_virtqueue *vq,
+			uint64_t offset, uint64_t len)
+{
+	vhost_log_cache_write(dev, vq, vq->log_guest_addr + offset, len);
+}
+
 static __rte_always_inline void
 vhost_log_used_vring(struct virtio_net *dev, struct vhost_virtqueue *vq,
 			uint64_t offset, uint64_t len)
diff --git a/lib/librte_vhost/virtio_net.c b/lib/librte_vhost/virtio_net.c
index a013c07b0..5f8763d3a 100644
--- a/lib/librte_vhost/virtio_net.c
+++ b/lib/librte_vhost/virtio_net.c
@@ -107,7 +107,7 @@ do_flush_shadow_used_ring(struct virtio_net *dev, struct vhost_virtqueue *vq,
 	rte_memcpy(&vq->used->ring[to],
 			&vq->shadow_used_ring[from],
 			size * sizeof(struct vring_used_elem));
-	vhost_log_used_vring(dev, vq,
+	vhost_log_cache_used_vring(dev, vq,
 			offsetof(struct vring_used, ring[to]),
 			size * sizeof(struct vring_used_elem));
 }
@@ -135,6 +135,8 @@ flush_shadow_used_ring(struct virtio_net *dev, struct vhost_virtqueue *vq)
 
 	rte_smp_wmb();
 
+	vhost_log_cache_sync(dev, vq);
+
 	*(volatile uint16_t *)&vq->used->idx += vq->shadow_used_idx;
 	vhost_log_used_vring(dev, vq, offsetof(struct vring_used, idx),
 		sizeof(vq->used->idx));
@@ -159,7 +161,7 @@ do_data_copy_enqueue(struct virtio_net *dev, struct vhost_virtqueue *vq)
 
 	for (i = 0; i < count; i++) {
 		rte_memcpy(elem[i].dst, elem[i].src, elem[i].len);
-		vhost_log_write(dev, elem[i].log_addr, elem[i].len);
+		vhost_log_cache_write(dev, vq, elem[i].log_addr, elem[i].len);
 		PRINT_PACKET(dev, (uintptr_t)elem[i].dst, elem[i].len, 0);
 	}
 }
@@ -275,7 +277,7 @@ copy_mbuf_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq,
 		virtio_enqueue_offload(m,
 				(struct virtio_net_hdr *)(uintptr_t)desc_addr);
 		PRINT_PACKET(dev, (uintptr_t)desc_addr, dev->vhost_hlen, 0);
-		vhost_log_write(dev, desc_gaddr, dev->vhost_hlen);
+		vhost_log_cache_write(dev, vq, desc_gaddr, dev->vhost_hlen);
 	} else {
 		struct virtio_net_hdr vnet_hdr;
 		uint64_t remain = dev->vhost_hlen;
@@ -298,7 +300,7 @@ copy_mbuf_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq,
 					(void *)(uintptr_t)src, len);
 
 			PRINT_PACKET(dev, (uintptr_t)dst, len, 0);
-			vhost_log_write(dev, guest_addr, len);
+			vhost_log_cache_write(dev, vq, guest_addr, len);
 			remain -= len;
 			guest_addr += len;
 			dst += len;
@@ -379,7 +381,8 @@ copy_mbuf_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq,
 							desc_offset)),
 				rte_pktmbuf_mtod_offset(m, void *, mbuf_offset),
 				cpy_len);
-			vhost_log_write(dev, desc_gaddr + desc_offset, cpy_len);
+			vhost_log_cache_write(dev, vq, desc_gaddr + desc_offset,
+					cpy_len);
 			PRINT_PACKET(dev, (uintptr_t)(desc_addr + desc_offset),
 					cpy_len, 0);
 		} else {
@@ -468,7 +471,7 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
 		vq->used->ring[used_idx].id = desc_indexes[i];
 		vq->used->ring[used_idx].len = pkts[i]->pkt_len +
 					       dev->vhost_hlen;
-		vhost_log_used_vring(dev, vq,
+		vhost_log_cache_used_vring(dev, vq,
 			offsetof(struct vring_used, ring[used_idx]),
 			sizeof(vq->used->ring[used_idx]));
 	}
@@ -528,6 +531,8 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
 
 	rte_smp_wmb();
 
+	vhost_log_cache_sync(dev, vq);
+
 	*(volatile uint16_t *)&vq->used->idx += count;
 	vq->last_used_idx += count;
 	vhost_log_used_vring(dev, vq,
@@ -797,7 +802,8 @@ copy_mbuf_to_desc_mergeable(struct virtio_net *dev, struct vhost_virtqueue *vq,
 
 				PRINT_PACKET(dev, (uintptr_t)dst,
 						len, 0);
-				vhost_log_write(dev, guest_addr, len);
+				vhost_log_cache_write(dev, vq,
+						guest_addr, len);
 
 				remain -= len;
 				guest_addr += len;
@@ -806,7 +812,7 @@ copy_mbuf_to_desc_mergeable(struct virtio_net *dev, struct vhost_virtqueue *vq,
 		} else {
 			PRINT_PACKET(dev, (uintptr_t)hdr_addr,
 					dev->vhost_hlen, 0);
-			vhost_log_write(dev, hdr_phys_addr,
+			vhost_log_cache_write(dev, vq, hdr_phys_addr,
 					dev->vhost_hlen);
 		}
 
@@ -820,7 +826,8 @@ copy_mbuf_to_desc_mergeable(struct virtio_net *dev, struct vhost_virtqueue *vq,
 							desc_offset)),
 				rte_pktmbuf_mtod_offset(m, void *, mbuf_offset),
 				cpy_len);
-			vhost_log_write(dev, desc_gaddr + desc_offset, cpy_len);
+			vhost_log_cache_write(dev, vq, desc_gaddr + desc_offset,
+					cpy_len);
 			PRINT_PACKET(dev, (uintptr_t)(desc_addr + desc_offset),
 					cpy_len, 0);
 		} else {
@@ -1384,7 +1391,7 @@ update_used_ring(struct virtio_net *dev, struct vhost_virtqueue *vq,
 {
 	vq->used->ring[used_idx].id = desc_idx;
 	vq->used->ring[used_idx].len = 0;
-	vhost_log_used_vring(dev, vq,
+	vhost_log_cache_used_vring(dev, vq,
 			offsetof(struct vring_used, ring[used_idx]),
 			sizeof(vq->used->ring[used_idx]));
 }
@@ -1399,6 +1406,8 @@ update_used_idx(struct virtio_net *dev, struct vhost_virtqueue *vq,
 	rte_smp_wmb();
 	rte_smp_rmb();
 
+	vhost_log_cache_sync(dev, vq);
+
 	vq->used->idx += count;
 	vhost_log_used_vring(dev, vq, offsetof(struct vring_used, idx),
 			sizeof(vq->used->idx));
-- 
2.14.3
