diff --git a/SOURCES/0001-mem-add-function-for-checking-memsegs-IOVAs-addresse.patch b/SOURCES/0001-mem-add-function-for-checking-memsegs-IOVAs-addresse.patch
new file mode 100644
index 0000000..0c0581d
--- /dev/null
+++ b/SOURCES/0001-mem-add-function-for-checking-memsegs-IOVAs-addresse.patch
@@ -0,0 +1,114 @@
+From 3a80bc50c949760f7159b59ba30a70f95c223448 Mon Sep 17 00:00:00 2001
+From: Alejandro Lucero
+Date: Tue, 10 Jul 2018 18:25:48 +0100
+Subject: [1/5] mem: add function for checking memsegs IOVAs addresses
+
+A device can suffer from addressing limitations. This function checks
+that memseg IOVAs are within the supported range based on the DMA mask.
+
+A PMD should use this during initialization if its supported devices
+suffer from addressing limitations, returning an error if this function
+finds memsegs out of range.
+
+Another potential usage is for emulated IOMMU hardware with addressing
+limitations.
+
+Applicable to v17.11.3 only.
+
+Signed-off-by: Alejandro Lucero
+Acked-by: Anatoly Burakov
+Acked-by: Eelco Chaudron
+---
+ lib/librte_eal/common/eal_common_memory.c  | 48 ++++++++++++++++++++++++++++++
+ lib/librte_eal/common/include/rte_memory.h |  3 ++
+ lib/librte_eal/rte_eal_version.map         |  1 +
+ 3 files changed, 52 insertions(+)
+
+diff --git a/lib/librte_eal/common/eal_common_memory.c b/lib/librte_eal/common/eal_common_memory.c
+index fc6c44d..00ab393 100644
+--- a/lib/librte_eal/common/eal_common_memory.c
++++ b/lib/librte_eal/common/eal_common_memory.c
+@@ -109,6 +109,54 @@ rte_dump_physmem_layout(FILE *f)
+ 	}
+ }
+ 
++#if defined(RTE_ARCH_X86)
++#define X86_VA_WIDTH 47 /* From Documentation/x86/x86_64/mm.txt */
++#define MAX_DMA_MASK_BITS X86_VA_WIDTH
++#else
++/* 63 bits is good enough for a sanity check */
++#define MAX_DMA_MASK_BITS 63
++#endif
++
++/* check memseg iovas are within the required range based on dma mask */
++int
++rte_eal_check_dma_mask(uint8_t maskbits)
++{
++
++	const struct rte_mem_config *mcfg;
++	uint64_t mask;
++	int i;
++
++	/* sanity check */
++	if (maskbits > MAX_DMA_MASK_BITS) {
++		RTE_LOG(INFO, EAL, "wrong dma mask size %u (Max: %u)\n",
++				   maskbits, MAX_DMA_MASK_BITS);
++		return -1;
++	}
++
++	/* create dma mask */
++	mask = ~((1ULL << maskbits) - 1);
++
++	/* get pointer to global configuration */
++	mcfg = rte_eal_get_configuration()->mem_config;
++
++	for (i = 0; i < RTE_MAX_MEMSEG; i++) {
++		if (mcfg->memseg[i].addr == NULL)
++			break;
++
++		if (mcfg->memseg[i].iova & mask) {
++			RTE_LOG(INFO, EAL,
++				"memseg[%d] iova %"PRIx64" out of range:\n",
++				i, mcfg->memseg[i].iova);
++
++			RTE_LOG(INFO, EAL, "\tusing dma mask %"PRIx64"\n",
++				mask);
++			return -1;
++		}
++	}
++
++	return 0;
++}
++
+ /* return the number of memory channels */
+ unsigned rte_memory_get_nchannel(void)
+ {
+diff --git a/lib/librte_eal/common/include/rte_memory.h b/lib/librte_eal/common/include/rte_memory.h
+index 80a8fc0..b2a0168 100644
+--- a/lib/librte_eal/common/include/rte_memory.h
++++ b/lib/librte_eal/common/include/rte_memory.h
+@@ -209,6 +209,9 @@ unsigned rte_memory_get_nchannel(void);
+  */
+ unsigned rte_memory_get_nrank(void);
+ 
++/* check memsegs iovas are within a range based on dma mask */
++int rte_eal_check_dma_mask(uint8_t maskbits);
++
+ /**
+  * Drivers based on uio will not load unless physical
+  * addresses are obtainable. It is only possible to get
+diff --git a/lib/librte_eal/rte_eal_version.map b/lib/librte_eal/rte_eal_version.map
+index f4f46c1..aa6cf87 100644
+--- a/lib/librte_eal/rte_eal_version.map
++++ b/lib/librte_eal/rte_eal_version.map
+@@ -184,6 +184,7 @@ DPDK_17.11 {
+ 
+ 	rte_eal_create_uio_dev;
+ 	rte_bus_get_iommu_class;
++	rte_eal_check_dma_mask;
+ 	rte_eal_has_pci;
+ 	rte_eal_iova_mode;
+ 	rte_eal_mbuf_default_mempool_ops;
+--
+1.8.3.1
+
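The check above boils down to one piece of mask arithmetic: a mask with the low maskbits bits cleared, ANDed against each memseg IOVA. A minimal standalone sketch of that arithmetic (the IOVA values are invented for illustration, not taken from a real memseg table):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint8_t maskbits = 40;
	/* same construction as rte_eal_check_dma_mask() above */
	uint64_t mask = ~((1ULL << maskbits) - 1);
	/* hypothetical memseg IOVAs: one inside, one outside the range */
	uint64_t iova[2] = { 0x3fffff0000ULL, 0x10000000000ULL };
	int i;

	for (i = 0; i < 2; i++)
		printf("iova 0x%llx %s a 40-bit dma mask\n",
		       (unsigned long long)iova[i],
		       (iova[i] & mask) ? "fails" : "passes");
	return 0;
}

Any IOVA at or above 1ULL << 40 has a bit set in the mask region and fails, which is exactly the condition the loop in the patch reports.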
diff --git a/SOURCES/0001-mem-fix-max-DMA-maskbit-size.patch b/SOURCES/0001-mem-fix-max-DMA-maskbit-size.patch
new file mode 100644
index 0000000..39ffd7b
--- /dev/null
+++ b/SOURCES/0001-mem-fix-max-DMA-maskbit-size.patch
@@ -0,0 +1,54 @@
+From 6f43d909b39607af7200a735cb0f959853d5fef6 Mon Sep 17 00:00:00 2001
+From: Alejandro Lucero
+Date: Fri, 31 Aug 2018 15:53:52 +0100
+Subject: [PATCH] mem: fix max DMA maskbit size
+
+The sanity check inside rte_eal_check_dma_mask uses 47 bits as
+the maximum size. It turns out some IOMMU hardware reports 48 bits,
+precluding IOVA VA mode from being enabled.
+
+It is harmless to raise the maximum mask size to 63 bits.
+
+This patch also removes any reference to the unused X86_VA_WIDTH.
+
+Fixes: 3a80bc50c949 ("mem: add function for checking memsegs IOVAs addresses")
+
+Signed-off-by: Alejandro Lucero
+---
+ drivers/bus/pci/linux/pci.c               | 1 -
+ lib/librte_eal/common/eal_common_memory.c | 5 -----
+ 2 files changed, 6 deletions(-)
+
+diff --git a/drivers/bus/pci/linux/pci.c b/drivers/bus/pci/linux/pci.c
+index c81ed5025..44440f223 100644
+--- a/drivers/bus/pci/linux/pci.c
++++ b/drivers/bus/pci/linux/pci.c
+@@ -583,7 +583,6 @@ pci_one_device_iommu_support_va(struct rte_pci_device *dev)
+ {
+ #define VTD_CAP_MGAW_SHIFT	16
+ #define VTD_CAP_MGAW_MASK	(0x3fULL << VTD_CAP_MGAW_SHIFT)
+-#define X86_VA_WIDTH 47 /* From Documentation/x86/x86_64/mm.txt */
+ 	struct rte_pci_addr *addr = &dev->addr;
+ 	char filename[PATH_MAX];
+ 	FILE *fp;
+diff --git a/lib/librte_eal/common/eal_common_memory.c b/lib/librte_eal/common/eal_common_memory.c
+index 00ab3935c..a0922f18b 100644
+--- a/lib/librte_eal/common/eal_common_memory.c
++++ b/lib/librte_eal/common/eal_common_memory.c
+@@ -109,13 +109,8 @@ rte_dump_physmem_layout(FILE *f)
+ 	}
+ }
+ 
+-#if defined(RTE_ARCH_X86)
+-#define X86_VA_WIDTH 47 /* From Documentation/x86/x86_64/mm.txt */
+-#define MAX_DMA_MASK_BITS X86_VA_WIDTH
+-#else
+ /* 63 bits is good enough for a sanity check */
+ #define MAX_DMA_MASK_BITS 63
+-#endif
+ 
+ /* check memseg iovas are within the required range based on dma mask */
+ int
+--
+2.17.1
+
diff --git a/SOURCES/0001-net-mlx4-avoid-stripping-the-glue-library.patch b/SOURCES/0001-net-mlx4-avoid-stripping-the-glue-library.patch
new file mode 100644
index 0000000..68230c9
--- /dev/null
+++ b/SOURCES/0001-net-mlx4-avoid-stripping-the-glue-library.patch
@@ -0,0 +1,37 @@
+From b60115a1c00e30e15395ca0c4bcaf22c3ac431cb Mon Sep 17 00:00:00 2001
+Message-Id:
+From: Timothy Redaelli
+Date: Tue, 31 Jul 2018 15:15:27 +0200
+Subject: [PATCH 1/2] net/mlx4: avoid stripping the glue library
+
+Stripping binaries at build time is usually a bad thing since it makes
+it impossible to generate (split) debug symbols, and this can make
+debugging more difficult.
+
+Fixes: 27cea11686ff ("net/mlx4: spawn rdma-core dependency plug-in")
+Cc: stable@dpdk.org
+
+Signed-off-by: Timothy Redaelli
+Acked-by: Luca Boccassi
+Acked-by: Christian Ehrhardt
+(cherry picked from commit d7a4e99d84f961c72eca7541e090cc3f43b60fb8)
+---
+ drivers/net/mlx4/Makefile | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/net/mlx4/Makefile b/drivers/net/mlx4/Makefile
+index d24565cd1..6228520c2 100644
+--- a/drivers/net/mlx4/Makefile
++++ b/drivers/net/mlx4/Makefile
+@@ -139,7 +139,7 @@ $(LIB): $(LIB_GLUE)
+ $(LIB_GLUE): mlx4_glue.o
+ 	$Q $(LD) $(LDFLAGS) $(EXTRA_LDFLAGS) \
+ 		-Wl,-h,$(LIB_GLUE) \
+-		-s -shared -o $@ $< -libverbs -lmlx4
++		-shared -o $@ $< -libverbs -lmlx4
+ 
+ mlx4_glue.o: mlx4_autoconf.h
+ 
+--
+2.17.1
+
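For context on 0001-mem-fix-max-DMA-maskbit-size.patch above: with the original X86_VA_WIDTH cap, an IOMMU reporting a 48-bit guest address width failed the sanity check even though the mask itself was fine. A small sketch comparing the two bounds (the width value is an assumed example):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint8_t mgaw = 48;	/* address width reported by some IOMMUs */

	/* old bound: X86_VA_WIDTH (47); new bound from the fix: 63 */
	printf("47-bit cap: %s\n", mgaw > 47 ? "rejected" : "accepted");
	printf("63-bit cap: %s\n", mgaw > 63 ? "rejected" : "accepted");
	return 0;
}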
diff --git a/SOURCES/0001-vhost-flush-IOTLB-cache-on-new-mem-table-handling.patch b/SOURCES/0001-vhost-flush-IOTLB-cache-on-new-mem-table-handling.patch
new file mode 100644
index 0000000..dd97f28
--- /dev/null
+++ b/SOURCES/0001-vhost-flush-IOTLB-cache-on-new-mem-table-handling.patch
@@ -0,0 +1,87 @@
+From af53db486792f3d864c9a30dc13ee12402994640 Mon Sep 17 00:00:00 2001
+From: Maxime Coquelin
+Date: Thu, 2 Aug 2018 19:21:22 +0200
+Subject: [PATCH] vhost: flush IOTLB cache on new mem table handling
+
+IOTLB entries contain the host virtual address of the guest
+pages. When receiving a new VHOST_USER_SET_MEM_TABLE request,
+the previous regions get unmapped, so the IOTLB entries, if any,
+will be invalid. This can cause the vhost-user process to
+segfault.
+
+This patch introduces a new function to flush the IOTLB cache,
+and calls it as soon as the backend handles a VHOST_USER_SET_MEM_TABLE
+request.
+
+Fixes: 69c90e98f483 ("vhost: enable IOMMU support")
+Cc: stable@dpdk.org
+
+Signed-off-by: Maxime Coquelin
+Reviewed-by: Tiwei Bie
+Reviewed-by: Jens Freimann
+---
+ lib/librte_vhost/iotlb.c      | 10 ++++++++--
+ lib/librte_vhost/iotlb.h      |  2 +-
+ lib/librte_vhost/vhost_user.c |  5 +++++
+ 3 files changed, 14 insertions(+), 3 deletions(-)
+
+diff --git a/lib/librte_vhost/iotlb.c b/lib/librte_vhost/iotlb.c
+index c11ebcaac..c6354fef7 100644
+--- a/lib/librte_vhost/iotlb.c
++++ b/lib/librte_vhost/iotlb.c
+@@ -303,6 +303,13 @@ vhost_user_iotlb_cache_find(struct vhost_virtqueue *vq, uint64_t iova,
+ 	return vva;
+ }
+ 
++void
++vhost_user_iotlb_flush_all(struct vhost_virtqueue *vq)
++{
++	vhost_user_iotlb_cache_remove_all(vq);
++	vhost_user_iotlb_pending_remove_all(vq);
++}
++
+ int
+ vhost_user_iotlb_init(struct virtio_net *dev, int vq_index)
+ {
+@@ -315,8 +322,7 @@ vhost_user_iotlb_init(struct virtio_net *dev, int vq_index)
+ 		 * The cache has already been initialized,
+ 		 * just drop all cached and pending entries.
+ 		 */
+-		vhost_user_iotlb_cache_remove_all(vq);
+-		vhost_user_iotlb_pending_remove_all(vq);
++		vhost_user_iotlb_flush_all(vq);
+ 	}
+ 
+ #ifdef RTE_LIBRTE_VHOST_NUMA
+diff --git a/lib/librte_vhost/iotlb.h b/lib/librte_vhost/iotlb.h
+index e7083e37b..60b9e4c57 100644
+--- a/lib/librte_vhost/iotlb.h
++++ b/lib/librte_vhost/iotlb.h
+@@ -73,7 +73,7 @@ void vhost_user_iotlb_pending_insert(struct vhost_virtqueue *vq, uint64_t iova,
+ 					uint8_t perm);
+ void vhost_user_iotlb_pending_remove(struct vhost_virtqueue *vq, uint64_t iova,
+ 					uint64_t size, uint8_t perm);
+-
++void vhost_user_iotlb_flush_all(struct vhost_virtqueue *vq);
+ int vhost_user_iotlb_init(struct virtio_net *dev, int vq_index);
+ 
+ #endif /* _VHOST_IOTLB_H_ */
+diff --git a/lib/librte_vhost/vhost_user.c b/lib/librte_vhost/vhost_user.c
+index dc53ff712..a2d4c9ffc 100644
+--- a/lib/librte_vhost/vhost_user.c
++++ b/lib/librte_vhost/vhost_user.c
+@@ -813,6 +813,11 @@ vhost_user_set_mem_table(struct virtio_net **pdev, struct VhostUserMsg *pmsg)
+ 		dev->mem = NULL;
+ 	}
+ 
++	/* Flush IOTLB cache as previous HVAs are now invalid */
++	if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
++		for (i = 0; i < dev->nr_vring; i++)
++			vhost_user_iotlb_flush_all(dev->virtqueue[i]);
++
+ 	dev->nr_guest_pages = 0;
+ 	if (!dev->guest_pages) {
+ 		dev->max_guest_pages = 8;
+--
+2.17.1
+
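The ordering in the vhost patch above is the point: the flush must happen in the SET_MEM_TABLE handler before the old regions are replaced, and only when the IOMMU feature was negotiated. A reduced model of that logic (the struct layout and helper bodies are stand-ins, not DPDK's real vhost types):

/* Reduced model of the flush-on-SET_MEM_TABLE logic above; the
 * struct and helpers are stubs, not the real DPDK definitions. */
#include <stdint.h>

#define VIRTIO_F_IOMMU_PLATFORM 33

struct vhost_virtqueue { int dummy; /* IOTLB lists live here */ };

static void cache_remove_all(struct vhost_virtqueue *vq) { (void)vq; }
static void pending_remove_all(struct vhost_virtqueue *vq) { (void)vq; }

static void iotlb_flush_all(struct vhost_virtqueue *vq)
{
	cache_remove_all(vq);	/* drop valid translations */
	pending_remove_all(vq);	/* drop in-flight misses too */
}

static void on_set_mem_table(uint64_t features,
			     struct vhost_virtqueue **vq, int nr_vring)
{
	int i;

	/* old HVAs die with the old mappings, so flush before remapping */
	if (features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
		for (i = 0; i < nr_vring; i++)
			iotlb_flush_all(vq[i]);
}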
diff --git a/SOURCES/0001-vhost-retranslate-vring-addr-when-memory-table-chang.patch b/SOURCES/0001-vhost-retranslate-vring-addr-when-memory-table-chang.patch
new file mode 100644
index 0000000..707a16b
--- /dev/null
+++ b/SOURCES/0001-vhost-retranslate-vring-addr-when-memory-table-chang.patch
@@ -0,0 +1,72 @@
+From 96935c61631fe2095246b5dce5c6fea960e34c87 Mon Sep 17 00:00:00 2001
+From: Maxime Coquelin
+Date: Thu, 16 Aug 2018 19:29:22 +0200
+Subject: [PATCH] vhost: retranslate vring addr when memory table changes
+
+[ backported from upstream commit d5022533c20aed365d513663806a999459037015 ]
+
+When the vhost-user master sends memory updates using a
+VHOST_USER_SET_MEM_TABLE request, the user backend unmaps and then
+remaps the memory regions in its address space.
+
+If the ring addresses have already been translated, they need to
+be translated again, as they point to unmapped memory.
+
+Signed-off-by: Maxime Coquelin
+---
+ lib/librte_vhost/vhost_user.c | 24 ++++++++++++++++++++++--
+ 1 file changed, 22 insertions(+), 2 deletions(-)
+
+diff --git a/lib/librte_vhost/vhost_user.c b/lib/librte_vhost/vhost_user.c
+index 07c848c7c..0eb5e0d65 100644
+--- a/lib/librte_vhost/vhost_user.c
++++ b/lib/librte_vhost/vhost_user.c
+@@ -622,8 +622,9 @@ dump_guest_pages(struct virtio_net *dev)
+ #endif
+ 
+ static int
+-vhost_user_set_mem_table(struct virtio_net *dev, struct VhostUserMsg *pmsg)
++vhost_user_set_mem_table(struct virtio_net **pdev, struct VhostUserMsg *pmsg)
+ {
++	struct virtio_net *dev = *pdev;
+ 	struct VhostUserMemory memory = pmsg->payload.memory;
+ 	struct rte_vhost_mem_region *reg;
+ 	void *mmap_addr;
+@@ -732,6 +733,25 @@ vhost_user_set_mem_table(struct virtio_n
+ 			mmap_offset);
+ 	}
+ 
++	for (i = 0; i < dev->nr_vring; i++) {
++		struct vhost_virtqueue *vq = dev->virtqueue[i];
++
++		if (vq->desc || vq->avail || vq->used) {
++			/*
++			 * If the memory table got updated, the ring addresses
++			 * need to be translated again as virtual addresses have
++			 * changed.
++			 */
++			vring_invalidate(dev, vq);
++
++			dev = translate_ring_addresses(dev, i);
++			if (!dev)
++				return -1;
++
++			*pdev = dev;
++		}
++	}
++
+ 	dump_guest_pages(dev);
+ 
+ 	return 0;
+@@ -1390,7 +1410,7 @@ vhost_user_msg_handler(int vid, int fd)
+ 		break;
+ 
+ 	case VHOST_USER_SET_MEM_TABLE:
+-		ret = vhost_user_set_mem_table(dev, &msg);
++		ret = vhost_user_set_mem_table(&dev, &msg);
+ 		break;
+ 
+ 	case VHOST_USER_SET_LOG_BASE:
+--
+2.17.1
+
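The signature change from struct virtio_net * to struct virtio_net ** in the hunk above exists because translate_ring_addresses() may return a different device pointer than it was given (it can reallocate the device, e.g. for NUMA placement), and vhost_user_msg_handler() must keep using the new one. A stripped-down sketch of that double-pointer idiom, with stub types rather than the real vhost structures:

#include <stddef.h>

struct dev { int vrings_ready; };

/* may return a different object than it was given (realloc-style) */
static struct dev *retranslate(struct dev *d) { return d; }

static int handle_set_mem_table(struct dev **pdev)
{
	struct dev *d = *pdev;

	d = retranslate(d);
	if (d == NULL)
		return -1;

	*pdev = d;	/* propagate the possibly-new pointer to the caller */
	return 0;
}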
diff --git a/SOURCES/0002-bus-pci-use-IOVAs-check-when-setting-IOVA-mode.patch b/SOURCES/0002-bus-pci-use-IOVAs-check-when-setting-IOVA-mode.patch
new file mode 100644
index 0000000..9756259
--- /dev/null
+++ b/SOURCES/0002-bus-pci-use-IOVAs-check-when-setting-IOVA-mode.patch
@@ -0,0 +1,71 @@
+From 9372cd814782d3ffdf2464b64fc6aa67a0bf117c Mon Sep 17 00:00:00 2001
+From: Alejandro Lucero
+Date: Tue, 10 Jul 2018 18:25:49 +0100
+Subject: [2/5] bus/pci: use IOVAs check when setting IOVA mode
+
+Although VT-d emulation currently only supports 39 bits, the
+IOVAs may still be within that supported range. This patch allows
+IOVA mode in such a case.
+
+Indeed, memory initialization code can be modified for using lower
+virtual addresses than those used by the kernel for 64 bits processes
+by default, and therefore memseg IOVAs can use 39 bits or less on
+most systems. And this is almost certainly true for VMs.
+
+Applicable to v17.11.3 only.
+
+Signed-off-by: Alejandro Lucero
+---
+ drivers/bus/pci/linux/pci.c | 15 +++++++++++----
+ 1 file changed, 11 insertions(+), 4 deletions(-)
+
+diff --git a/drivers/bus/pci/linux/pci.c b/drivers/bus/pci/linux/pci.c
+index 74deef3..c81ed50 100644
+--- a/drivers/bus/pci/linux/pci.c
++++ b/drivers/bus/pci/linux/pci.c
+@@ -43,6 +43,7 @@
+ #include
+ #include
+ #include
++#include
+ 
+ #include "eal_private.h"
+ #include "eal_filesystem.h"
+@@ -613,10 +614,12 @@ pci_one_device_iommu_support_va(struct rte_pci_device *dev)
+ 	fclose(fp);
+ 
+ 	mgaw = ((vtd_cap_reg & VTD_CAP_MGAW_MASK) >> VTD_CAP_MGAW_SHIFT) + 1;
+-	if (mgaw < X86_VA_WIDTH)
++
++	if (!rte_eal_check_dma_mask(mgaw))
++		return true;
++	else
+ 		return false;
+ 
+-	return true;
+ }
+ #elif defined(RTE_ARCH_PPC_64)
+ static bool
+@@ -640,13 +643,17 @@ pci_devices_iommu_support_va(void)
+ {
+ 	struct rte_pci_device *dev = NULL;
+ 	struct rte_pci_driver *drv = NULL;
++	int iommu_dma_mask_check_done = 0;
+ 
+ 	FOREACH_DRIVER_ON_PCIBUS(drv) {
+ 		FOREACH_DEVICE_ON_PCIBUS(dev) {
+ 			if (!rte_pci_match(drv, dev))
+ 				continue;
+-			if (!pci_one_device_iommu_support_va(dev))
+-				return false;
++			if (!iommu_dma_mask_check_done) {
++				if (!pci_one_device_iommu_support_va(dev))
++					return false;
++				iommu_dma_mask_check_done = 1;
++			}
+ 		}
+ 	}
+ 	return true;
+--
+1.8.3.1
+
diff --git a/SOURCES/0002-net-mlx5-avoid-stripping-the-glue-library.patch b/SOURCES/0002-net-mlx5-avoid-stripping-the-glue-library.patch
new file mode 100644
index 0000000..165c10e
--- /dev/null
+++ b/SOURCES/0002-net-mlx5-avoid-stripping-the-glue-library.patch
@@ -0,0 +1,39 @@
+From a22c6d90c0d7cecbde923014b398787fe56978c1 Mon Sep 17 00:00:00 2001
+Message-Id:
+In-Reply-To:
+References:
+From: Timothy Redaelli
+Date: Tue, 31 Jul 2018 15:15:28 +0200
+Subject: [PATCH 2/2] net/mlx5: avoid stripping the glue library
+
+Stripping binaries at build time is usually a bad thing since it makes
+it impossible to generate (split) debug symbols, and this can make
+debugging more difficult.
+
+Fixes: 59b91bec12c6 ("net/mlx5: spawn rdma-core dependency plug-in")
+Cc: stable@dpdk.org
+
+Signed-off-by: Timothy Redaelli
+Acked-by: Luca Boccassi
+Acked-by: Christian Ehrhardt
+(cherry picked from commit c7684b6be4977e7d343b17f798192062b312461d)
+---
+ drivers/net/mlx5/Makefile | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/net/mlx5/Makefile b/drivers/net/mlx5/Makefile
+index 589a06798..d4cba7329 100644
+--- a/drivers/net/mlx5/Makefile
++++ b/drivers/net/mlx5/Makefile
+@@ -184,7 +184,7 @@ $(LIB): $(LIB_GLUE)
+ $(LIB_GLUE): mlx5_glue.o
+ 	$Q $(LD) $(LDFLAGS) $(EXTRA_LDFLAGS) \
+ 		-Wl,-h,$(LIB_GLUE) \
+-		-s -shared -o $@ $< -libverbs -lmlx5
++		-shared -o $@ $< -libverbs -lmlx5
+ 
+ mlx5_glue.o: mlx5_autoconf.h
+ 
+--
+2.17.1
+
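The MGAW decode used by the bus/pci patch above works as in its hunk: bits 21:16 of the VT-d capability register store the maximum guest address width minus one, hence the "+ 1". A standalone sketch with an assumed field value (real code reads the register from sysfs):

#include <stdint.h>
#include <stdio.h>

#define VTD_CAP_MGAW_SHIFT	16
#define VTD_CAP_MGAW_MASK	(0x3fULL << VTD_CAP_MGAW_SHIFT)

int main(void)
{
	/* assume the field encodes 39 bits, i.e. it stores 39 - 1 = 38 */
	uint64_t vtd_cap_reg = (uint64_t)38 << VTD_CAP_MGAW_SHIFT;
	unsigned int mgaw =
	    ((vtd_cap_reg & VTD_CAP_MGAW_MASK) >> VTD_CAP_MGAW_SHIFT) + 1;

	printf("IOMMU guest address width: %u bits\n", mgaw); /* 39 */
	return 0;
}

Feeding this width to rte_eal_check_dma_mask() is what lets a 39-bit emulated VT-d still enable IOVA VA mode when all memseg IOVAs happen to fit.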
diff --git a/SOURCES/0003-mem-use-address-hint-for-mapping-hugepages.patch b/SOURCES/0003-mem-use-address-hint-for-mapping-hugepages.patch
new file mode 100644
index 0000000..c9b2051
--- /dev/null
+++ b/SOURCES/0003-mem-use-address-hint-for-mapping-hugepages.patch
@@ -0,0 +1,125 @@
+From 293c0c4b957f811dc7a099d4bdf8f8acf36f0174 Mon Sep 17 00:00:00 2001
+From: Alejandro Lucero
+Date: Tue, 10 Jul 2018 18:25:50 +0100
+Subject: [3/5] mem: use address hint for mapping hugepages
+
+The Linux kernel uses a really high address as starting address for
+serving mmap calls. If there are addressing limitations and
+IOVA mode is VA, this starting address is likely too high for
+those devices. However, it is possible to use a lower address in
+the process virtual address space, as with 64 bits there is a lot
+of available space.
+
+This patch adds an address hint as starting address for 64 bits
+systems.
+
+Applicable to v17.11.3 only.
+
+Signed-off-by: Alejandro Lucero
+Acked-by: Anatoly Burakov
+Acked-by: Eelco Chaudron
+---
+ lib/librte_eal/linuxapp/eal/eal_memory.c | 55 ++++++++++++++++++++++++++------
+ 1 file changed, 46 insertions(+), 9 deletions(-)
+
+diff --git a/lib/librte_eal/linuxapp/eal/eal_memory.c b/lib/librte_eal/linuxapp/eal/eal_memory.c
+index 0913895..bac969a 100644
+--- a/lib/librte_eal/linuxapp/eal/eal_memory.c
++++ b/lib/librte_eal/linuxapp/eal/eal_memory.c
+@@ -88,6 +88,23 @@
+ 
+ static uint64_t baseaddr_offset;
+ 
++#ifdef RTE_ARCH_64
++/*
++ * Linux kernel uses a really high address as starting address for serving
++ * mmap calls. If there are addressing limitations and IOVA mode is VA,
++ * this starting address is likely too high for those devices. However, it
++ * is possible to use a lower address in the process virtual address space
++ * as with 64 bits there is a lot of available space.
++ *
++ * Current known limitations are 39 or 40 bits. Setting the starting address
++ * at 4GB implies there are 508GB or 1020GB for mapping the available
++ * hugepages. This is likely enough for most systems, although a device with
++ * addressing limitations should call rte_eal_check_dma_mask for ensuring all
++ * memory is within supported range.
++ */
++static uint64_t baseaddr = 0x100000000;
++#endif
++
+ static bool phys_addrs_available = true;
+ 
+ #define RANDOMIZE_VA_SPACE_FILE "/proc/sys/kernel/randomize_va_space"
+@@ -250,6 +267,23 @@ aslr_enabled(void)
+ 	}
+ }
+ 
++static void *
++get_addr_hint(void)
++{
++	if (internal_config.base_virtaddr != 0) {
++		return (void *) (uintptr_t)
++			(internal_config.base_virtaddr +
++			 baseaddr_offset);
++	} else {
++#ifdef RTE_ARCH_64
++		return (void *) (uintptr_t) (baseaddr +
++				baseaddr_offset);
++#else
++		return NULL;
++#endif
++	}
++}
++
+ /*
+  * Try to mmap *size bytes in /dev/zero. If it is successful, return the
+  * pointer to the mmap'd area and keep *size unmodified. Else, retry
+@@ -260,16 +294,10 @@ aslr_enabled(void)
+ static void *
+ get_virtual_area(size_t *size, size_t hugepage_sz)
+ {
+-	void *addr;
++	void *addr, *addr_hint;
+ 	int fd;
+ 	long aligned_addr;
+ 
+-	if (internal_config.base_virtaddr != 0) {
+-		addr = (void*) (uintptr_t) (internal_config.base_virtaddr +
+-				baseaddr_offset);
+-	}
+-	else addr = NULL;
+-
+ 	RTE_LOG(DEBUG, EAL, "Ask a virtual area of 0x%zx bytes\n", *size);
+ 
+ 	fd = open("/dev/zero", O_RDONLY);
+@@ -278,7 +306,9 @@ get_virtual_area(size_t *size, size_t hugepage_sz)
+ 		return NULL;
+ 	}
+ 	do {
+-		addr = mmap(addr,
++		addr_hint = get_addr_hint();
++
++		addr = mmap(addr_hint,
+ 			(*size) + hugepage_sz, PROT_READ,
+ #ifdef RTE_ARCH_PPC_64
+ 			MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB,
+@@ -286,8 +316,15 @@ get_virtual_area(size_t *size, size_t hugepage_sz)
+ 			MAP_PRIVATE,
+ #endif
+ 			fd, 0);
+-		if (addr == MAP_FAILED)
++		if (addr == MAP_FAILED) {
++			/* map failed. Let's try with less memory */
+ 			*size -= hugepage_sz;
++		} else if (addr_hint && addr != addr_hint) {
++			/* hint was not used. Try with another offset */
++			munmap(addr, (*size) + hugepage_sz);
++			addr = MAP_FAILED;
++			baseaddr_offset += 0x100000000;
++		}
+ 	} while (addr == MAP_FAILED && *size > 0);
+ 
+ 	if (addr == MAP_FAILED) {
+--
+1.8.3.1
+
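A self-contained sketch of the hint-and-retry pattern the patch above adds to get_virtual_area() (an anonymous mapping instead of /dev/zero to keep it short; the 4GB base and the 4GB retry step mirror the patch, while the retry cap is an addition for safety):

#include <stdint.h>
#include <stdio.h>
#include <sys/mman.h>

int main(void)
{
	uint64_t base = 0x100000000ULL;		/* 4GB, as in the patch */
	uint64_t offset = 0;
	size_t size = 2 * 1024 * 1024;		/* one 2MB hugepage worth */
	void *addr = MAP_FAILED;
	int tries;

	for (tries = 0; tries < 16 && addr == MAP_FAILED; tries++) {
		void *hint = (void *)(uintptr_t)(base + offset);

		addr = mmap(hint, size, PROT_READ,
			    MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
		if (addr != MAP_FAILED && addr != hint) {
			/* kernel ignored the hint: unmap and try higher */
			munmap(addr, size);
			addr = MAP_FAILED;
			offset += 0x100000000ULL;
		}
	}
	if (addr != MAP_FAILED) {
		printf("mapped at %p\n", addr);
		munmap(addr, size);
	}
	return 0;
}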
diff --git a/SOURCES/0004-net-nfp-check-hugepages-IOVAs-based-on-DMA-mask.patch b/SOURCES/0004-net-nfp-check-hugepages-IOVAs-based-on-DMA-mask.patch
new file mode 100644
index 0000000..5bd12d9
--- /dev/null
+++ b/SOURCES/0004-net-nfp-check-hugepages-IOVAs-based-on-DMA-mask.patch
@@ -0,0 +1,39 @@
+From 1b0deb27e144107939d3ac1119723b4a0e51a191 Mon Sep 17 00:00:00 2001
+From: Alejandro Lucero
+Date: Tue, 10 Jul 2018 18:25:51 +0100
+Subject: [4/5] net/nfp: check hugepages IOVAs based on DMA mask
+
+NFP devices cannot handle DMA addresses requiring more than
+40 bits. This patch uses rte_eal_check_dma_mask with 40 bits
+and avoids device initialization if memory is out of NFP range.
+
+Applicable to v17.11.3 only.
+
+Signed-off-by: Alejandro Lucero
+Acked-by: Eelco Chaudron
+---
+ drivers/net/nfp/nfp_net.c | 8 ++++++++
+ 1 file changed, 8 insertions(+)
+
+diff --git a/drivers/net/nfp/nfp_net.c b/drivers/net/nfp/nfp_net.c
+index 96484d9..d2a240a 100644
+--- a/drivers/net/nfp/nfp_net.c
++++ b/drivers/net/nfp/nfp_net.c
+@@ -2653,6 +2653,14 @@ nfp_net_init(struct rte_eth_dev *eth_dev)
+ 
+ 	pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev);
+ 
++	/* NFP can not handle DMA addresses requiring more than 40 bits */
++	if (rte_eal_check_dma_mask(40) < 0) {
++		RTE_LOG(INFO, PMD, "device %s can not be used:",
++			pci_dev->device.name);
++		RTE_LOG(INFO, PMD, "\trestricted dma mask to 40 bits!\n");
++		return -ENODEV;
++	};
++
+ 	if ((pci_dev->id.device_id == PCI_DEVICE_ID_NFP4000_PF_NIC) ||
+ 	    (pci_dev->id.device_id == PCI_DEVICE_ID_NFP6000_PF_NIC)) {
+ 		port = get_pf_port_number(eth_dev->data->name);
+--
+1.8.3.1
+
diff --git a/SOURCES/0005-net-nfp-support-IOVA-VA-mode.patch b/SOURCES/0005-net-nfp-support-IOVA-VA-mode.patch
new file mode 100644
index 0000000..86b783f
--- /dev/null
+++ b/SOURCES/0005-net-nfp-support-IOVA-VA-mode.patch
@@ -0,0 +1,42 @@
+From 054298d003584cd5709571b9df5e35f293c5354c Mon Sep 17 00:00:00 2001
+From: Alejandro Lucero
+Date: Tue, 10 Jul 2018 18:25:52 +0100
+Subject: [5/5] net/nfp: support IOVA VA mode
+
+NFP can handle IOVA as VA. This requires checking that those IOVAs
+are in the supported range, which is done during initialization.
+
+Applicable to v17.11.3 only.
+
+Signed-off-by: Alejandro Lucero
+Acked-by: Eelco Chaudron
+---
+ drivers/net/nfp/nfp_net.c | 6 ++++--
+ 1 file changed, 4 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/net/nfp/nfp_net.c b/drivers/net/nfp/nfp_net.c
+index d2a240a..8ab28dd 100644
+--- a/drivers/net/nfp/nfp_net.c
++++ b/drivers/net/nfp/nfp_net.c
+@@ -3057,14 +3057,16 @@ static int eth_nfp_pci_remove(struct rte_pci_device *pci_dev)
+ 
+ static struct rte_pci_driver rte_nfp_net_pf_pmd = {
+ 	.id_table = pci_id_nfp_pf_net_map,
+-	.drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_INTR_LSC,
++	.drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_INTR_LSC |
++		     RTE_PCI_DRV_IOVA_AS_VA,
+ 	.probe = nfp_pf_pci_probe,
+ 	.remove = eth_nfp_pci_remove,
+ };
+ 
+ static struct rte_pci_driver rte_nfp_net_vf_pmd = {
+ 	.id_table = pci_id_nfp_vf_net_map,
+-	.drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_INTR_LSC,
++	.drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_INTR_LSC |
++		     RTE_PCI_DRV_IOVA_AS_VA,
+ 	.probe = eth_nfp_pci_probe,
+ 	.remove = eth_nfp_pci_remove,
+ };
+--
+1.8.3.1
+
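Together, the two NFP patches above form the recipe the 1/5 patch description asks of PMDs: declare RTE_PCI_DRV_IOVA_AS_VA and gate initialization on the DMA mask check. An outline of how a hypothetical PMD init path would use the new API (the helper name is invented for illustration; the calls match the patches above):

#include <errno.h>
#include <rte_log.h>
#include <rte_memory.h>

/* Hypothetical init helper for a device limited to 40-bit DMA */
static int
example_pmd_check_dma(const char *dev_name)
{
	/* refuse to initialize if any memseg IOVA needs more than 40 bits */
	if (rte_eal_check_dma_mask(40) < 0) {
		RTE_LOG(INFO, PMD, "device %s can not be used: "
			"memory does not fit the 40-bit dma mask\n", dev_name);
		return -ENODEV;
	}
	return 0;
}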
diff --git a/SPECS/dpdk.spec b/SPECS/dpdk.spec
index f20d505..679c250 100644
--- a/SPECS/dpdk.spec
+++ b/SPECS/dpdk.spec
@@ -5,7 +5,7 @@
 # Dont edit Version: and Release: directly, only these:
 %define ver 17.11
-%define rel 13
+%define rel 15
 %define srcname dpdk
 
 # Define when building git snapshots
@@ -34,7 +34,6 @@ Source506: x86_64-native-linuxapp-gcc-config
 
 # Patches only in dpdk package
 Patch0: dpdk-dev-v2-1-4-net-virtio-fix-vector-Rx-break-caused-by-rxq-flushing.patch
-Patch2: 0001-bus-pci-forbid-IOVA-mode-if-IOMMU-address-width-too-.patch
 
 # Patches in common with the openvswitch package
 Patch400: 0001-vhost_user_protect_active_rings_from_async_ring_changes.patch
@@ -149,6 +148,26 @@ Patch575: 0001-net-bnxt-fix-set-MTU.patch
 # Bug 1610481
 Patch580: 0001-net-i40e-fix-port-segmentation-fault-when-restart.patch
 
+# Bug 1609643
+Patch585: 0001-vhost-flush-IOTLB-cache-on-new-mem-table-handling.patch
+
+# Bug 1618488
+Patch590: 0001-vhost-retranslate-vring-addr-when-memory-table-chang.patch
+
+# Bug 1627285
+Patch600: 0001-net-mlx4-avoid-stripping-the-glue-library.patch
+Patch601: 0002-net-mlx5-avoid-stripping-the-glue-library.patch
+
+# Bug 1634820 (part 2)
+Patch610: 0001-mem-add-function-for-checking-memsegs-IOVAs-addresse.patch
+# dependency
+Patch611: 0001-bus-pci-forbid-IOVA-mode-if-IOMMU-address-width-too-.patch
+Patch612: 0002-bus-pci-use-IOVAs-check-when-setting-IOVA-mode.patch
+Patch613: 0003-mem-use-address-hint-for-mapping-hugepages.patch
+Patch614: 0004-net-nfp-check-hugepages-IOVAs-based-on-DMA-mask.patch
+Patch615: 0005-net-nfp-support-IOVA-VA-mode.patch
+Patch616: 0001-mem-fix-max-DMA-maskbit-size.patch
+
 # Patches only in dpdk package
 Patch700: 0001-net-mlx-fix-rdma-core-glue-path-with-EAL-plugins.patch
 
@@ -378,6 +397,12 @@ sed -i -e 's:-%{machine_tmpl}-:-%{machine}-:g' %{buildroot}/%{_sysconfdir}/profi
 %endif
 
 %changelog
+* Mon Nov 05 2018 Timothy Redaelli - 17.11-15
+- Re-align with DPDK patches inside OVS FDP 18.11 (#1646598)
+
+* Fri Sep 14 2018 Timothy Redaelli - 17.11-14
+- Backport "net/mlx{4,5}: avoid stripping the glue library" (#1627285)
+
 * Tue Jul 31 2018 Timothy Redaelli - 17.11-13
 - Re-align with DPDK patches inside OVS FDP 18.08 (#1610407)
 - Backport "net/i40e: fix port segmentation fault when restart" (#1610481)