diff --git a/.dpdk.metadata b/.dpdk.metadata index 3127259..6c843d4 100644 --- a/.dpdk.metadata +++ b/.dpdk.metadata @@ -1 +1 @@ -6df453304d4864c376ee4d10e99c799bf177776a SOURCES/dpdk-18.11.5.tar.xz +11d7848e74b85589ee7c4d91afafbf6b0b395a0e SOURCES/dpdk-18.11.8.tar.xz diff --git a/.gitignore b/.gitignore index 5505a01..80e642f 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1 @@ -SOURCES/dpdk-18.11.5.tar.xz +SOURCES/dpdk-18.11.8.tar.xz diff --git a/SOURCES/0001-eal-compute-IOVA-mode-based-on-PA-availability.patch b/SOURCES/0001-eal-compute-IOVA-mode-based-on-PA-availability.patch new file mode 100644 index 0000000..bd79f4e --- /dev/null +++ b/SOURCES/0001-eal-compute-IOVA-mode-based-on-PA-availability.patch @@ -0,0 +1,267 @@ +From a6fc8e35d9e72b2acd605b6c6a8b08d2541c0609 Mon Sep 17 00:00:00 2001 +From: Ben Walker +Date: Fri, 14 Jun 2019 11:39:16 +0200 +Subject: [PATCH 1/4] eal: compute IOVA mode based on PA availability + +Currently, if the bus selects IOVA as PA, the memory init can fail when +lacking access to physical addresses. +This can be quite hard for normal users to understand what is wrong +since this is the default behavior. + +Catch this situation earlier in eal init by validating physical addresses +availability, or select IOVA when no clear preference had been expressed. + +The bus code is changed so that it reports when it does not care about +the IOVA mode and lets the eal init decide. + +In Linux implementation, rework rte_eal_using_phys_addrs() so that it can +be called earlier but still avoid a circular dependency with +rte_mem_virt2phy(). +In FreeBSD implementation, rte_eal_using_phys_addrs() always returns +false, so the detection part is left as is. + +If librte_kni is compiled in and the KNI kmod is loaded, +- if the buses requested VA, force to PA if physical addresses are + available as it was done before, +- else, keep iova as VA, KNI init will fail later.
+ +Signed-off-by: Ben Walker +Signed-off-by: David Marchand +Acked-by: Anatoly Burakov + +(cherry picked from commit c2361bab70c56f64e50f07946b1b20bf688d782a) +Signed-off-by: David Marchand +--- + lib/librte_eal/bsdapp/eal/eal.c | 10 +++++-- + lib/librte_eal/common/eal_common_bus.c | 4 --- + lib/librte_eal/common/include/rte_bus.h | 2 +- + lib/librte_eal/linuxapp/eal/eal.c | 38 ++++++++++++++++++++------ + lib/librte_eal/linuxapp/eal/eal_memory.c | 46 +++++++++----------------------- + 5 files changed, 51 insertions(+), 49 deletions(-) + +diff --git a/lib/librte_eal/bsdapp/eal/eal.c b/lib/librte_eal/bsdapp/eal/eal.c +index bfac7fd..14ae853 100644 +--- a/lib/librte_eal/bsdapp/eal/eal.c ++++ b/lib/librte_eal/bsdapp/eal/eal.c +@@ -689,13 +689,19 @@ rte_eal_init(int argc, char **argv) + /* if no EAL option "--iova-mode=", use bus IOVA scheme */ + if (internal_config.iova_mode == RTE_IOVA_DC) { + /* autodetect the IOVA mapping mode (default is RTE_IOVA_PA) */ +- rte_eal_get_configuration()->iova_mode = +- rte_bus_get_iommu_class(); ++ enum rte_iova_mode iova_mode = rte_bus_get_iommu_class(); ++ ++ if (iova_mode == RTE_IOVA_DC) ++ iova_mode = RTE_IOVA_PA; ++ rte_eal_get_configuration()->iova_mode = iova_mode; + } else { + rte_eal_get_configuration()->iova_mode = + internal_config.iova_mode; + } + ++ RTE_LOG(INFO, EAL, "Selected IOVA mode '%s'\n", ++ rte_eal_iova_mode() == RTE_IOVA_PA ? "PA" : "VA"); ++ + if (internal_config.no_hugetlbfs == 0) { + /* rte_config isn't initialized yet */ + ret = internal_config.process_type == RTE_PROC_PRIMARY ? 
+diff --git a/lib/librte_eal/common/eal_common_bus.c b/lib/librte_eal/common/eal_common_bus.c +index c8f1901..77f1be1 100644 +--- a/lib/librte_eal/common/eal_common_bus.c ++++ b/lib/librte_eal/common/eal_common_bus.c +@@ -237,10 +237,6 @@ rte_bus_get_iommu_class(void) + mode |= bus->get_iommu_class(); + } + +- if (mode != RTE_IOVA_VA) { +- /* Use default IOVA mode */ +- mode = RTE_IOVA_PA; +- } + return mode; + } + +diff --git a/lib/librte_eal/common/include/rte_bus.h b/lib/librte_eal/common/include/rte_bus.h +index 6be4b5c..b87e23b 100644 +--- a/lib/librte_eal/common/include/rte_bus.h ++++ b/lib/librte_eal/common/include/rte_bus.h +@@ -348,7 +348,7 @@ struct rte_bus *rte_bus_find_by_name(const char *busname); + + /** + * Get the common iommu class of devices bound on to buses available in the +- * system. The default mode is PA. ++ * system. RTE_IOVA_DC means that no preferrence has been expressed. + * + * @return + * enum rte_iova_mode value. +diff --git a/lib/librte_eal/linuxapp/eal/eal.c b/lib/librte_eal/linuxapp/eal/eal.c +index 7a08cf1..6899307 100644 +--- a/lib/librte_eal/linuxapp/eal/eal.c ++++ b/lib/librte_eal/linuxapp/eal/eal.c +@@ -943,6 +943,7 @@ rte_eal_init(int argc, char **argv) + static char logid[PATH_MAX]; + char cpuset[RTE_CPU_AFFINITY_STR_LEN]; + char thread_name[RTE_MAX_THREAD_NAME_LEN]; ++ bool phys_addrs; + + /* checks if the machine is adequate */ + if (!rte_cpu_is_supported()) { +@@ -1030,25 +1031,46 @@ rte_eal_init(int argc, char **argv) + return -1; + } + ++ phys_addrs = rte_eal_using_phys_addrs() != 0; ++ + /* if no EAL option "--iova-mode=", use bus IOVA scheme */ + if (internal_config.iova_mode == RTE_IOVA_DC) { +- /* autodetect the IOVA mapping mode (default is RTE_IOVA_PA) */ +- rte_eal_get_configuration()->iova_mode = +- rte_bus_get_iommu_class(); ++ /* autodetect the IOVA mapping mode */ ++ enum rte_iova_mode iova_mode = rte_bus_get_iommu_class(); + ++ if (iova_mode == RTE_IOVA_DC) { ++ iova_mode = phys_addrs ? 
RTE_IOVA_PA : RTE_IOVA_VA; ++ RTE_LOG(DEBUG, EAL, ++ "Buses did not request a specific IOVA mode, using '%s' based on physical addresses availability.\n", ++ phys_addrs ? "PA" : "VA"); ++ } ++#ifdef RTE_LIBRTE_KNI + /* Workaround for KNI which requires physical address to work */ +- if (rte_eal_get_configuration()->iova_mode == RTE_IOVA_VA && ++ if (iova_mode == RTE_IOVA_VA && + rte_eal_check_module("rte_kni") == 1) { +- rte_eal_get_configuration()->iova_mode = RTE_IOVA_PA; +- RTE_LOG(WARNING, EAL, +- "Some devices want IOVA as VA but PA will be used because.. " +- "KNI module inserted\n"); ++ if (phys_addrs) { ++ iova_mode = RTE_IOVA_PA; ++ RTE_LOG(WARNING, EAL, "Forcing IOVA as 'PA' because KNI module is loaded\n"); ++ } else { ++ RTE_LOG(DEBUG, EAL, "KNI can not work since physical addresses are unavailable\n"); ++ } + } ++#endif ++ rte_eal_get_configuration()->iova_mode = iova_mode; + } else { + rte_eal_get_configuration()->iova_mode = + internal_config.iova_mode; + } + ++ if (rte_eal_iova_mode() == RTE_IOVA_PA && !phys_addrs) { ++ rte_eal_init_alert("Cannot use IOVA as 'PA' since physical addresses are not available"); ++ rte_errno = EINVAL; ++ return -1; ++ } ++ ++ RTE_LOG(INFO, EAL, "Selected IOVA mode '%s'\n", ++ rte_eal_iova_mode() == RTE_IOVA_PA ? "PA" : "VA"); ++ + if (internal_config.no_hugetlbfs == 0) { + /* rte_config isn't initialized yet */ + ret = internal_config.process_type == RTE_PROC_PRIMARY ? +diff --git a/lib/librte_eal/linuxapp/eal/eal_memory.c b/lib/librte_eal/linuxapp/eal/eal_memory.c +index 898bdb7..24d99c0 100644 +--- a/lib/librte_eal/linuxapp/eal/eal_memory.c ++++ b/lib/librte_eal/linuxapp/eal/eal_memory.c +@@ -62,34 +62,10 @@ + * zone as well as a physical contiguous zone. 
+ */ + +-static bool phys_addrs_available = true; ++static int phys_addrs_available = -1; + + #define RANDOMIZE_VA_SPACE_FILE "/proc/sys/kernel/randomize_va_space" + +-static void +-test_phys_addrs_available(void) +-{ +- uint64_t tmp = 0; +- phys_addr_t physaddr; +- +- if (!rte_eal_has_hugepages()) { +- RTE_LOG(ERR, EAL, +- "Started without hugepages support, physical addresses not available\n"); +- phys_addrs_available = false; +- return; +- } +- +- physaddr = rte_mem_virt2phy(&tmp); +- if (physaddr == RTE_BAD_PHYS_ADDR) { +- if (rte_eal_iova_mode() == RTE_IOVA_PA) +- RTE_LOG(ERR, EAL, +- "Cannot obtain physical addresses: %s. " +- "Only vfio will function.\n", +- strerror(errno)); +- phys_addrs_available = false; +- } +-} +- + /* + * Get physical address of any mapped virtual address in the current process. + */ +@@ -102,8 +78,7 @@ rte_mem_virt2phy(const void *virtaddr) + int page_size; + off_t offset; + +- /* Cannot parse /proc/self/pagemap, no need to log errors everywhere */ +- if (!phys_addrs_available) ++ if (phys_addrs_available == 0) + return RTE_BAD_IOVA; + + /* standard page size */ +@@ -1332,8 +1307,6 @@ eal_legacy_hugepage_init(void) + int nr_hugefiles, nr_hugepages = 0; + void *addr; + +- test_phys_addrs_available(); +- + memset(used_hp, 0, sizeof(used_hp)); + + /* get pointer to global configuration */ +@@ -1466,7 +1439,7 @@ eal_legacy_hugepage_init(void) + continue; + } + +- if (phys_addrs_available && ++ if (rte_eal_using_phys_addrs() && + rte_eal_iova_mode() != RTE_IOVA_VA) { + /* find physical addresses for each hugepage */ + if (find_physaddrs(&tmp_hp[hp_offset], hpi) < 0) { +@@ -1685,8 +1658,6 @@ eal_hugepage_init(void) + uint64_t memory[RTE_MAX_NUMA_NODES]; + int hp_sz_idx, socket_id; + +- test_phys_addrs_available(); +- + memset(used_hp, 0, sizeof(used_hp)); + + for (hp_sz_idx = 0; +@@ -1812,8 +1783,6 @@ eal_legacy_hugepage_attach(void) + "into secondary processes\n"); + } + +- test_phys_addrs_available(); +- + fd_hugepage = 
open(eal_hugepage_data_path(), O_RDONLY); + if (fd_hugepage < 0) { + RTE_LOG(ERR, EAL, "Could not open %s\n", +@@ -1953,6 +1922,15 @@ rte_eal_hugepage_attach(void) + int + rte_eal_using_phys_addrs(void) + { ++ if (phys_addrs_available == -1) { ++ uint64_t tmp = 0; ++ ++ if (rte_eal_has_hugepages() != 0 && ++ rte_mem_virt2phy(&tmp) != RTE_BAD_PHYS_ADDR) ++ phys_addrs_available = 1; ++ else ++ phys_addrs_available = 0; ++ } + return phys_addrs_available; + } + +-- +1.8.3.1 + diff --git a/SOURCES/0001-net-i40e-re-program-promiscuous-mode-on-VF-interface.patch b/SOURCES/0001-net-i40e-re-program-promiscuous-mode-on-VF-interface.patch new file mode 100644 index 0000000..49d38e2 --- /dev/null +++ b/SOURCES/0001-net-i40e-re-program-promiscuous-mode-on-VF-interface.patch @@ -0,0 +1,71 @@ +From b6cf6a0100ce9645a74a23bbeeff4083e80d8a39 Mon Sep 17 00:00:00 2001 +Message-Id: +From: Eelco Chaudron +Date: Tue, 19 Nov 2019 08:45:21 -0500 +Subject: [PATCH] net/i40e: re-program promiscuous mode on VF interface + +During a kernel PF reset, this event is propagated to the VF. +The DPDK VF PMD will execute the reset task before the PF is done +with his. This results in the admin queue message not being responded +to leaving the port in "promiscuous" mode. + +This patch makes sure the promiscuous mode is configured independently +of the current admin state. 
+ +Signed-off-by: Eelco Chaudron +Reviewed-by: Xiao Zhang +--- + drivers/net/i40e/i40e_ethdev_vf.c | 16 ---------------- + 1 file changed, 16 deletions(-) + +diff --git a/drivers/net/i40e/i40e_ethdev_vf.c b/drivers/net/i40e/i40e_ethdev_vf.c +index 5dba092..43f7ab5 100644 +--- a/drivers/net/i40e/i40e_ethdev_vf.c ++++ b/drivers/net/i40e/i40e_ethdev_vf.c +@@ -2092,10 +2092,6 @@ static int eth_i40evf_pci_remove(struct rte_pci_device *pci_dev) + struct i40e_vf *vf = I40EVF_DEV_PRIVATE_TO_VF(dev->data->dev_private); + int ret; + +- /* If enabled, just return */ +- if (vf->promisc_unicast_enabled) +- return; +- + ret = i40evf_config_promisc(dev, 1, vf->promisc_multicast_enabled); + if (ret == 0) + vf->promisc_unicast_enabled = TRUE; +@@ -2107,10 +2103,6 @@ static int eth_i40evf_pci_remove(struct rte_pci_device *pci_dev) + struct i40e_vf *vf = I40EVF_DEV_PRIVATE_TO_VF(dev->data->dev_private); + int ret; + +- /* If disabled, just return */ +- if (!vf->promisc_unicast_enabled) +- return; +- + ret = i40evf_config_promisc(dev, 0, vf->promisc_multicast_enabled); + if (ret == 0) + vf->promisc_unicast_enabled = FALSE; +@@ -2122,10 +2114,6 @@ static int eth_i40evf_pci_remove(struct rte_pci_device *pci_dev) + struct i40e_vf *vf = I40EVF_DEV_PRIVATE_TO_VF(dev->data->dev_private); + int ret; + +- /* If enabled, just return */ +- if (vf->promisc_multicast_enabled) +- return; +- + ret = i40evf_config_promisc(dev, vf->promisc_unicast_enabled, 1); + if (ret == 0) + vf->promisc_multicast_enabled = TRUE; +@@ -2137,10 +2125,6 @@ static int eth_i40evf_pci_remove(struct rte_pci_device *pci_dev) + struct i40e_vf *vf = I40EVF_DEV_PRIVATE_TO_VF(dev->data->dev_private); + int ret; + +- /* If enabled, just return */ +- if (!vf->promisc_multicast_enabled) +- return; +- + ret = i40evf_config_promisc(dev, vf->promisc_unicast_enabled, 0); + if (ret == 0) + vf->promisc_multicast_enabled = FALSE; +-- +1.8.3.1 + diff --git a/SOURCES/0001-vhost-add-device-op-when-notification-to-guest-is-se.patch 
b/SOURCES/0001-vhost-add-device-op-when-notification-to-guest-is-se.patch new file mode 100644 index 0000000..7a37aa3 --- /dev/null +++ b/SOURCES/0001-vhost-add-device-op-when-notification-to-guest-is-se.patch @@ -0,0 +1,83 @@ +From 039253166a57ee660dd2fbe92ca77fa65154751c Mon Sep 17 00:00:00 2001 +Message-Id: <039253166a57ee660dd2fbe92ca77fa65154751c.1577105865.git.echaudro@redhat.com> +From: Eelco Chaudron +Date: Wed, 28 Aug 2019 10:49:39 -0400 +Subject: [PATCH] vhost: add device op when notification to guest is sent + +This patch adds an operation callback which gets called every time +the library is waking up the guest through an eventfd_write() call. + +This can be used by 3rd party applications, like OVS, to track the +number of times interrupts were generated. This might be of +interest to find out whether system calls were made in the fast path. + +Signed-off-by: Eelco Chaudron +Reviewed-by: Maxime Coquelin +--- + lib/librte_vhost/rte_vhost.h | 10 +++++++++- + lib/librte_vhost/vhost.h | 15 ++++++++++++--- + 2 files changed, 21 insertions(+), 4 deletions(-) + +diff --git a/lib/librte_vhost/rte_vhost.h b/lib/librte_vhost/rte_vhost.h +index 7fb1729..878e339 100644 +--- a/lib/librte_vhost/rte_vhost.h ++++ b/lib/librte_vhost/rte_vhost.h +@@ -172,7 +172,15 @@ struct vhost_device_ops { + int (*new_connection)(int vid); + void (*destroy_connection)(int vid); + +- void *reserved[2]; /**< Reserved for future extension */ ++ /** ++ * This callback gets called each time a guest gets notified ++ * about waiting packets. This is the interrupt handling trough ++ * the eventfd_write(callfd), which can be used for counting these ++ * "slow" syscalls.
++ */ ++ void (*guest_notified)(int vid); ++ ++ void *reserved[1]; /**< Reserved for future extension */ + }; + + /** +diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h +index 884befa..5131a97 100644 +--- a/lib/librte_vhost/vhost.h ++++ b/lib/librte_vhost/vhost.h +@@ -543,13 +543,19 @@ void *vhost_alloc_copy_ind_table(struct virtio_net *dev, + + if ((vhost_need_event(vhost_used_event(vq), new, old) && + (vq->callfd >= 0)) || +- unlikely(!signalled_used_valid)) ++ unlikely(!signalled_used_valid)) { + eventfd_write(vq->callfd, (eventfd_t) 1); ++ if (dev->notify_ops->guest_notified) ++ dev->notify_ops->guest_notified(dev->vid); ++ } + } else { + /* Kick the guest if necessary. */ + if (!(vq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT) +- && (vq->callfd >= 0)) ++ && (vq->callfd >= 0)) { + eventfd_write(vq->callfd, (eventfd_t)1); ++ if (dev->notify_ops->guest_notified) ++ dev->notify_ops->guest_notified(dev->vid); ++ } + } + } + +@@ -600,8 +606,11 @@ void *vhost_alloc_copy_ind_table(struct virtio_net *dev, + if (vhost_need_event(off, new, old)) + kick = true; + kick: +- if (kick) ++ if (kick) { + eventfd_write(vq->callfd, (eventfd_t)1); ++ if (dev->notify_ops->guest_notified) ++ dev->notify_ops->guest_notified(dev->vid); ++ } + } + + static __rte_always_inline void +-- +1.8.3.1 + diff --git a/SOURCES/0002-bus-pci-consider-only-usable-devices-for-IOVA-mode.patch b/SOURCES/0002-bus-pci-consider-only-usable-devices-for-IOVA-mode.patch new file mode 100644 index 0000000..63cc260 --- /dev/null +++ b/SOURCES/0002-bus-pci-consider-only-usable-devices-for-IOVA-mode.patch @@ -0,0 +1,376 @@ +From 25986da4cfa1b20ca6e9f4e39a34e12d72435963 Mon Sep 17 00:00:00 2001 +From: Ben Walker +Date: Mon, 18 Nov 2019 15:23:18 +0000 +Subject: [PATCH 2/4] bus/pci: consider only usable devices for IOVA mode + +When selecting the preferred IOVA mode of the pci bus, the current +heuristic ("are devices bound?", "are devices bound to UIO?", "are pmd +drivers supporting IOVA as VA?" 
etc..) should honor the device +white/blacklist so that an unwanted device does not impact the decision. + +There is no reason to consider a device which has no driver available. + +This applies to all OS, so implements this in common code then call a +OS specific callback. + +On Linux side: +- the VFIO special considerations should be evaluated only if VFIO + support is built, +- there is no strong requirement on using VA rather than PA if a driver + supports VA, so defaulting to DC in such a case. + +Signed-off-by: Ben Walker +Signed-off-by: David Marchand +Reviewed-by: Anatoly Burakov + +(cherry picked from commit 703458e19c16135143b3f30089e1af66100c82dc) +Signed-off-by: David Marchand + +Conflicts: + drivers/bus/pci/linux/pci.c + drivers/bus/pci/pci_common.c +--- + drivers/bus/pci/bsd/pci.c | 9 +- + drivers/bus/pci/linux/pci.c | 185 +++++++++-------------------------- + drivers/bus/pci/pci_common.c | 65 ++++++++++++ + drivers/bus/pci/private.h | 8 ++ + 4 files changed, 126 insertions(+), 141 deletions(-) + +diff --git a/drivers/bus/pci/bsd/pci.c b/drivers/bus/pci/bsd/pci.c +index d09f8ee5a..0f23f12b9 100644 +--- a/drivers/bus/pci/bsd/pci.c ++++ b/drivers/bus/pci/bsd/pci.c +@@ -377,11 +377,12 @@ rte_pci_scan(void) + } + +-/* +- * Get iommu class of PCI devices on the bus. +- */ + enum rte_iova_mode +-rte_pci_get_iommu_class(void) ++pci_device_iova_mode(const struct rte_pci_driver *pdrv __rte_unused, ++ const struct rte_pci_device *pdev) + { + /* Supports only RTE_KDRV_NIC_UIO */ ++ if (pdev->kdrv != RTE_KDRV_NIC_UIO) ++ RTE_LOG(DEBUG, EAL, "Unsupported kernel driver? 
Defaulting to IOVA as 'PA'\n"); ++ + return RTE_IOVA_PA; + } +diff --git a/drivers/bus/pci/linux/pci.c b/drivers/bus/pci/linux/pci.c +index 74794a3ba..7d73d9de5 100644 +--- a/drivers/bus/pci/linux/pci.c ++++ b/drivers/bus/pci/linux/pci.c +@@ -498,91 +498,11 @@ rte_pci_scan(void) + } + +-/* +- * Is pci device bound to any kdrv +- */ +-static inline int +-pci_one_device_is_bound(void) +-{ +- struct rte_pci_device *dev = NULL; +- int ret = 0; +- +- FOREACH_DEVICE_ON_PCIBUS(dev) { +- if (dev->kdrv == RTE_KDRV_UNKNOWN || +- dev->kdrv == RTE_KDRV_NONE) { +- continue; +- } else { +- ret = 1; +- break; +- } +- } +- return ret; +-} +- +-/* +- * Any one of the device bound to uio +- */ +-static inline int +-pci_one_device_bound_uio(void) +-{ +- struct rte_pci_device *dev = NULL; +- struct rte_devargs *devargs; +- int need_check; +- +- FOREACH_DEVICE_ON_PCIBUS(dev) { +- devargs = dev->device.devargs; +- +- need_check = 0; +- switch (rte_pci_bus.bus.conf.scan_mode) { +- case RTE_BUS_SCAN_WHITELIST: +- if (devargs && devargs->policy == RTE_DEV_WHITELISTED) +- need_check = 1; +- break; +- case RTE_BUS_SCAN_UNDEFINED: +- case RTE_BUS_SCAN_BLACKLIST: +- if (devargs == NULL || +- devargs->policy != RTE_DEV_BLACKLISTED) +- need_check = 1; +- break; +- } +- +- if (!need_check) +- continue; +- +- if (dev->kdrv == RTE_KDRV_IGB_UIO || +- dev->kdrv == RTE_KDRV_UIO_GENERIC) { +- return 1; +- } +- } +- return 0; +-} +- +-/* +- * Any one of the device has iova as va +- */ +-static inline int +-pci_one_device_has_iova_va(void) +-{ +- struct rte_pci_device *dev = NULL; +- struct rte_pci_driver *drv = NULL; +- +- FOREACH_DRIVER_ON_PCIBUS(drv) { +- if (drv && drv->drv_flags & RTE_PCI_DRV_IOVA_AS_VA) { +- FOREACH_DEVICE_ON_PCIBUS(dev) { +- if (dev->kdrv == RTE_KDRV_VFIO && +- rte_pci_match(drv, dev)) +- return 1; +- } +- } +- } +- return 0; +-} +- + #if defined(RTE_ARCH_X86) + static bool +-pci_one_device_iommu_support_va(struct rte_pci_device *dev) ++pci_one_device_iommu_support_va(const struct 
rte_pci_device *dev) + { + #define VTD_CAP_MGAW_SHIFT 16 + #define VTD_CAP_MGAW_MASK (0x3fULL << VTD_CAP_MGAW_SHIFT) +- struct rte_pci_addr *addr = &dev->addr; ++ const struct rte_pci_addr *addr = &dev->addr; + char filename[PATH_MAX]; + FILE *fp; +@@ -628,5 +548,5 @@ pci_one_device_iommu_support_va(struct rte_pci_device *dev) + #elif defined(RTE_ARCH_PPC_64) + static bool +-pci_one_device_iommu_support_va(__rte_unused struct rte_pci_device *dev) ++pci_one_device_iommu_support_va(__rte_unused const struct rte_pci_device *dev) + { + return false; +@@ -634,5 +554,5 @@ pci_one_device_iommu_support_va(__rte_unused struct rte_pci_device *dev) + #else + static bool +-pci_one_device_iommu_support_va(__rte_unused struct rte_pci_device *dev) ++pci_one_device_iommu_support_va(__rte_unused const struct rte_pci_device *dev) + { + return true; +@@ -640,66 +560,57 @@ pci_one_device_iommu_support_va(__rte_unused struct rte_pci_device *dev) + #endif + +-/* +- * All devices IOMMUs support VA as IOVA +- */ +-static bool +-pci_devices_iommu_support_va(void) +-{ +- struct rte_pci_device *dev = NULL; +- struct rte_pci_driver *drv = NULL; +- +- FOREACH_DRIVER_ON_PCIBUS(drv) { +- FOREACH_DEVICE_ON_PCIBUS(dev) { +- if (!rte_pci_match(drv, dev)) +- continue; +- /* +- * just one PCI device needs to be checked out because +- * the IOMMU hardware is the same for all of them. +- */ +- return pci_one_device_iommu_support_va(dev); +- } +- } +- return true; +-} +- +-/* +- * Get iommu class of PCI devices on the bus. 
+- */ + enum rte_iova_mode +-rte_pci_get_iommu_class(void) ++pci_device_iova_mode(const struct rte_pci_driver *pdrv, ++ const struct rte_pci_device *pdev) + { +- bool is_bound; +- bool is_vfio_noiommu_enabled = true; +- bool has_iova_va; +- bool is_bound_uio; +- bool iommu_no_va; ++ enum rte_iova_mode iova_mode = RTE_IOVA_DC; ++ static int iommu_no_va = -1; + +- is_bound = pci_one_device_is_bound(); +- if (!is_bound) +- return RTE_IOVA_DC; +- +- has_iova_va = pci_one_device_has_iova_va(); +- is_bound_uio = pci_one_device_bound_uio(); +- iommu_no_va = !pci_devices_iommu_support_va(); ++ switch (pdev->kdrv) { ++ case RTE_KDRV_VFIO: { + #ifdef VFIO_PRESENT +- is_vfio_noiommu_enabled = rte_vfio_noiommu_is_enabled() == true ? +- true : false; ++ static int is_vfio_noiommu_enabled = -1; ++ ++ if (is_vfio_noiommu_enabled == -1) { ++ if (rte_vfio_noiommu_is_enabled() == 1) ++ is_vfio_noiommu_enabled = 1; ++ else ++ is_vfio_noiommu_enabled = 0; ++ } ++ if ((pdrv->drv_flags & RTE_PCI_DRV_IOVA_AS_VA) == 0) { ++ iova_mode = RTE_IOVA_PA; ++ } else if (is_vfio_noiommu_enabled != 0) { ++ RTE_LOG(DEBUG, EAL, "Forcing to 'PA', vfio-noiommu mode configured\n"); ++ iova_mode = RTE_IOVA_PA; ++ } + #endif ++ break; ++ } + +- if (has_iova_va && !is_bound_uio && !is_vfio_noiommu_enabled && +- !iommu_no_va) +- return RTE_IOVA_VA; ++ case RTE_KDRV_IGB_UIO: ++ case RTE_KDRV_UIO_GENERIC: ++ iova_mode = RTE_IOVA_PA; ++ break; + +- if (has_iova_va) { +- RTE_LOG(WARNING, EAL, "Some devices want iova as va but pa will be used because.. "); +- if (is_vfio_noiommu_enabled) +- RTE_LOG(WARNING, EAL, "vfio-noiommu mode configured\n"); +- if (is_bound_uio) +- RTE_LOG(WARNING, EAL, "few device bound to UIO\n"); +- if (iommu_no_va) +- RTE_LOG(WARNING, EAL, "IOMMU does not support IOVA as VA\n"); ++ default: ++ RTE_LOG(DEBUG, EAL, "Unsupported kernel driver? 
Defaulting to IOVA as 'PA'\n"); ++ iova_mode = RTE_IOVA_PA; ++ break; + } + +- return RTE_IOVA_PA; ++ if (iova_mode != RTE_IOVA_PA) { ++ /* ++ * We can check this only once, because the IOMMU hardware is ++ * the same for all of them. ++ */ ++ if (iommu_no_va == -1) ++ iommu_no_va = pci_one_device_iommu_support_va(pdev) ++ ? 0 : 1; ++ if (iommu_no_va != 0) { ++ RTE_LOG(DEBUG, EAL, "Forcing to 'PA', IOMMU does not support IOVA as 'VA'\n"); ++ iova_mode = RTE_IOVA_PA; ++ } ++ } ++ return iova_mode; + } + +diff --git a/drivers/bus/pci/pci_common.c b/drivers/bus/pci/pci_common.c +index 6276e5d69..b8cc6d31a 100644 +--- a/drivers/bus/pci/pci_common.c ++++ b/drivers/bus/pci/pci_common.c +@@ -529,4 +529,69 @@ pci_unplug(struct rte_device *dev) + } + ++static bool ++pci_ignore_device(const struct rte_pci_device *dev) ++{ ++ struct rte_devargs *devargs = dev->device.devargs; ++ ++ switch (rte_pci_bus.bus.conf.scan_mode) { ++ case RTE_BUS_SCAN_WHITELIST: ++ if (devargs && devargs->policy == RTE_DEV_WHITELISTED) ++ return false; ++ break; ++ case RTE_BUS_SCAN_UNDEFINED: ++ case RTE_BUS_SCAN_BLACKLIST: ++ if (devargs == NULL || ++ devargs->policy != RTE_DEV_BLACKLISTED) ++ return false; ++ break; ++ } ++ return true; ++} ++ ++enum rte_iova_mode ++rte_pci_get_iommu_class(void) ++{ ++ enum rte_iova_mode iova_mode = RTE_IOVA_DC; ++ const struct rte_pci_device *dev; ++ const struct rte_pci_driver *drv; ++ bool devices_want_va = false; ++ bool devices_want_pa = false; ++ ++ FOREACH_DEVICE_ON_PCIBUS(dev) { ++ if (pci_ignore_device(dev)) ++ continue; ++ if (dev->kdrv == RTE_KDRV_UNKNOWN || ++ dev->kdrv == RTE_KDRV_NONE) ++ continue; ++ FOREACH_DRIVER_ON_PCIBUS(drv) { ++ enum rte_iova_mode dev_iova_mode; ++ ++ if (!rte_pci_match(drv, dev)) ++ continue; ++ ++ dev_iova_mode = pci_device_iova_mode(drv, dev); ++ RTE_LOG(DEBUG, EAL, "PCI driver %s for device " ++ PCI_PRI_FMT " wants IOVA as '%s'\n", ++ drv->driver.name, ++ dev->addr.domain, dev->addr.bus, ++ dev->addr.devid, 
dev->addr.function, ++ dev_iova_mode == RTE_IOVA_DC ? "DC" : ++ (dev_iova_mode == RTE_IOVA_PA ? "PA" : "VA")); ++ if (dev_iova_mode == RTE_IOVA_PA) ++ devices_want_pa = true; ++ else if (dev_iova_mode == RTE_IOVA_VA) ++ devices_want_va = true; ++ } ++ } ++ if (devices_want_pa) { ++ iova_mode = RTE_IOVA_PA; ++ if (devices_want_va) ++ RTE_LOG(WARNING, EAL, "Some devices want 'VA' but forcing 'PA' because other devices want it\n"); ++ } else if (devices_want_va) { ++ iova_mode = RTE_IOVA_VA; ++ } ++ return iova_mode; ++} ++ + struct rte_pci_bus rte_pci_bus = { + .bus = { +diff --git a/drivers/bus/pci/private.h b/drivers/bus/pci/private.h +index 13c3324bb..8a5524052 100644 +--- a/drivers/bus/pci/private.h ++++ b/drivers/bus/pci/private.h +@@ -173,4 +173,12 @@ rte_pci_match(const struct rte_pci_driver *pci_drv, + const struct rte_pci_device *pci_dev); + ++/** ++ * OS specific callback for rte_pci_get_iommu_class ++ * ++ */ ++enum rte_iova_mode ++pci_device_iova_mode(const struct rte_pci_driver *pci_drv, ++ const struct rte_pci_device *pci_dev); ++ + /** + * Get iommu class of PCI devices on the bus. +-- +2.21.0 + diff --git a/SOURCES/0003-eal-fix-IOVA-mode-selection-as-VA-for-PCI-drivers.patch b/SOURCES/0003-eal-fix-IOVA-mode-selection-as-VA-for-PCI-drivers.patch new file mode 100644 index 0000000..478fd6c --- /dev/null +++ b/SOURCES/0003-eal-fix-IOVA-mode-selection-as-VA-for-PCI-drivers.patch @@ -0,0 +1,461 @@ +From 6054dd5fe1750b52381b9ee83858fe8db1765179 Mon Sep 17 00:00:00 2001 +From: David Marchand +Date: Mon, 22 Jul 2019 14:56:51 +0200 +Subject: [PATCH 3/4] eal: fix IOVA mode selection as VA for PCI drivers + +The incriminated commit broke the use of RTE_PCI_DRV_IOVA_AS_VA which +was intended to mean "driver only supports VA" but had been understood +as "driver supports both PA and VA" by most net drivers and used to let +dpdk processes to run as non root (which do not have access to physical +addresses on recent kernels). 
+ +The check on physical addresses actually closed the gap for those +drivers. We don't need to mark them with RTE_PCI_DRV_IOVA_AS_VA and this +flag can retain its intended meaning. +Document explicitly its meaning. + +We can check that a driver requirement wrt to IOVA mode is fulfilled +before trying to probe a device. + +Finally, document the heuristic used to select the IOVA mode and hope +that we won't break it again. + +Fixes: 703458e19c16 ("bus/pci: consider only usable devices for IOVA mode") + +Signed-off-by: David Marchand +Reviewed-by: Jerin Jacob +Tested-by: Jerin Jacob +Acked-by: Anatoly Burakov + +(cherry picked from commit b76fafb174d2cd5247c3573bb3d49444e195e760) +Signed-off-by: David Marchand + +Conflicts: + drivers/net/avf/avf_ethdev.c + drivers/net/ice/ice_ethdev.c + drivers/net/mlx4/mlx4.c + drivers/net/mlx5/mlx5.c + drivers/net/octeontx2/otx2_ethdev.c + drivers/raw/ioat/ioat_rawdev.c +--- + doc/guides/prog_guide/env_abstraction_layer.rst | 31 +++++++++++++++++++++++++ + drivers/bus/pci/linux/pci.c | 16 +++++-------- + drivers/bus/pci/pci_common.c | 30 +++++++++++++++++++----- + drivers/bus/pci/rte_bus_pci.h | 4 ++-- + drivers/net/atlantic/atl_ethdev.c | 3 +-- + drivers/net/avf/avf_ethdev.c | 3 +-- + drivers/net/bnxt/bnxt_ethdev.c | 3 +-- + drivers/net/e1000/em_ethdev.c | 3 +-- + drivers/net/e1000/igb_ethdev.c | 5 ++-- + drivers/net/enic/enic_ethdev.c | 3 +-- + drivers/net/fm10k/fm10k_ethdev.c | 3 +-- + drivers/net/i40e/i40e_ethdev.c | 3 +-- + drivers/net/i40e/i40e_ethdev_vf.c | 2 +- + drivers/net/ixgbe/ixgbe_ethdev.c | 5 ++-- + drivers/net/nfp/nfp_net.c | 6 ++--- + drivers/net/qede/qede_ethdev.c | 6 ++--- + lib/librte_eal/common/eal_common_bus.c | 30 +++++++++++++++++++++--- + 17 files changed, 106 insertions(+), 50 deletions(-) + +diff --git a/doc/guides/prog_guide/env_abstraction_layer.rst b/doc/guides/prog_guide/env_abstraction_layer.rst +index 2bb77b0..a29a435 100644 +--- a/doc/guides/prog_guide/env_abstraction_layer.rst ++++ 
b/doc/guides/prog_guide/env_abstraction_layer.rst +@@ -344,6 +344,37 @@ Misc Functions + + Locks and atomic operations are per-architecture (i686 and x86_64). + ++IOVA Mode Detection ++~~~~~~~~~~~~~~~~~~~ ++ ++IOVA Mode is selected by considering what the current usable Devices on the ++system require and/or support. ++ ++Below is the 2-step heuristic for this choice. ++ ++For the first step, EAL asks each bus its requirement in terms of IOVA mode ++and decides on a preferred IOVA mode. ++ ++- if all buses report RTE_IOVA_PA, then the preferred IOVA mode is RTE_IOVA_PA, ++- if all buses report RTE_IOVA_VA, then the preferred IOVA mode is RTE_IOVA_VA, ++- if all buses report RTE_IOVA_DC, no bus expressed a preferrence, then the ++ preferred mode is RTE_IOVA_DC, ++- if the buses disagree (at least one wants RTE_IOVA_PA and at least one wants ++ RTE_IOVA_VA), then the preferred IOVA mode is RTE_IOVA_DC (see below with the ++ check on Physical Addresses availability), ++ ++The second step checks if the preferred mode complies with the Physical ++Addresses availability since those are only available to root user in recent ++kernels. ++ ++- if the preferred mode is RTE_IOVA_PA but there is no access to Physical ++ Addresses, then EAL init fails early, since later probing of the devices ++ would fail anyway, ++- if the preferred mode is RTE_IOVA_DC then based on the Physical Addresses ++ availability, the preferred mode is adjusted to RTE_IOVA_PA or RTE_IOVA_VA. ++ In the case when the buses had disagreed on the IOVA Mode at the first step, ++ part of the buses won't work because of this decision. 
++ + IOVA Mode Configuration + ~~~~~~~~~~~~~~~~~~~~~~~ + +diff --git a/drivers/bus/pci/linux/pci.c b/drivers/bus/pci/linux/pci.c +index b12f10a..1a2f99b 100644 +--- a/drivers/bus/pci/linux/pci.c ++++ b/drivers/bus/pci/linux/pci.c +@@ -578,12 +578,10 @@ pci_device_iova_mode(const struct rte_pci_driver *pdrv, + else + is_vfio_noiommu_enabled = 0; + } +- if ((pdrv->drv_flags & RTE_PCI_DRV_IOVA_AS_VA) == 0) { ++ if (is_vfio_noiommu_enabled != 0) + iova_mode = RTE_IOVA_PA; +- } else if (is_vfio_noiommu_enabled != 0) { +- RTE_LOG(DEBUG, EAL, "Forcing to 'PA', vfio-noiommu mode configured\n"); +- iova_mode = RTE_IOVA_PA; +- } ++ else if ((pdrv->drv_flags & RTE_PCI_DRV_IOVA_AS_VA) != 0) ++ iova_mode = RTE_IOVA_VA; + #endif + break; + } +@@ -594,8 +592,8 @@ pci_device_iova_mode(const struct rte_pci_driver *pdrv, + break; + + default: +- RTE_LOG(DEBUG, EAL, "Unsupported kernel driver? Defaulting to IOVA as 'PA'\n"); +- iova_mode = RTE_IOVA_PA; ++ if ((pdrv->drv_flags & RTE_PCI_DRV_IOVA_AS_VA) != 0) ++ iova_mode = RTE_IOVA_VA; + break; + } + +@@ -607,10 +605,8 @@ pci_device_iova_mode(const struct rte_pci_driver *pdrv, + if (iommu_no_va == -1) + iommu_no_va = pci_one_device_iommu_support_va(pdev) + ? 0 : 1; +- if (iommu_no_va != 0) { +- RTE_LOG(DEBUG, EAL, "Forcing to 'PA', IOMMU does not support IOVA as 'VA'\n"); ++ if (iommu_no_va != 0) + iova_mode = RTE_IOVA_PA; +- } + } + return iova_mode; + } +diff --git a/drivers/bus/pci/pci_common.c b/drivers/bus/pci/pci_common.c +index b8cc6d3..ee5d321 100644 +--- a/drivers/bus/pci/pci_common.c ++++ b/drivers/bus/pci/pci_common.c +@@ -169,8 +169,22 @@ rte_pci_probe_one_driver(struct rte_pci_driver *dr, + * This needs to be before rte_pci_map_device(), as it enables to use + * driver flags for adjusting configuration. 
+ */ +- if (!already_probed) ++ if (!already_probed) { ++ enum rte_iova_mode dev_iova_mode; ++ enum rte_iova_mode iova_mode; ++ ++ dev_iova_mode = pci_device_iova_mode(dr, dev); ++ iova_mode = rte_eal_iova_mode(); ++ if (dev_iova_mode != RTE_IOVA_DC && ++ dev_iova_mode != iova_mode) { ++ RTE_LOG(ERR, EAL, " Expecting '%s' IOVA mode but current mode is '%s', not initializing\n", ++ dev_iova_mode == RTE_IOVA_PA ? "PA" : "VA", ++ iova_mode == RTE_IOVA_PA ? "PA" : "VA"); ++ return -EINVAL; ++ } ++ + dev->driver = dr; ++ } + + if (!already_probed && (dr->drv_flags & RTE_PCI_DRV_NEED_MAPPING)) { + /* map resources for devices that use igb_uio */ +@@ -583,12 +597,16 @@ rte_pci_get_iommu_class(void) + devices_want_va = true; + } + } +- if (devices_want_pa) { +- iova_mode = RTE_IOVA_PA; +- if (devices_want_va) +- RTE_LOG(WARNING, EAL, "Some devices want 'VA' but forcing 'PA' because other devices want it\n"); +- } else if (devices_want_va) { ++ if (devices_want_va && !devices_want_pa) { + iova_mode = RTE_IOVA_VA; ++ } else if (devices_want_pa && !devices_want_va) { ++ iova_mode = RTE_IOVA_PA; ++ } else { ++ iova_mode = RTE_IOVA_DC; ++ if (devices_want_va) { ++ RTE_LOG(WARNING, EAL, "Some devices want 'VA' but forcing 'DC' because other devices want 'PA'.\n"); ++ RTE_LOG(WARNING, EAL, "Depending on the final decision by the EAL, not all devices may be able to initialize.\n"); ++ } + } + return iova_mode; + } +diff --git a/drivers/bus/pci/rte_bus_pci.h b/drivers/bus/pci/rte_bus_pci.h +index f0d6d81..f0fa3a1 100644 +--- a/drivers/bus/pci/rte_bus_pci.h ++++ b/drivers/bus/pci/rte_bus_pci.h +@@ -147,8 +147,8 @@ struct rte_pci_bus { + #define RTE_PCI_DRV_INTR_RMV 0x0010 + /** Device driver needs to keep mapped resources if unsupported dev detected */ + #define RTE_PCI_DRV_KEEP_MAPPED_RES 0x0020 +-/** Device driver supports IOVA as VA */ +-#define RTE_PCI_DRV_IOVA_AS_VA 0X0040 ++/** Device driver only supports IOVA as VA and cannot work with IOVA as PA */ ++#define 
RTE_PCI_DRV_IOVA_AS_VA 0x0040 + + /** + * Map the PCI device resources in user space virtual memory address +diff --git a/drivers/net/atlantic/atl_ethdev.c b/drivers/net/atlantic/atl_ethdev.c +index 2d05bb4..d3b4b67 100644 +--- a/drivers/net/atlantic/atl_ethdev.c ++++ b/drivers/net/atlantic/atl_ethdev.c +@@ -155,8 +155,7 @@ static const struct rte_pci_id pci_id_atl_map[] = { + + static struct rte_pci_driver rte_atl_pmd = { + .id_table = pci_id_atl_map, +- .drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_INTR_LSC | +- RTE_PCI_DRV_IOVA_AS_VA, ++ .drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_INTR_LSC, + .probe = eth_atl_pci_probe, + .remove = eth_atl_pci_remove, + }; +diff --git a/drivers/net/avf/avf_ethdev.c b/drivers/net/avf/avf_ethdev.c +index 4dc61d9..6dde939 100644 +--- a/drivers/net/avf/avf_ethdev.c ++++ b/drivers/net/avf/avf_ethdev.c +@@ -1329,8 +1329,7 @@ static int eth_avf_pci_remove(struct rte_pci_device *pci_dev) + /* Adaptive virtual function driver struct */ + static struct rte_pci_driver rte_avf_pmd = { + .id_table = pci_id_avf_map, +- .drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_INTR_LSC | +- RTE_PCI_DRV_IOVA_AS_VA, ++ .drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_INTR_LSC, + .probe = eth_avf_pci_probe, + .remove = eth_avf_pci_remove, + }; +diff --git a/drivers/net/bnxt/bnxt_ethdev.c b/drivers/net/bnxt/bnxt_ethdev.c +index e26b9e3..7e764e0 100644 +--- a/drivers/net/bnxt/bnxt_ethdev.c ++++ b/drivers/net/bnxt/bnxt_ethdev.c +@@ -3545,8 +3545,7 @@ static int bnxt_pci_remove(struct rte_pci_device *pci_dev) + + static struct rte_pci_driver bnxt_rte_pmd = { + .id_table = bnxt_pci_id_map, +- .drv_flags = RTE_PCI_DRV_NEED_MAPPING | +- RTE_PCI_DRV_INTR_LSC | RTE_PCI_DRV_IOVA_AS_VA, ++ .drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_INTR_LSC, + .probe = bnxt_pci_probe, + .remove = bnxt_pci_remove, + }; +diff --git a/drivers/net/e1000/em_ethdev.c b/drivers/net/e1000/em_ethdev.c +index 8230824..bdee7bc 100644 +--- 
a/drivers/net/e1000/em_ethdev.c ++++ b/drivers/net/e1000/em_ethdev.c +@@ -351,8 +351,7 @@ static int eth_em_pci_remove(struct rte_pci_device *pci_dev) + + static struct rte_pci_driver rte_em_pmd = { + .id_table = pci_id_em_map, +- .drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_INTR_LSC | +- RTE_PCI_DRV_IOVA_AS_VA, ++ .drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_INTR_LSC, + .probe = eth_em_pci_probe, + .remove = eth_em_pci_remove, + }; +diff --git a/drivers/net/e1000/igb_ethdev.c b/drivers/net/e1000/igb_ethdev.c +index 87c9aed..7bd0c29 100644 +--- a/drivers/net/e1000/igb_ethdev.c ++++ b/drivers/net/e1000/igb_ethdev.c +@@ -1113,8 +1113,7 @@ static int eth_igb_pci_remove(struct rte_pci_device *pci_dev) + + static struct rte_pci_driver rte_igb_pmd = { + .id_table = pci_id_igb_map, +- .drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_INTR_LSC | +- RTE_PCI_DRV_IOVA_AS_VA, ++ .drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_INTR_LSC, + .probe = eth_igb_pci_probe, + .remove = eth_igb_pci_remove, + }; +@@ -1137,7 +1136,7 @@ static int eth_igbvf_pci_remove(struct rte_pci_device *pci_dev) + */ + static struct rte_pci_driver rte_igbvf_pmd = { + .id_table = pci_id_igbvf_map, +- .drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_IOVA_AS_VA, ++ .drv_flags = RTE_PCI_DRV_NEED_MAPPING, + .probe = eth_igbvf_pci_probe, + .remove = eth_igbvf_pci_remove, + }; +diff --git a/drivers/net/enic/enic_ethdev.c b/drivers/net/enic/enic_ethdev.c +index 996bb55..1b8d74d 100644 +--- a/drivers/net/enic/enic_ethdev.c ++++ b/drivers/net/enic/enic_ethdev.c +@@ -1077,8 +1077,7 @@ static int eth_enic_pci_remove(struct rte_pci_device *pci_dev) + + static struct rte_pci_driver rte_enic_pmd = { + .id_table = pci_id_enic_map, +- .drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_INTR_LSC | +- RTE_PCI_DRV_IOVA_AS_VA, ++ .drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_INTR_LSC, + .probe = eth_enic_pci_probe, + .remove = eth_enic_pci_remove, + }; +diff --git 
a/drivers/net/fm10k/fm10k_ethdev.c b/drivers/net/fm10k/fm10k_ethdev.c +index caf4d1b..98583c6 100644 +--- a/drivers/net/fm10k/fm10k_ethdev.c ++++ b/drivers/net/fm10k/fm10k_ethdev.c +@@ -3258,8 +3258,7 @@ static const struct rte_pci_id pci_id_fm10k_map[] = { + + static struct rte_pci_driver rte_pmd_fm10k = { + .id_table = pci_id_fm10k_map, +- .drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_INTR_LSC | +- RTE_PCI_DRV_IOVA_AS_VA, ++ .drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_INTR_LSC, + .probe = eth_fm10k_pci_probe, + .remove = eth_fm10k_pci_remove, + }; +diff --git a/drivers/net/i40e/i40e_ethdev.c b/drivers/net/i40e/i40e_ethdev.c +index df6eaec..c8d01ac 100644 +--- a/drivers/net/i40e/i40e_ethdev.c ++++ b/drivers/net/i40e/i40e_ethdev.c +@@ -699,8 +699,7 @@ static int eth_i40e_pci_remove(struct rte_pci_device *pci_dev) + + static struct rte_pci_driver rte_i40e_pmd = { + .id_table = pci_id_i40e_map, +- .drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_INTR_LSC | +- RTE_PCI_DRV_IOVA_AS_VA, ++ .drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_INTR_LSC, + .probe = eth_i40e_pci_probe, + .remove = eth_i40e_pci_remove, + }; +diff --git a/drivers/net/i40e/i40e_ethdev_vf.c b/drivers/net/i40e/i40e_ethdev_vf.c +index 551f6fa..58c1c36 100644 +--- a/drivers/net/i40e/i40e_ethdev_vf.c ++++ b/drivers/net/i40e/i40e_ethdev_vf.c +@@ -1510,7 +1510,7 @@ static int eth_i40evf_pci_remove(struct rte_pci_device *pci_dev) + */ + static struct rte_pci_driver rte_i40evf_pmd = { + .id_table = pci_id_i40evf_map, +- .drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_IOVA_AS_VA, ++ .drv_flags = RTE_PCI_DRV_NEED_MAPPING, + .probe = eth_i40evf_pci_probe, + .remove = eth_i40evf_pci_remove, + }; +diff --git a/drivers/net/ixgbe/ixgbe_ethdev.c b/drivers/net/ixgbe/ixgbe_ethdev.c +index e9533e5..fca15be 100644 +--- a/drivers/net/ixgbe/ixgbe_ethdev.c ++++ b/drivers/net/ixgbe/ixgbe_ethdev.c +@@ -1807,8 +1807,7 @@ static int eth_ixgbe_pci_remove(struct rte_pci_device *pci_dev) + + static struct 
rte_pci_driver rte_ixgbe_pmd = { + .id_table = pci_id_ixgbe_map, +- .drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_INTR_LSC | +- RTE_PCI_DRV_IOVA_AS_VA, ++ .drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_INTR_LSC, + .probe = eth_ixgbe_pci_probe, + .remove = eth_ixgbe_pci_remove, + }; +@@ -1830,7 +1829,7 @@ static int eth_ixgbevf_pci_remove(struct rte_pci_device *pci_dev) + */ + static struct rte_pci_driver rte_ixgbevf_pmd = { + .id_table = pci_id_ixgbevf_map, +- .drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_IOVA_AS_VA, ++ .drv_flags = RTE_PCI_DRV_NEED_MAPPING, + .probe = eth_ixgbevf_pci_probe, + .remove = eth_ixgbevf_pci_remove, + }; +diff --git a/drivers/net/nfp/nfp_net.c b/drivers/net/nfp/nfp_net.c +index 68c853c..ddd9d9f 100644 +--- a/drivers/net/nfp/nfp_net.c ++++ b/drivers/net/nfp/nfp_net.c +@@ -3297,16 +3297,14 @@ static int eth_nfp_pci_remove(struct rte_pci_device *pci_dev) + + static struct rte_pci_driver rte_nfp_net_pf_pmd = { + .id_table = pci_id_nfp_pf_net_map, +- .drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_INTR_LSC | +- RTE_PCI_DRV_IOVA_AS_VA, ++ .drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_INTR_LSC, + .probe = nfp_pf_pci_probe, + .remove = eth_nfp_pci_remove, + }; + + static struct rte_pci_driver rte_nfp_net_vf_pmd = { + .id_table = pci_id_nfp_vf_net_map, +- .drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_INTR_LSC | +- RTE_PCI_DRV_IOVA_AS_VA, ++ .drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_INTR_LSC, + .probe = eth_nfp_pci_probe, + .remove = eth_nfp_pci_remove, + }; +diff --git a/drivers/net/qede/qede_ethdev.c b/drivers/net/qede/qede_ethdev.c +index 0b2f305..518673d 100644 +--- a/drivers/net/qede/qede_ethdev.c ++++ b/drivers/net/qede/qede_ethdev.c +@@ -2735,8 +2735,7 @@ static int qedevf_eth_dev_pci_remove(struct rte_pci_device *pci_dev) + + static struct rte_pci_driver rte_qedevf_pmd = { + .id_table = pci_id_qedevf_map, +- .drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_INTR_LSC | +- RTE_PCI_DRV_IOVA_AS_VA, 
++ .drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_INTR_LSC, + .probe = qedevf_eth_dev_pci_probe, + .remove = qedevf_eth_dev_pci_remove, + }; +@@ -2755,8 +2754,7 @@ static int qede_eth_dev_pci_remove(struct rte_pci_device *pci_dev) + + static struct rte_pci_driver rte_qede_pmd = { + .id_table = pci_id_qede_map, +- .drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_INTR_LSC | +- RTE_PCI_DRV_IOVA_AS_VA, ++ .drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_INTR_LSC, + .probe = qede_eth_dev_pci_probe, + .remove = qede_eth_dev_pci_remove, + }; +diff --git a/lib/librte_eal/common/eal_common_bus.c b/lib/librte_eal/common/eal_common_bus.c +index 77f1be1..0459048 100644 +--- a/lib/librte_eal/common/eal_common_bus.c ++++ b/lib/librte_eal/common/eal_common_bus.c +@@ -228,13 +228,37 @@ rte_bus_find_by_device_name(const char *str) + enum rte_iova_mode + rte_bus_get_iommu_class(void) + { +- int mode = RTE_IOVA_DC; ++ enum rte_iova_mode mode = RTE_IOVA_DC; ++ bool buses_want_va = false; ++ bool buses_want_pa = false; + struct rte_bus *bus; + + TAILQ_FOREACH(bus, &rte_bus_list, next) { ++ enum rte_iova_mode bus_iova_mode; + +- if (bus->get_iommu_class) +- mode |= bus->get_iommu_class(); ++ if (bus->get_iommu_class == NULL) ++ continue; ++ ++ bus_iova_mode = bus->get_iommu_class(); ++ RTE_LOG(DEBUG, EAL, "Bus %s wants IOVA as '%s'\n", ++ bus->name, ++ bus_iova_mode == RTE_IOVA_DC ? "DC" : ++ (bus_iova_mode == RTE_IOVA_PA ? 
"PA" : "VA")); ++ if (bus_iova_mode == RTE_IOVA_PA) ++ buses_want_pa = true; ++ else if (bus_iova_mode == RTE_IOVA_VA) ++ buses_want_va = true; ++ } ++ if (buses_want_va && !buses_want_pa) { ++ mode = RTE_IOVA_VA; ++ } else if (buses_want_pa && !buses_want_va) { ++ mode = RTE_IOVA_PA; ++ } else { ++ mode = RTE_IOVA_DC; ++ if (buses_want_va) { ++ RTE_LOG(WARNING, EAL, "Some buses want 'VA' but forcing 'DC' because other buses want 'PA'.\n"); ++ RTE_LOG(WARNING, EAL, "Depending on the final decision by the EAL, not all buses may be able to initialize.\n"); ++ } + } + + return mode; +-- +1.8.3.1 + diff --git a/SOURCES/0004-bus-pci-always-check-IOMMU-capabilities.patch b/SOURCES/0004-bus-pci-always-check-IOMMU-capabilities.patch new file mode 100644 index 0000000..a963cc3 --- /dev/null +++ b/SOURCES/0004-bus-pci-always-check-IOMMU-capabilities.patch @@ -0,0 +1,164 @@ +From 3729b66e42e1a11bdd3ee080a86c07adede8b3b2 Mon Sep 17 00:00:00 2001 +From: David Marchand +Date: Mon, 5 Aug 2019 08:23:26 +0200 +Subject: [PATCH 4/4] bus/pci: always check IOMMU capabilities + +IOMMU capabilities won't change and must be checked even if no PCI device +seem to be supported yet when EAL initialised. + +This is to accommodate with SPDK that registers its drivers after +rte_eal_init(), especially on PPC platform where the IOMMU does not +support VA. 
+ +Fixes: 703458e19c16 ("bus/pci: consider only usable devices for IOVA mode") + +Signed-off-by: David Marchand +Reviewed-by: David Christensen +Acked-by: Jerin Jacob +Tested-by: Jerin Jacob +Tested-by: Takeshi Yoshimura + +(cherry picked from commit 66d3724b2c87e6fcdf3851ca191683696a91b901) +Signed-off-by: David Marchand +--- + drivers/bus/pci/bsd/pci.c | 6 ++++++ + drivers/bus/pci/linux/pci.c | 25 ++++++------------------- + drivers/bus/pci/pci_common.c | 16 +++++++++++++++- + drivers/bus/pci/private.h | 5 ++++- + 4 files changed, 31 insertions(+), 21 deletions(-) + +diff --git a/drivers/bus/pci/bsd/pci.c b/drivers/bus/pci/bsd/pci.c +index 0f23f12..42f4353 100644 +--- a/drivers/bus/pci/bsd/pci.c ++++ b/drivers/bus/pci/bsd/pci.c +@@ -376,6 +376,12 @@ error: + return -1; + } + ++bool ++pci_device_iommu_support_va(__rte_unused const struct rte_pci_device *dev) ++{ ++ return false; ++} ++ + enum rte_iova_mode + pci_device_iova_mode(const struct rte_pci_driver *pdrv __rte_unused, + const struct rte_pci_device *pdev) +diff --git a/drivers/bus/pci/linux/pci.c b/drivers/bus/pci/linux/pci.c +index 1a2f99b..6a6f78b 100644 +--- a/drivers/bus/pci/linux/pci.c ++++ b/drivers/bus/pci/linux/pci.c +@@ -498,8 +498,8 @@ error: + } + + #if defined(RTE_ARCH_X86) +-static bool +-pci_one_device_iommu_support_va(const struct rte_pci_device *dev) ++bool ++pci_device_iommu_support_va(const struct rte_pci_device *dev) + { + #define VTD_CAP_MGAW_SHIFT 16 + #define VTD_CAP_MGAW_MASK (0x3fULL << VTD_CAP_MGAW_SHIFT) +@@ -547,14 +547,14 @@ pci_one_device_iommu_support_va(const struct rte_pci_device *dev) + return true; + } + #elif defined(RTE_ARCH_PPC_64) +-static bool +-pci_one_device_iommu_support_va(__rte_unused const struct rte_pci_device *dev) ++bool ++pci_device_iommu_support_va(__rte_unused const struct rte_pci_device *dev) + { + return false; + } + #else +-static bool +-pci_one_device_iommu_support_va(__rte_unused const struct rte_pci_device *dev) ++bool 
++pci_device_iommu_support_va(__rte_unused const struct rte_pci_device *dev) + { + return true; + } +@@ -565,7 +565,6 @@ pci_device_iova_mode(const struct rte_pci_driver *pdrv, + const struct rte_pci_device *pdev) + { + enum rte_iova_mode iova_mode = RTE_IOVA_DC; +- static int iommu_no_va = -1; + + switch (pdev->kdrv) { + case RTE_KDRV_VFIO: { +@@ -596,18 +595,6 @@ pci_device_iova_mode(const struct rte_pci_driver *pdrv, + iova_mode = RTE_IOVA_VA; + break; + } +- +- if (iova_mode != RTE_IOVA_PA) { +- /* +- * We can check this only once, because the IOMMU hardware is +- * the same for all of them. +- */ +- if (iommu_no_va == -1) +- iommu_no_va = pci_one_device_iommu_support_va(pdev) +- ? 0 : 1; +- if (iommu_no_va != 0) +- iova_mode = RTE_IOVA_PA; +- } + return iova_mode; + } + +diff --git a/drivers/bus/pci/pci_common.c b/drivers/bus/pci/pci_common.c +index ee5d321..9e7106f 100644 +--- a/drivers/bus/pci/pci_common.c ++++ b/drivers/bus/pci/pci_common.c +@@ -570,8 +570,16 @@ rte_pci_get_iommu_class(void) + const struct rte_pci_driver *drv; + bool devices_want_va = false; + bool devices_want_pa = false; ++ int iommu_no_va = -1; + + FOREACH_DEVICE_ON_PCIBUS(dev) { ++ /* ++ * We can check this only once, because the IOMMU hardware is ++ * the same for all of them. ++ */ ++ if (iommu_no_va == -1) ++ iommu_no_va = pci_device_iommu_support_va(dev) ++ ? 
0 : 1; + if (pci_ignore_device(dev)) + continue; + if (dev->kdrv == RTE_KDRV_UNKNOWN || +@@ -597,7 +605,13 @@ rte_pci_get_iommu_class(void) + devices_want_va = true; + } + } +- if (devices_want_va && !devices_want_pa) { ++ if (iommu_no_va == 1) { ++ iova_mode = RTE_IOVA_PA; ++ if (devices_want_va) { ++ RTE_LOG(WARNING, EAL, "Some devices want 'VA' but IOMMU does not support 'VA'.\n"); ++ RTE_LOG(WARNING, EAL, "The devices that want 'VA' won't initialize.\n"); ++ } ++ } else if (devices_want_va && !devices_want_pa) { + iova_mode = RTE_IOVA_VA; + } else if (devices_want_pa && !devices_want_va) { + iova_mode = RTE_IOVA_PA; +diff --git a/drivers/bus/pci/private.h b/drivers/bus/pci/private.h +index 8a55240..a205d4d 100644 +--- a/drivers/bus/pci/private.h ++++ b/drivers/bus/pci/private.h +@@ -173,9 +173,12 @@ rte_pci_match(const struct rte_pci_driver *pci_drv, + const struct rte_pci_device *pci_dev); + + /** +- * OS specific callback for rte_pci_get_iommu_class ++ * OS specific callbacks for rte_pci_get_iommu_class + * + */ ++bool ++pci_device_iommu_support_va(const struct rte_pci_device *dev); ++ + enum rte_iova_mode + pci_device_iova_mode(const struct rte_pci_driver *pci_drv, + const struct rte_pci_device *pci_dev); +-- +1.8.3.1 + diff --git a/SOURCES/arm64-armv8a-linuxapp-gcc-config b/SOURCES/arm64-armv8a-linuxapp-gcc-config index 00825da..f2a0fa3 100644 --- a/SOURCES/arm64-armv8a-linuxapp-gcc-config +++ b/SOURCES/arm64-armv8a-linuxapp-gcc-config @@ -1,4 +1,4 @@ -# -*- cfg-sha: 9fc8b53ccd53cc8b64391f6252e1dba558ae660a73a72f10dcadff2ca5462243 +# -*- cfg-sha: 605773f9defc66f8bb966065cca04e8a2384a95d97e738b7123db77319820df3 # SPDX-License-Identifier: BSD-3-Clause # Copyright(c) 2015 Cavium, Inc # SPDX-License-Identifier: BSD-3-Clause @@ -57,7 +57,6 @@ CONFIG_RTE_LOG_DP_LEVEL=RTE_LOG_INFO CONFIG_RTE_LOG_HISTORY=256 CONFIG_RTE_BACKTRACE=y CONFIG_RTE_LIBEAL_USE_HPET=n -CONFIG_RTE_EAL_ALLOW_INV_SOCKET_ID=n CONFIG_RTE_EAL_ALWAYS_PANIC_ON_ERROR=n CONFIG_RTE_EAL_IGB_UIO=n 
CONFIG_RTE_EAL_VFIO=y diff --git a/SOURCES/ppc_64-power8-linuxapp-gcc-config b/SOURCES/ppc_64-power8-linuxapp-gcc-config index e5d9380..9f53d6e 100644 --- a/SOURCES/ppc_64-power8-linuxapp-gcc-config +++ b/SOURCES/ppc_64-power8-linuxapp-gcc-config @@ -1,4 +1,4 @@ -# -*- cfg-sha: ac783e64ca20c977a7c1c42e72e6dce151b31aa9aecfbfa121b45e49e938f418 +# -*- cfg-sha: 64cb6ddcd2aa862a6cc9bcb3de422441794ff18e6301fc5091cc89ae53d2cf28 # BSD LICENSE # Copyright (C) IBM Corporation 2014. # Redistribution and use in source and binary forms, with or without @@ -78,7 +78,6 @@ CONFIG_RTE_LOG_DP_LEVEL=RTE_LOG_INFO CONFIG_RTE_LOG_HISTORY=256 CONFIG_RTE_BACKTRACE=y CONFIG_RTE_LIBEAL_USE_HPET=n -CONFIG_RTE_EAL_ALLOW_INV_SOCKET_ID=n CONFIG_RTE_EAL_ALWAYS_PANIC_ON_ERROR=n CONFIG_RTE_EAL_IGB_UIO=n CONFIG_RTE_EAL_VFIO=y diff --git a/SOURCES/x86_64-native-linuxapp-gcc-config b/SOURCES/x86_64-native-linuxapp-gcc-config index 55a3fdf..d7caa01 100644 --- a/SOURCES/x86_64-native-linuxapp-gcc-config +++ b/SOURCES/x86_64-native-linuxapp-gcc-config @@ -1,4 +1,4 @@ -# -*- cfg-sha: 2ba93102021dc5d38494cf5090c3ecaca37db13153dd558b1511a56f2a3d9b10 +# -*- cfg-sha: f06f2b5234bcb38cbde09c4732b27af067c30d7f3aff6966eb2c81501add74bb # SPDX-License-Identifier: BSD-3-Clause # Copyright(c) 2010-2014 Intel Corporation # SPDX-License-Identifier: BSD-3-Clause @@ -55,7 +55,6 @@ CONFIG_RTE_LOG_DP_LEVEL=RTE_LOG_INFO CONFIG_RTE_LOG_HISTORY=256 CONFIG_RTE_BACKTRACE=y CONFIG_RTE_LIBEAL_USE_HPET=n -CONFIG_RTE_EAL_ALLOW_INV_SOCKET_ID=n CONFIG_RTE_EAL_ALWAYS_PANIC_ON_ERROR=n CONFIG_RTE_EAL_IGB_UIO=n CONFIG_RTE_EAL_VFIO=y diff --git a/SPECS/dpdk.spec b/SPECS/dpdk.spec index 97713a2..df2ebc9 100644 --- a/SPECS/dpdk.spec +++ b/SPECS/dpdk.spec @@ -8,7 +8,7 @@ #% define date 20181127 #% define shortcommit0 %(c=%{commit0}; echo ${c:0:7}) -%define ver 18.11.5 +%define ver 18.11.8 %define rel 1 %define srcname dpdk-stable @@ -36,8 +36,17 @@ Source504: arm64-armv8a-linuxapp-gcc-config Source505: ppc_64-power8-linuxapp-gcc-config 
Source506: x86_64-native-linuxapp-gcc-config -# Patches only in dpdk package +# Bug #1711739 +Patch1020: 0001-eal-compute-IOVA-mode-based-on-PA-availability.patch +Patch1021: 0002-bus-pci-consider-only-usable-devices-for-IOVA-mode.patch +Patch1022: 0003-eal-fix-IOVA-mode-selection-as-VA-for-PCI-drivers.patch +Patch1023: 0004-bus-pci-always-check-IOMMU-capabilities.patch +# Bug #1719644 & #1733402 +Patch1031: 0001-net-i40e-re-program-promiscuous-mode-on-VF-interface.patch + +# Bug #1726579 +Patch1040: 0001-vhost-add-device-op-when-notification-to-guest-is-se.patch Summary: Set of libraries and drivers for fast packet processing @@ -144,7 +153,7 @@ unset RTE_SDK RTE_INCLUDE RTE_TARGET # Avoid appending second -Wall to everything, it breaks upstream warning # disablers in makefiles. Strip expclit -march= from optflags since they # will only guarantee build failures, DPDK is picky with that. -export EXTRA_CFLAGS="$(echo %{optflags} | sed -e 's:-Wall::g' -e 's:-march=[[:alnum:]]* ::g') -Wformat -fPIC" +export EXTRA_CFLAGS="$(echo %{optflags} | sed -e 's:-Wall::g' -e 's:-march=[[:alnum:]]* ::g') -Wformat -fPIC -fcommon" # DPDK defaults to using builder-specific compiler flags. However, # the config has been changed by specifying CONFIG_RTE_MACHINE=default @@ -278,6 +287,13 @@ sed -i -e 's:-%{machine_tmpl}-:-%{machine}-:g' %{buildroot}/%{_sysconfdir}/profi %endif %changelog +* Wed May 20 2020 Timothy Redaelli - 18.11.8-1 +- Updated to DPDK 18.11.8 (#1836829, #1837025) + +* Fri Apr 17 2020 Timothy Redaelli - 18.11.7-1 +- Updated to DPDK 18.11.7 (#1825276) +- Align patches with DPDK included in OVS 2.11 + * Fri Dec 20 2019 Timothy Redaelli - 18.11.5-1 - Updated to DPDK 18.11.5 that includes the fixes for CVE-2019-14818 (#1777135)