Blame SOURCES/0001-eal-compute-IOVA-mode-based-on-PA-availability.patch

eb2664
From a6fc8e35d9e72b2acd605b6c6a8b08d2541c0609 Mon Sep 17 00:00:00 2001
eb2664
From: Ben Walker <benjamin.walker@intel.com>
eb2664
Date: Fri, 14 Jun 2019 11:39:16 +0200
eb2664
Subject: [PATCH 1/4] eal: compute IOVA mode based on PA availability
eb2664
eb2664
Currently, if the bus selects IOVA as PA, the memory init can fail when
eb2664
lacking access to physical addresses.
eb2664
This makes it quite hard for normal users to understand what is wrong
eb2664
since this is the default behavior.
eb2664
eb2664
Catch this situation earlier in eal init by validating physical addresses
eb2664
availability, or select the IOVA mode when no clear preference was expressed.
eb2664
eb2664
The bus code is changed so that it reports when it does not care about
eb2664
the IOVA mode and lets the eal init decide.
eb2664
eb2664
In Linux implementation, rework rte_eal_using_phys_addrs() so that it can
eb2664
be called earlier but still avoid a circular dependency with
eb2664
rte_mem_virt2phys().
eb2664
In FreeBSD implementation, rte_eal_using_phys_addrs() always returns
eb2664
false, so the detection part is left as is.
eb2664
eb2664
If librte_kni is compiled in and the KNI kmod is loaded,
eb2664
- if the buses requested VA, force to PA if physical addresses are
eb2664
  available as it was done before,
eb2664
- else, keep iova as VA, KNI init will fail later.
eb2664
eb2664
Signed-off-by: Ben Walker <benjamin.walker@intel.com>
eb2664
Signed-off-by: David Marchand <david.marchand@redhat.com>
eb2664
Acked-by: Anatoly Burakov <anatoly.burakov@intel.com>
eb2664
eb2664
(cherry picked from commit c2361bab70c56f64e50f07946b1b20bf688d782a)
eb2664
Signed-off-by: David Marchand <david.marchand@redhat.com>
eb2664
---
eb2664
 lib/librte_eal/bsdapp/eal/eal.c          | 10 +++++--
eb2664
 lib/librte_eal/common/eal_common_bus.c   |  4 ---
eb2664
 lib/librte_eal/common/include/rte_bus.h  |  2 +-
eb2664
 lib/librte_eal/linuxapp/eal/eal.c        | 38 ++++++++++++++++++++------
eb2664
 lib/librte_eal/linuxapp/eal/eal_memory.c | 46 +++++++++-----------------------
eb2664
 5 files changed, 51 insertions(+), 49 deletions(-)
eb2664
eb2664
diff --git a/lib/librte_eal/bsdapp/eal/eal.c b/lib/librte_eal/bsdapp/eal/eal.c
eb2664
index bfac7fd..14ae853 100644
eb2664
--- a/lib/librte_eal/bsdapp/eal/eal.c
eb2664
+++ b/lib/librte_eal/bsdapp/eal/eal.c
eb2664
@@ -689,13 +689,19 @@ rte_eal_init(int argc, char **argv)
eb2664
 	/* if no EAL option "--iova-mode=<pa|va>", use bus IOVA scheme */
eb2664
 	if (internal_config.iova_mode == RTE_IOVA_DC) {
eb2664
 		/* autodetect the IOVA mapping mode (default is RTE_IOVA_PA) */
eb2664
-		rte_eal_get_configuration()->iova_mode =
eb2664
-			rte_bus_get_iommu_class();
eb2664
+		enum rte_iova_mode iova_mode = rte_bus_get_iommu_class();
eb2664
+
eb2664
+		if (iova_mode == RTE_IOVA_DC)
eb2664
+			iova_mode = RTE_IOVA_PA;
eb2664
+		rte_eal_get_configuration()->iova_mode = iova_mode;
eb2664
 	} else {
eb2664
 		rte_eal_get_configuration()->iova_mode =
eb2664
 			internal_config.iova_mode;
eb2664
 	}
eb2664
 
eb2664
+	RTE_LOG(INFO, EAL, "Selected IOVA mode '%s'\n",
eb2664
+		rte_eal_iova_mode() == RTE_IOVA_PA ? "PA" : "VA");
eb2664
+
eb2664
 	if (internal_config.no_hugetlbfs == 0) {
eb2664
 		/* rte_config isn't initialized yet */
eb2664
 		ret = internal_config.process_type == RTE_PROC_PRIMARY ?
eb2664
diff --git a/lib/librte_eal/common/eal_common_bus.c b/lib/librte_eal/common/eal_common_bus.c
eb2664
index c8f1901..77f1be1 100644
eb2664
--- a/lib/librte_eal/common/eal_common_bus.c
eb2664
+++ b/lib/librte_eal/common/eal_common_bus.c
eb2664
@@ -237,10 +237,6 @@ rte_bus_get_iommu_class(void)
eb2664
 			mode |= bus->get_iommu_class();
eb2664
 	}
eb2664
 
eb2664
-	if (mode != RTE_IOVA_VA) {
eb2664
-		/* Use default IOVA mode */
eb2664
-		mode = RTE_IOVA_PA;
eb2664
-	}
eb2664
 	return mode;
eb2664
 }
eb2664
 
eb2664
diff --git a/lib/librte_eal/common/include/rte_bus.h b/lib/librte_eal/common/include/rte_bus.h
eb2664
index 6be4b5c..b87e23b 100644
eb2664
--- a/lib/librte_eal/common/include/rte_bus.h
eb2664
+++ b/lib/librte_eal/common/include/rte_bus.h
eb2664
@@ -348,7 +348,7 @@ struct rte_bus *rte_bus_find_by_name(const char *busname);
eb2664
 
eb2664
 /**
eb2664
  * Get the common iommu class of devices bound on to buses available in the
eb2664
- * system. The default mode is PA.
eb2664
+ * system. RTE_IOVA_DC means that no preferrence has been expressed.
eb2664
  *
eb2664
  * @return
eb2664
  *     enum rte_iova_mode value.
eb2664
diff --git a/lib/librte_eal/linuxapp/eal/eal.c b/lib/librte_eal/linuxapp/eal/eal.c
eb2664
index 7a08cf1..6899307 100644
eb2664
--- a/lib/librte_eal/linuxapp/eal/eal.c
eb2664
+++ b/lib/librte_eal/linuxapp/eal/eal.c
eb2664
@@ -943,6 +943,7 @@ rte_eal_init(int argc, char **argv)
eb2664
 	static char logid[PATH_MAX];
eb2664
 	char cpuset[RTE_CPU_AFFINITY_STR_LEN];
eb2664
 	char thread_name[RTE_MAX_THREAD_NAME_LEN];
eb2664
+	bool phys_addrs;
eb2664
 
eb2664
 	/* checks if the machine is adequate */
eb2664
 	if (!rte_cpu_is_supported()) {
eb2664
@@ -1030,25 +1031,46 @@ rte_eal_init(int argc, char **argv)
eb2664
 		return -1;
eb2664
 	}
eb2664
 
eb2664
+	phys_addrs = rte_eal_using_phys_addrs() != 0;
eb2664
+
eb2664
 	/* if no EAL option "--iova-mode=<pa|va>", use bus IOVA scheme */
eb2664
 	if (internal_config.iova_mode == RTE_IOVA_DC) {
eb2664
-		/* autodetect the IOVA mapping mode (default is RTE_IOVA_PA) */
eb2664
-		rte_eal_get_configuration()->iova_mode =
eb2664
-			rte_bus_get_iommu_class();
eb2664
+		/* autodetect the IOVA mapping mode */
eb2664
+		enum rte_iova_mode iova_mode = rte_bus_get_iommu_class();
eb2664
 
eb2664
+		if (iova_mode == RTE_IOVA_DC) {
eb2664
+			iova_mode = phys_addrs ? RTE_IOVA_PA : RTE_IOVA_VA;
eb2664
+			RTE_LOG(DEBUG, EAL,
eb2664
+				"Buses did not request a specific IOVA mode, using '%s' based on physical addresses availability.\n",
eb2664
+				phys_addrs ? "PA" : "VA");
eb2664
+		}
eb2664
+#ifdef RTE_LIBRTE_KNI
eb2664
 		/* Workaround for KNI which requires physical address to work */
eb2664
-		if (rte_eal_get_configuration()->iova_mode == RTE_IOVA_VA &&
eb2664
+		if (iova_mode == RTE_IOVA_VA &&
eb2664
 				rte_eal_check_module("rte_kni") == 1) {
eb2664
-			rte_eal_get_configuration()->iova_mode = RTE_IOVA_PA;
eb2664
-			RTE_LOG(WARNING, EAL,
eb2664
-				"Some devices want IOVA as VA but PA will be used because.. "
eb2664
-				"KNI module inserted\n");
eb2664
+			if (phys_addrs) {
eb2664
+				iova_mode = RTE_IOVA_PA;
eb2664
+				RTE_LOG(WARNING, EAL, "Forcing IOVA as 'PA' because KNI module is loaded\n");
eb2664
+			} else {
eb2664
+				RTE_LOG(DEBUG, EAL, "KNI can not work since physical addresses are unavailable\n");
eb2664
+			}
eb2664
 		}
eb2664
+#endif
eb2664
+		rte_eal_get_configuration()->iova_mode = iova_mode;
eb2664
 	} else {
eb2664
 		rte_eal_get_configuration()->iova_mode =
eb2664
 			internal_config.iova_mode;
eb2664
 	}
eb2664
 
eb2664
+	if (rte_eal_iova_mode() == RTE_IOVA_PA && !phys_addrs) {
eb2664
+		rte_eal_init_alert("Cannot use IOVA as 'PA' since physical addresses are not available");
eb2664
+		rte_errno = EINVAL;
eb2664
+		return -1;
eb2664
+	}
eb2664
+
eb2664
+	RTE_LOG(INFO, EAL, "Selected IOVA mode '%s'\n",
eb2664
+		rte_eal_iova_mode() == RTE_IOVA_PA ? "PA" : "VA");
eb2664
+
eb2664
 	if (internal_config.no_hugetlbfs == 0) {
eb2664
 		/* rte_config isn't initialized yet */
eb2664
 		ret = internal_config.process_type == RTE_PROC_PRIMARY ?
eb2664
diff --git a/lib/librte_eal/linuxapp/eal/eal_memory.c b/lib/librte_eal/linuxapp/eal/eal_memory.c
eb2664
index 898bdb7..24d99c0 100644
eb2664
--- a/lib/librte_eal/linuxapp/eal/eal_memory.c
eb2664
+++ b/lib/librte_eal/linuxapp/eal/eal_memory.c
eb2664
@@ -62,34 +62,10 @@
eb2664
  * zone as well as a physical contiguous zone.
eb2664
  */
eb2664
 
eb2664
-static bool phys_addrs_available = true;
eb2664
+static int phys_addrs_available = -1;
eb2664
 
eb2664
 #define RANDOMIZE_VA_SPACE_FILE "/proc/sys/kernel/randomize_va_space"
eb2664
 
eb2664
-static void
eb2664
-test_phys_addrs_available(void)
eb2664
-{
eb2664
-	uint64_t tmp = 0;
eb2664
-	phys_addr_t physaddr;
eb2664
-
eb2664
-	if (!rte_eal_has_hugepages()) {
eb2664
-		RTE_LOG(ERR, EAL,
eb2664
-			"Started without hugepages support, physical addresses not available\n");
eb2664
-		phys_addrs_available = false;
eb2664
-		return;
eb2664
-	}
eb2664
-
eb2664
-	physaddr = rte_mem_virt2phy(&tmp);
eb2664
-	if (physaddr == RTE_BAD_PHYS_ADDR) {
eb2664
-		if (rte_eal_iova_mode() == RTE_IOVA_PA)
eb2664
-			RTE_LOG(ERR, EAL,
eb2664
-				"Cannot obtain physical addresses: %s. "
eb2664
-				"Only vfio will function.\n",
eb2664
-				strerror(errno));
eb2664
-		phys_addrs_available = false;
eb2664
-	}
eb2664
-}
eb2664
-
eb2664
 /*
eb2664
  * Get physical address of any mapped virtual address in the current process.
eb2664
  */
eb2664
@@ -102,8 +78,7 @@ rte_mem_virt2phy(const void *virtaddr)
eb2664
 	int page_size;
eb2664
 	off_t offset;
eb2664
 
eb2664
-	/* Cannot parse /proc/self/pagemap, no need to log errors everywhere */
eb2664
-	if (!phys_addrs_available)
eb2664
+	if (phys_addrs_available == 0)
eb2664
 		return RTE_BAD_IOVA;
eb2664
 
eb2664
 	/* standard page size */
eb2664
@@ -1332,8 +1307,6 @@ eal_legacy_hugepage_init(void)
eb2664
 	int nr_hugefiles, nr_hugepages = 0;
eb2664
 	void *addr;
eb2664
 
eb2664
-	test_phys_addrs_available();
eb2664
-
eb2664
 	memset(used_hp, 0, sizeof(used_hp));
eb2664
 
eb2664
 	/* get pointer to global configuration */
eb2664
@@ -1466,7 +1439,7 @@ eal_legacy_hugepage_init(void)
eb2664
 				continue;
eb2664
 		}
eb2664
 
eb2664
-		if (phys_addrs_available &&
eb2664
+		if (rte_eal_using_phys_addrs() &&
eb2664
 				rte_eal_iova_mode() != RTE_IOVA_VA) {
eb2664
 			/* find physical addresses for each hugepage */
eb2664
 			if (find_physaddrs(&tmp_hp[hp_offset], hpi) < 0) {
eb2664
@@ -1685,8 +1658,6 @@ eal_hugepage_init(void)
eb2664
 	uint64_t memory[RTE_MAX_NUMA_NODES];
eb2664
 	int hp_sz_idx, socket_id;
eb2664
 
eb2664
-	test_phys_addrs_available();
eb2664
-
eb2664
 	memset(used_hp, 0, sizeof(used_hp));
eb2664
 
eb2664
 	for (hp_sz_idx = 0;
eb2664
@@ -1812,8 +1783,6 @@ eal_legacy_hugepage_attach(void)
eb2664
 				"into secondary processes\n");
eb2664
 	}
eb2664
 
eb2664
-	test_phys_addrs_available();
eb2664
-
eb2664
 	fd_hugepage = open(eal_hugepage_data_path(), O_RDONLY);
eb2664
 	if (fd_hugepage < 0) {
eb2664
 		RTE_LOG(ERR, EAL, "Could not open %s\n",
eb2664
@@ -1953,6 +1922,15 @@ rte_eal_hugepage_attach(void)
eb2664
 int
eb2664
 rte_eal_using_phys_addrs(void)
eb2664
 {
eb2664
+	if (phys_addrs_available == -1) {
eb2664
+		uint64_t tmp = 0;
eb2664
+
eb2664
+		if (rte_eal_has_hugepages() != 0 &&
eb2664
+		    rte_mem_virt2phy(&tmp) != RTE_BAD_PHYS_ADDR)
eb2664
+			phys_addrs_available = 1;
eb2664
+		else
eb2664
+			phys_addrs_available = 0;
eb2664
+	}
eb2664
 	return phys_addrs_available;
eb2664
 }
eb2664
 
eb2664
-- 
eb2664
1.8.3.1
eb2664