Blame SOURCES/kexec-tools-2.0.15-ppc64-avoid-adding-coherent-memory-regio.patch

06c2a2
From hbathini at linux.vnet.ibm.com  Thu Aug 17 05:31:51 2017
06c2a2
From: hbathini at linux.vnet.ibm.com (Hari Bathini)
06c2a2
Date: Thu, 17 Aug 2017 18:01:51 +0530
06c2a2
Subject: [PATCH] kexec-tools: ppc64: avoid adding coherent memory regions to
06c2a2
 crash memory ranges
06c2a2
Message-ID: <150297311110.25328.11468130779639120510.stgit@hbathini.in.ibm.com>
06c2a2
Content-Length: 3407
06c2a2
Lines: 121
06c2a2
06c2a2
Accelerator devices like GPU and FPGA cards contain onboard memory. This
06c2a2
onboard memory is represented as a memory-only NUMA node, integrating it
06c2a2
with the core memory subsystem. Since the link through which these devices
06c2a2
are integrated to core memory goes down after a system crash and they are
06c2a2
meant for user workloads, avoid adding coherent device memory regions to
06c2a2
crash memory ranges. Without this change, the makedumpfile tool tries to save
06c2a2
inaccessible coherent device memory regions, crashing the system.
06c2a2
06c2a2
Signed-off-by: Hari Bathini <hbathini at linux.vnet.ibm.com>
06c2a2
---
06c2a2
 kexec/arch/ppc64/crashdump-ppc64.c |   64 +++++++++++++++++++++++++++++++++++-
06c2a2
 kexec/arch/ppc64/kexec-ppc64.h     |    1 +
06c2a2
 2 files changed, 63 insertions(+), 2 deletions(-)
06c2a2
06c2a2
diff --git a/kexec/arch/ppc64/crashdump-ppc64.c b/kexec/arch/ppc64/crashdump-ppc64.c
06c2a2
index 13995bf..7ea3983 100644
06c2a2
--- a/kexec/arch/ppc64/crashdump-ppc64.c
06c2a2
+++ b/kexec/arch/ppc64/crashdump-ppc64.c
06c2a2
@@ -181,6 +181,53 @@ static int get_dyn_reconf_crash_memory_ranges(void)
06c2a2
 	return 0;
06c2a2
 }
06c2a2
 
06c2a2
+/*
06c2a2
+ * For a given memory node, check if it is mapped to system RAM or
06c2a2
+ * to onboard memory on accelerator device like GPU card or such.
06c2a2
+ */
06c2a2
+static int is_coherent_device_mem(const char *fname)
06c2a2
+{
06c2a2
+	char fpath[PATH_LEN];
06c2a2
+	char buf[32];
06c2a2
+	DIR *dmem;
06c2a2
+	FILE *file;
06c2a2
+	struct dirent *mentry;
06c2a2
+	int cnt, ret = 0;
06c2a2
+
06c2a2
+	strcpy(fpath, fname);
06c2a2
+	if ((dmem = opendir(fpath)) == NULL) {
06c2a2
+		perror(fpath);
06c2a2
+		return -1;
06c2a2
+	}
06c2a2
+
06c2a2
+	while ((mentry = readdir(dmem)) != NULL) {
06c2a2
+		if (strcmp(mentry->d_name, "compatible"))
06c2a2
+			continue;
06c2a2
+
06c2a2
+		strcat(fpath, "/compatible");
06c2a2
+		if ((file = fopen(fpath, "r")) == NULL) {
06c2a2
+			perror(fpath);
06c2a2
+			ret = -1;
06c2a2
+			break;
06c2a2
+		}
06c2a2
+		if ((cnt = fread(buf, 1, 32, file)) < 0) {
06c2a2
+			perror(fpath);
06c2a2
+			fclose(file);
06c2a2
+			ret = -1;
06c2a2
+			break;
06c2a2
+		}
06c2a2
+		if (!strncmp(buf, "ibm,coherent-device-memory", 26)) {
06c2a2
+			ret = 1;
06c2a2
+			break;
06c2a2
+		}
06c2a2
+		fclose(file);
06c2a2
+	}
06c2a2
+
06c2a2
+	closedir(dmem);
06c2a2
+	return ret;
06c2a2
+}
06c2a2
+
06c2a2
+
06c2a2
 /* Reads the appropriate file and retrieves the SYSTEM RAM regions for whom to
06c2a2
  * create Elf headers. Keeping it separate from get_memory_ranges() as
06c2a2
  * requirements are different in the case of normal kexec and crashdumps.
06c2a2
@@ -196,12 +243,12 @@ static int get_crash_memory_ranges(struct memory_range **range, int *ranges)
06c2a2
 {
06c2a2
 
06c2a2
 	char device_tree[256] = "/proc/device-tree/";
06c2a2
-	char fname[256];
06c2a2
+	char fname[PATH_LEN];
06c2a2
 	char buf[MAXBYTES];
06c2a2
 	DIR *dir, *dmem;
06c2a2
 	FILE *file;
06c2a2
 	struct dirent *dentry, *mentry;
06c2a2
-	int n, crash_rng_len = 0;
06c2a2
+	int n, ret, crash_rng_len = 0;
06c2a2
 	unsigned long long start, end;
06c2a2
 	int page_size;
06c2a2
 
06c2a2
@@ -240,6 +287,19 @@ static int get_crash_memory_ranges(struct memory_range **range, int *ranges)
06c2a2
 			continue;
06c2a2
 		strcpy(fname, device_tree);
06c2a2
 		strcat(fname, dentry->d_name);
06c2a2
+
06c2a2
+		ret = is_coherent_device_mem(fname);
06c2a2
+		if (ret == -1) {
06c2a2
+			closedir(dir);
06c2a2
+			goto err;
06c2a2
+		} else if (ret == 1) {
06c2a2
+			/*
06c2a2
+			 * Avoid adding this memory region as it is not
06c2a2
+			 * mapped to system RAM.
06c2a2
+			 */
06c2a2
+			continue;
06c2a2
+		}
06c2a2
+
06c2a2
 		if ((dmem = opendir(fname)) == NULL) {
06c2a2
 			perror(fname);
06c2a2
 			closedir(dir);
06c2a2
diff --git a/kexec/arch/ppc64/kexec-ppc64.h b/kexec/arch/ppc64/kexec-ppc64.h
06c2a2
index 633ae77..434b4bf 100644
06c2a2
--- a/kexec/arch/ppc64/kexec-ppc64.h
06c2a2
+++ b/kexec/arch/ppc64/kexec-ppc64.h
06c2a2
@@ -1,6 +1,7 @@
06c2a2
 #ifndef KEXEC_PPC64_H
06c2a2
 #define KEXEC_PPC64_H
06c2a2
 
06c2a2
+#define PATH_LEN 256
06c2a2
 #define MAXBYTES 128
06c2a2
 #define MAX_LINE 160
06c2a2
 #define CORE_TYPE_ELF32 1
06c2a2
06c2a2
06c2a2