|
|
cf4a81 |
From hbathini at linux.vnet.ibm.com Thu Aug 17 05:31:51 2017
|
|
|
cf4a81 |
From: hbathini at linux.vnet.ibm.com (Hari Bathini)
|
|
|
cf4a81 |
Date: Thu, 17 Aug 2017 18:01:51 +0530
|
|
|
cf4a81 |
Subject: [PATCH] kexec-tools: ppc64: avoid adding coherent memory regions to
|
|
|
cf4a81 |
crash memory ranges
|
|
|
cf4a81 |
Message-ID: <150297311110.25328.11468130779639120510.stgit@hbathini.in.ibm.com>
|
|
|
cf4a81 |
Content-Length: 3407
|
|
|
cf4a81 |
Lines: 121
|
|
|
cf4a81 |
|
|
|
cf4a81 |
Accelerator devices like GPU and FPGA cards contain onboard memory. This
|
|
|
cf4a81 |
onboard memory is represented as a memory only NUMA node, integrating it
|
|
|
cf4a81 |
with the core memory subsystem. Since the link through which these devices
|
|
|
cf4a81 |
are integrated to core memory goes down after a system crash and they are
|
|
|
cf4a81 |
meant for user workloads, avoid adding coherent device memory regions to
|
|
|
cf4a81 |
crash memory ranges. Without this change, makedumpfile tool tries to save
|
|
|
cf4a81 |
inaccessible coherent device memory regions, crashing the system.
|
|
|
cf4a81 |
|
|
|
cf4a81 |
Signed-off-by: Hari Bathini <hbathini at linux.vnet.ibm.com>
|
|
|
cf4a81 |
---
|
|
|
cf4a81 |
kexec/arch/ppc64/crashdump-ppc64.c | 64 +++++++++++++++++++++++++++++++++++-
|
|
|
cf4a81 |
kexec/arch/ppc64/kexec-ppc64.h | 1 +
|
|
|
cf4a81 |
2 files changed, 63 insertions(+), 2 deletions(-)
|
|
|
cf4a81 |
|
|
|
cf4a81 |
diff --git a/kexec/arch/ppc64/crashdump-ppc64.c b/kexec/arch/ppc64/crashdump-ppc64.c
|
|
|
cf4a81 |
index 13995bf..7ea3983 100644
|
|
|
cf4a81 |
--- a/kexec/arch/ppc64/crashdump-ppc64.c
|
|
|
cf4a81 |
+++ b/kexec/arch/ppc64/crashdump-ppc64.c
|
|
|
cf4a81 |
@@ -181,6 +181,53 @@ static int get_dyn_reconf_crash_memory_ranges(void)
|
|
|
cf4a81 |
return 0;
|
|
|
cf4a81 |
}
|
|
|
cf4a81 |
|
|
|
cf4a81 |
+/*
|
|
|
cf4a81 |
+ * For a given memory node, check if it is mapped to system RAM or
|
|
|
cf4a81 |
+ * to onboard memory on accelerator device like GPU card or such.
|
|
|
cf4a81 |
+ */
|
|
|
cf4a81 |
+static int is_coherent_device_mem(const char *fname)
|
|
|
cf4a81 |
+{
|
|
|
cf4a81 |
+ char fpath[PATH_LEN];
|
|
|
cf4a81 |
+ char buf[32];
|
|
|
cf4a81 |
+ DIR *dmem;
|
|
|
cf4a81 |
+ FILE *file;
|
|
|
cf4a81 |
+ struct dirent *mentry;
|
|
|
cf4a81 |
+ int cnt, ret = 0;
|
|
|
cf4a81 |
+
|
|
|
cf4a81 |
+ strcpy(fpath, fname);
|
|
|
cf4a81 |
+ if ((dmem = opendir(fpath)) == NULL) {
|
|
|
cf4a81 |
+ perror(fpath);
|
|
|
cf4a81 |
+ return -1;
|
|
|
cf4a81 |
+ }
|
|
|
cf4a81 |
+
|
|
|
cf4a81 |
+ while ((mentry = readdir(dmem)) != NULL) {
|
|
|
cf4a81 |
+ if (strcmp(mentry->d_name, "compatible"))
|
|
|
cf4a81 |
+ continue;
|
|
|
cf4a81 |
+
|
|
|
cf4a81 |
+ strcat(fpath, "/compatible");
|
|
|
cf4a81 |
+ if ((file = fopen(fpath, "r")) == NULL) {
|
|
|
cf4a81 |
+ perror(fpath);
|
|
|
cf4a81 |
+ ret = -1;
|
|
|
cf4a81 |
+ break;
|
|
|
cf4a81 |
+ }
|
|
|
cf4a81 |
+ if ((cnt = fread(buf, 1, 32, file)) < 0) {
|
|
|
cf4a81 |
+ perror(fpath);
|
|
|
cf4a81 |
+ fclose(file);
|
|
|
cf4a81 |
+ ret = -1;
|
|
|
cf4a81 |
+ break;
|
|
|
cf4a81 |
+ }
|
|
|
cf4a81 |
+ if (!strncmp(buf, "ibm,coherent-device-memory", 26)) {
|
|
|
cf4a81 |
+ ret = 1;
|
|
|
cf4a81 |
+ break;
|
|
|
cf4a81 |
+ }
|
|
|
cf4a81 |
+ fclose(file);
|
|
|
cf4a81 |
+ }
|
|
|
cf4a81 |
+
|
|
|
cf4a81 |
+ closedir(dmem);
|
|
|
cf4a81 |
+ return ret;
|
|
|
cf4a81 |
+}
|
|
|
cf4a81 |
+
|
|
|
cf4a81 |
+
|
|
|
cf4a81 |
/* Reads the appropriate file and retrieves the SYSTEM RAM regions for whom to
|
|
|
cf4a81 |
* create Elf headers. Keeping it separate from get_memory_ranges() as
|
|
|
cf4a81 |
* requirements are different in the case of normal kexec and crashdumps.
|
|
|
cf4a81 |
@@ -196,12 +243,12 @@ static int get_crash_memory_ranges(struct memory_range **range, int *ranges)
|
|
|
cf4a81 |
{
|
|
|
cf4a81 |
|
|
|
cf4a81 |
char device_tree[256] = "/proc/device-tree/";
|
|
|
cf4a81 |
- char fname[256];
|
|
|
cf4a81 |
+ char fname[PATH_LEN];
|
|
|
cf4a81 |
char buf[MAXBYTES];
|
|
|
cf4a81 |
DIR *dir, *dmem;
|
|
|
cf4a81 |
FILE *file;
|
|
|
cf4a81 |
struct dirent *dentry, *mentry;
|
|
|
cf4a81 |
- int n, crash_rng_len = 0;
|
|
|
cf4a81 |
+ int n, ret, crash_rng_len = 0;
|
|
|
cf4a81 |
unsigned long long start, end;
|
|
|
cf4a81 |
int page_size;
|
|
|
cf4a81 |
|
|
|
cf4a81 |
@@ -240,6 +287,19 @@ static int get_crash_memory_ranges(struct memory_range **range, int *ranges)
|
|
|
cf4a81 |
continue;
|
|
|
cf4a81 |
strcpy(fname, device_tree);
|
|
|
cf4a81 |
strcat(fname, dentry->d_name);
|
|
|
cf4a81 |
+
|
|
|
cf4a81 |
+ ret = is_coherent_device_mem(fname);
|
|
|
cf4a81 |
+ if (ret == -1) {
|
|
|
cf4a81 |
+ closedir(dir);
|
|
|
cf4a81 |
+ goto err;
|
|
|
cf4a81 |
+ } else if (ret == 1) {
|
|
|
cf4a81 |
+ /*
|
|
|
cf4a81 |
+ * Avoid adding this memory region as it is not
|
|
|
cf4a81 |
+ * mapped to system RAM.
|
|
|
cf4a81 |
+ */
|
|
|
cf4a81 |
+ continue;
|
|
|
cf4a81 |
+ }
|
|
|
cf4a81 |
+
|
|
|
cf4a81 |
if ((dmem = opendir(fname)) == NULL) {
|
|
|
cf4a81 |
perror(fname);
|
|
|
cf4a81 |
closedir(dir);
|
|
|
cf4a81 |
diff --git a/kexec/arch/ppc64/kexec-ppc64.h b/kexec/arch/ppc64/kexec-ppc64.h
|
|
|
cf4a81 |
index 633ae77..434b4bf 100644
|
|
|
cf4a81 |
--- a/kexec/arch/ppc64/kexec-ppc64.h
|
|
|
cf4a81 |
+++ b/kexec/arch/ppc64/kexec-ppc64.h
|
|
|
cf4a81 |
@@ -1,6 +1,7 @@
|
|
|
cf4a81 |
#ifndef KEXEC_PPC64_H
|
|
|
cf4a81 |
#define KEXEC_PPC64_H
|
|
|
cf4a81 |
|
|
|
cf4a81 |
+#define PATH_LEN 256
|
|
|
cf4a81 |
#define MAXBYTES 128
|
|
|
cf4a81 |
#define MAX_LINE 160
|
|
|
cf4a81 |
#define CORE_TYPE_ELF32 1
|
|
|
cf4a81 |
|
|
|
cf4a81 |
|
|
|
cf4a81 |
|