diff --git a/SOURCES/opal-prd-6.6.3-8cbd0de88d162e387f11569eee1bdecef8fad2e3.patch b/SOURCES/opal-prd-6.6.3-8cbd0de88d162e387f11569eee1bdecef8fad2e3.patch new file mode 100644 index 0000000..0337a99 --- /dev/null +++ b/SOURCES/opal-prd-6.6.3-8cbd0de88d162e387f11569eee1bdecef8fad2e3.patch @@ -0,0 +1,147 @@ +commit 8cbd0de88d162e387f11569eee1bdecef8fad2e3 +Author: Oliver O'Halloran +Date: Wed Sep 23 16:12:20 2020 +1000 + + opal-prd: Have a worker process handle page offlining + + The memory_error() hservice interface expects the memory_error() call to + just accept the offline request and return without actually offlining the + memory. Currently we will attempt to offline the marked pages before + returning to HBRT which can result in an excessively long time spent in the + memory_error() hservice call which blocks HBRT from processing other + errors. Fix this by adding a worker process which performs the page + offlining via the sysfs memory error interfaces. + + Reviewed-by: Vasant Hegde + Signed-off-by: Oliver O'Halloran + +diff --git a/external/opal-prd/opal-prd.c b/external/opal-prd/opal-prd.c +index 40e5a984..d74d8039 100644 +--- a/external/opal-prd/opal-prd.c ++++ b/external/opal-prd/opal-prd.c +@@ -27,6 +27,7 @@ + #include + #include + #include ++#include + #include + + #include +@@ -696,13 +697,42 @@ out: + return rc; + } + ++static int memory_error_worker(const char *sysfsfile, const char *type, ++ uint64_t i_start_addr, uint64_t i_endAddr) ++{ ++ int memfd, rc, n, ret = 0; ++ char buf[ADDR_STRING_SZ]; ++ uint64_t addr; ++ ++ memfd = open(sysfsfile, O_WRONLY); ++ if (memfd < 0) { ++ pr_log(LOG_CRIT, "MEM: Failed to offline memory! " ++ "Unable to open sysfs node %s: %m", sysfsfile); ++ return -1; ++ } ++ ++ for (addr = i_start_addr; addr <= i_endAddr; addr += ctx->page_size) { ++ n = snprintf(buf, ADDR_STRING_SZ, "0x%lx", addr); ++ rc = write(memfd, buf, n); ++ if (rc != n) { ++ pr_log(LOG_CRIT, "MEM: Failed to offline memory! 
" ++ "page addr: %016lx type: %s: %m", ++ addr, type); ++ ret = 1; ++ } ++ } ++ pr_log(LOG_CRIT, "MEM: Offlined %016lx,%016lx, type %s: %m\n", ++ i_start_addr, addr, type); ++ ++ close(memfd); ++ return ret; ++} ++ + int hservice_memory_error(uint64_t i_start_addr, uint64_t i_endAddr, + enum MemoryError_t i_errorType) + { + const char *sysfsfile, *typestr; +- char buf[ADDR_STRING_SZ]; +- int memfd, rc, n, ret = 0; +- uint64_t addr; ++ pid_t pid; + + switch(i_errorType) { + case MEMORY_ERROR_CE: +@@ -722,26 +752,21 @@ int hservice_memory_error(uint64_t i_start_addr, uint64_t i_endAddr, + pr_log(LOG_ERR, "MEM: Memory error: range %016lx-%016lx, type: %s", + i_start_addr, i_endAddr, typestr); + ++ /* ++ * HBRT expects the memory offlining process to happen in the background ++ * after the notification is delivered. ++ */ ++ pid = fork(); ++ if (pid == 0) ++ exit(memory_error_worker(sysfsfile, typestr, i_start_addr, i_endAddr)); + +- memfd = open(sysfsfile, O_WRONLY); +- if (memfd < 0) { +- pr_log(LOG_CRIT, "MEM: Failed to offline memory! " +- "Unable to open sysfs node %s: %m", sysfsfile); ++ if (pid < 0) { ++ perror("MEM: unable to fork worker to offline memory!\n"); + return -1; + } + +- for (addr = i_start_addr; addr <= i_endAddr; addr += ctx->page_size) { +- n = snprintf(buf, ADDR_STRING_SZ, "0x%lx", addr); +- rc = write(memfd, buf, n); +- if (rc != n) { +- pr_log(LOG_CRIT, "MEM: Failed to offline memory! 
" +- "page addr: %016lx type: %d: %m", +- addr, i_errorType); +- ret = rc; +- } +- } +- +- return ret; ++ pr_log(LOG_INFO, "MEM: forked off %d to handle mem error\n", pid); ++ return 0; + } + + uint64_t hservice_get_interface_capabilities(uint64_t set) +@@ -2112,6 +2137,10 @@ static int init_control_socket(struct opal_prd_ctx *ctx) + return 0; + } + ++static struct sigaction sigchild_action = { ++ .sa_flags = SA_NOCLDWAIT | SA_RESTART, ++ .sa_handler = SIG_DFL, ++}; + + static int run_prd_daemon(struct opal_prd_ctx *ctx) + { +@@ -2243,6 +2272,22 @@ static int run_prd_daemon(struct opal_prd_ctx *ctx) + pr_debug("SCOM: f00f: %lx", be64toh(val)); + } + ++ /* ++ * Setup the SIGCHLD handler to automatically reap the worker threads ++ * we use for memory offlining. We can't do this earlier since the ++ * modprobe helper spawns workers and wants to check their exit status ++ * with waitpid(). Auto-reaping breaks that so enable it just before ++ * entering the attn loop. ++ * ++ * We also setup system call restarting on SIGCHLD since opal-prd ++ * doesn't make any real attempt to handle blocking functions exiting ++ * due to EINTR. ++ */ ++ if (sigaction(SIGCHLD, &sigchild_action, NULL)) { ++ pr_log(LOG_ERR, "CTRL: Failed to register signal handler %m\n"); ++ return -1; ++ } ++ + run_attn_loop(ctx); + rc = 0; + diff --git a/SPECS/opal-prd.spec b/SPECS/opal-prd.spec index 564b2c8..189662d 100644 --- a/SPECS/opal-prd.spec +++ b/SPECS/opal-prd.spec @@ -2,7 +2,7 @@ Name: opal-prd Version: 6.6.3 -Release: 1%{?dist} +Release: 2%{?dist} Summary: OPAL Processor Recovery Diagnostics Daemon Group: System Environment/Daemons @@ -28,6 +28,8 @@ Source1: opal-prd-rsyslog Source2: opal-prd-logrotate Patch0: skiboot-6.6.2-ffspart.patch +# upstream fix +Patch1: opal-prd-6.6.3-8cbd0de88d162e387f11569eee1bdecef8fad2e3.patch %description This package provides a daemon to load and run the OpenPower firmware's @@ -59,6 +61,7 @@ services to the OS (Linux) on IBM Power and OpenPower systems. 
%prep %setup -q -n %{project}-%{version} %patch0 -p1 -b .build +%patch1 -p1 -b .8cbd0de88d162e387f11569eee1bdecef8fad2e3 %build OPAL_PRD_VERSION=%{version} make V=1 CC="gcc" CFLAGS="%{build_cflags}" LDFLAGS="%{build_ldflags}" ASFLAGS="-m64 -Wa,--generate-missing-build-notes=yes" -C external/opal-prd @@ -126,6 +129,9 @@ install -m 644 %{SOURCE2} %{buildroot}/%{_sysconfdir}/logrotate.d/opal-prd %{_datadir}/qemu/ %changelog +* Mon Oct 05 2020 Than Ngo - 6.6.3-2 +- Resolves: #1885134, fix which makes the actual page offlining asynchronous + * Thu Oct 01 2020 Than Ngo - 6.6.3-1 - Resolves: #1844427, rebase to 6.6.3