From 3a9b30dc2bdc2f456d6226d11783ad42f83fec56 Mon Sep 17 00:00:00 2001 From: Michel Lind Date: Sep 19 2024 21:19:22 +0000 Subject: Update to 0.8.1 Update SEL logging patch to PR 180 Incorporate changes from Fedora's rasdaemon-0.8.0-5.fc41 Signed-off-by: Michel Lind --- diff --git a/.gitignore b/.gitignore index 284d300..c6cf656 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,2 @@ -/rasdaemon-0.6.7.tar.bz2 +/rasdaemon-*.tar.bz2 +/rasdaemon-0.6.8.tar.gz diff --git a/rasdaemon-openbmc-ipmitool-sel-logging.diff b/rasdaemon-openbmc-ipmitool-sel-logging.diff new file mode 100644 index 0000000..7ba58bf --- /dev/null +++ b/rasdaemon-openbmc-ipmitool-sel-logging.diff @@ -0,0 +1,322 @@ +From e4f0bdc7b62459ff50605ea867898ce1026e5905 Mon Sep 17 00:00:00 2001 +From: Krishna Dhulipala +Date: Thu, 19 Sep 2024 07:58:37 -0700 +Subject: [PATCH] ipmitool SEL logging of AER CEs on OpenBMC platforms + +Signed-off-by: Krishna Dhulipala +--- + Makefile.am | 6 ++-- + configure.ac | 11 ++++++ + ras-aer-handler.c | 24 +++++++++++++ + ras-aer-handler.h | 1 + + ras-events.c | 3 +- + ras-events.h | 3 +- + rasdaemon.c | 11 +++++- + unified-sel.c | 89 +++++++++++++++++++++++++++++++++++++++++++++++ + unified-sel.h | 17 +++++++++ + 9 files changed, 160 insertions(+), 5 deletions(-) + create mode 100644 unified-sel.c + create mode 100644 unified-sel.h + +--- a/Makefile.am ++++ b/Makefile.am +@@ -76,7 +76,9 @@ endif + if WITH_CPU_FAULT_ISOLATION + rasdaemon_SOURCES += ras-cpu-isolation.c queue.c + endif +- ++if WITH_OPENBMC_UNIFIED_SEL ++ rasdaemon_SOURCES += unified-sel.c ++endif + if WITH_CXL + rasdaemon_SOURCES += ras-cxl-handler.c + endif +@@ -96,7 +98,7 @@ include_HEADERS = config.h ras-events.h + ras-devlink-handler.h ras-diskerror-handler.h rbtree.h ras-page-isolation.h \ + non-standard-hisilicon.h non-standard-ampere.h ras-memory-failure-handler.h \ + ras-cxl-handler.h ras-cpu-isolation.h queue.h non-standard-yitian.h \ +- non-standard-jaguarmicro.h trigger.h ++ non-standard-jaguarmicro.h 
trigger.h unified-sel.h + + # This rule can't be called with more than one Makefile job (like make -j8) + # I can't figure out a way to fix that +--- a/configure.ac ++++ b/configure.ac +@@ -190,6 +190,16 @@ AS_IF([test "x$enable_amp_ns_decode" = " + AM_CONDITIONAL([WITH_AMP_NS_DECODE], [test x$enable_amp_ns_decode = xyes || test x$enable_all = xyes]) + AM_COND_IF([WITH_AMP_NS_DECODE], [USE_AMP_NS_DECODE="yes"], [USE_AMP_NS_DECODE="no"]) + ++AC_ARG_ENABLE([openbmc_unified_sel], ++ AS_HELP_STRING([--enable-openbmc-unified-sel], [enable OPENBMC_UNIFIED_SEL events (currently experimental)])) ++ ++AS_IF([test "x$enable_openbmc_unified_sel" = "xyes" || test "x$enable_all" = "xyes"], [ ++ AC_DEFINE(HAVE_OPENBMC_UNIFIED_SEL,1,"have OpenBMC unified SEL") ++ AC_SUBST([WITH_OPENBMC_UNIFIED_SEL]) ++]) ++AM_CONDITIONAL([WITH_OPENBMC_UNIFIED_SEL], [test x$enable_openbmc_unified_sel = xyes || test x$enable_all = xyes]) ++AM_COND_IF([WITH_OPENBMC_UNIFIED_SEL], [USE_OPENBMC_UNIFIED_SEL="yes"], [USE_OPENBMC_UNIFIED_SEL="no"]) ++ + AC_ARG_ENABLE([jaguar_ns_decode], + AS_HELP_STRING([--enable-jaguar-ns-decode], [enable JAGUAR_NS_DECODE events (currently experimental)])) + +@@ -261,6 +271,7 @@ compile time options summary + CXL events : $USE_CXL + Memory CE PFA : $USE_MEMORY_CE_PFA + AMP RAS errors : $USE_AMP_NS_DECODE ++ OpenBMC unified : $USE_OPENBMC_UNIFIED_SEL + CPU fault isolation : $USE_CPU_FAULT_ISOLATION + YITIAN RAS errors : $USE_YITIAN_NS_DECODE + JAGUAR RAS errors : $USE_JAGUAR_NS_DECODE +--- a/ras-aer-handler.c ++++ b/ras-aer-handler.c +@@ -25,6 +25,7 @@ + #include "ras-logger.h" + #include "bitfield.h" + #include "ras-report.h" ++#include "unified-sel.h" + + /* bit field meaning for correctable error */ + static const char *aer_cor_errors[32] = { +@@ -36,12 +37,14 @@ static const char *aer_cor_errors[32] = + [12] = "Replay Timer Timeout", + [13] = "Advisory Non-Fatal", + [14] = "Corrected Internal Error", ++ [15] = "Header Log Overflow", + }; + + /* bit field meaning for 
uncorrectable error */ + static const char *aer_uncor_errors[32] = { + /* Uncorrectable errors */ + [4] = "Data Link Protocol", ++ [5] = "Surprise Link Down", + [12] = "Poisoned TLP", + [13] = "Flow Control Protocol", + [14] = "Completion Timeout", +@@ -51,8 +54,23 @@ static const char *aer_uncor_errors[32] + [18] = "Malformed TLP", + [19] = "ECRC", + [20] = "Unsupported Request", ++ [21] = "ACS Violation", ++ [22] = "Uncorrected Internal", ++ [23] = "MC Blocked TLP", ++ [24] = "AtomicOp Egress Blocked", ++ [25] = "TLP Prefix Blocked", ++ [26] = "Poisoned TLP Egrees Blocked", + }; + ++static bool use_ipmitool = false; ++ ++void ras_aer_handler_init(int enable_ipmitool) ++{ ++#ifdef HAVE_OPENBMC_UNIFIED_SEL ++ use_ipmitool = (enable_ipmitool > 0) ? 1 : 0; ++#endif ++} ++ + #define BUF_LEN 1024 + + int ras_aer_event_handler(struct trace_seq *s, +@@ -195,5 +213,11 @@ int ras_aer_event_handler(struct trace_s + log(SYSLOG, LOG_WARNING, "Failed to execute ipmitool\n"); + #endif + ++#ifdef HAVE_OPENBMC_UNIFIED_SEL ++ if (use_ipmitool) ++ if (openbmc_unified_sel_log(severity_val, ev.dev_name, status_val) < 0) ++ return -1; ++#endif ++ + return 0; + } +--- a/ras-aer-handler.h ++++ b/ras-aer-handler.h +@@ -26,4 +26,5 @@ int ras_aer_event_handler(struct trace_s + struct tep_record *record, + struct tep_event *event, void *context); + ++void ras_aer_handler_init(int enable_ipmitool); + #endif +--- a/ras-events.c ++++ b/ras-events.c +@@ -894,7 +894,7 @@ static int add_event_handler(struct ras_ + return 0; + } + +-int handle_ras_events(int record_events) ++int handle_ras_events(int record_events, int enable_ipmitool) + { + int rc, page_size, i; + int num_events = 0; +@@ -951,6 +951,7 @@ int handle_ras_events(int record_events) + "ras", "mc_event"); + + #ifdef HAVE_AER ++ ras_aer_handler_init(enable_ipmitool); + rc = add_event_handler(ras, pevent, page_size, "ras", "aer_event", + ras_aer_event_handler, NULL, AER_EVENT); + if (!rc) +--- a/ras-events.h ++++ b/ras-events.h +@@ 
-109,7 +109,8 @@ enum ghes_severity { + + /* Function prototypes */ + int toggle_ras_mc_event(int enable); ++int handle_ras_events(int record_events, int enable_ipmitool); + int ras_offline_mce_event(struct ras_mc_offline_event *event); +-int handle_ras_events(int record_events); ++int handle_ras_events(int record_events, int enable_ipmitool); + + #endif +--- a/rasdaemon.c ++++ b/rasdaemon.c +@@ -42,6 +42,7 @@ const char *argp_program_bug_address = " + struct arguments { + int record_events; + int enable_ras; ++ int enable_ipmitool; + int foreground; + int offline; + }; +@@ -74,6 +75,11 @@ static error_t parse_opt(int k, char *ar + args->record_events++; + break; + #endif ++#ifdef HAVE_OPENBMC_UNIFIED_SEL ++ case 'i': ++ args->enable_ipmitool++; ++ break; ++#endif + case 'f': + args->foreground++; + break; +@@ -164,6 +170,9 @@ int main(int argc, char *argv[]) + {"record", 'r', 0, 0, "record events via sqlite3", 0}, + #endif + {"foreground", 'f', 0, 0, "run foreground, not daemonize"}, ++#ifdef HAVE_OPENBMC_UNIFIED_SEL ++ {"ipmitool", 'i', 0, 0, "enable ipmitool logging", 0}, ++#endif + #ifdef HAVE_MCE + {"post-processing", 'p', 0, 0, + "Post-processing MCE's with raw register values"}, +@@ -212,7 +221,7 @@ int main(int argc, char *argv[]) + if (daemon(0, 0)) + exit(EXIT_FAILURE); + +- handle_ras_events(args.record_events); ++ handle_ras_events(args.record_events, args.enable_ipmitool); + + return 0; + } +--- /dev/null ++++ b/unified-sel.c +@@ -0,0 +1,89 @@ ++/* ++ * Copyright (c) 2023, Meta Platforms Inc. ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. 
++ * ++ */ ++ ++#include ++#include ++#include ++#include ++#include "ras-record.h" ++#include "ras-logger.h" ++#include "ras-report.h" ++#include "unified-sel.h" ++ ++/* CPU Root Port Error ID corresponding to each status bit set */ ++static const char *cor_error_ids[32] = { ++ /* Correctable errors */ ++ [0] = "0x00", /* Receiver Error */ ++ [6] = "0x01", /* Bad TLP */ ++ [7] = "0x02", /* Bad DLLP */ ++ [8] = "0x04", /* RELAY_NUM Rollover */ ++ [12] = "0x03", /* Replay Timer Timeout */ ++ [13] = "0x05", /* Advisory Non-Fatal */ ++ [14] = "0x06", /* Corrected Internal */ ++ [15] = "0x07", /* Header Log Overflow */ ++}; ++ ++static int verify_id_log_sel(uint64_t status, ++ const char **idarray, ++ unsigned bus, ++ unsigned dev_fn) ++{ ++ int i; ++ char openbmc_ipmi_add_sel[105]; ++ ++ /* ++ * Get PCIe AER error source bus/dev/fn and save it to the BMC SEL ++ * as an OpenBMC unified SEL record type. ++ * The IPMI command and record fields are defined in IPMI Specification v2.0 (IPMI Spec) ++ * ipmitool raw 0x0a 0x44 is "Add SEL Entry Command" defined in IPMI spec chapter 31.6 ++ * The 16 bytes that follow form the SEL Record ++ * defined in IPMI spec chapter 32.1 "SEL Event Records" ++ * Byte 1~2 are Record ID = 0x00 0x00, unused ++ * Byte 3 is Record Type = 0xFB, OEM non-timestamped record type for OpenBMC unified SEL ++ * Byte 4~16 are OEM defined ++ * Byte 11: ++ * Byte11[7:3] Device# ++ * Byte11[2:0] Function# ++ * Byte 12: Bus number ++ * Byte 13-15: Reserved ++ * Byte 16: ID of the error detected on the PCIe device that triggered this SEL record ++ */ ++ ++ /* Potentially all error status bits could be set for a given PCIe device. 
++ * Therefore, iterate over all 32 bits each of cor and uncor errors ++ */ ++ for (i = 0; i < 32; i++) { ++ if ((status & (1 << i)) && idarray[i]) { ++ sprintf(openbmc_ipmi_add_sel, ++ "ipmitool raw 0x0a 0x44 0x00 0x00 0xFB 0x20 0x00 0x00 0x00 0x00 0x01 0x00 0x%02x 0x%02x 0x01 0x00 0xff %s", ++ dev_fn, bus, idarray[i]); ++ if (system(openbmc_ipmi_add_sel) != 0) ++ return -1; ++ } ++ } ++ return 0; ++} ++ ++int openbmc_unified_sel_log(uint64_t severity, const char *dev_name, uint64_t status) ++{ ++ int bus, dev, dev_fn, fn; ++ ++ sscanf(dev_name, "%*x:%x:%x.%x", &bus, &dev, &fn); ++ dev_fn = (((dev & 0x1f) << 3) | (fn & 0x7)); ++ ++ /* Use the appropriate correctable error status ID ++ * for a given severity level ++ * */ ++ if (severity == HW_EVENT_AER_CORRECTED) { ++ if (verify_id_log_sel(status, cor_error_ids, bus, dev_fn) < 0) ++ return -1; ++ } ++ return 0; ++} +--- /dev/null ++++ b/unified-sel.h +@@ -0,0 +1,17 @@ ++/* ++ * Copyright (c) 2023, Meta Platforms Inc. ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. 
++ * ++ */ ++ ++ ++#ifndef _UNIFIED_SEL_H ++#define _UNIFIED_SEL_H ++ ++int openbmc_unified_sel_log(uint64_t severity, const char *dev_name, uint64_t status); ++ ++#endif diff --git a/rasdaemon-openbmc-unified-sel.diff b/rasdaemon-openbmc-unified-sel.diff deleted file mode 100644 index dad7e87..0000000 --- a/rasdaemon-openbmc-unified-sel.diff +++ /dev/null @@ -1,251 +0,0 @@ -commit 2a194a220d4a377f2b25e308a01ffe9675b93c31 -Author: Krishna Dhulipala -Date: Tue Jun 27 12:09:45 2023 -0700 - - Unified SEL logging of AER events - -diff --git a/Makefile.am b/Makefile.am -index fabca78..1ea3356 100644 ---- a/Makefile.am -+++ b/Makefile.am -@@ -63,13 +63,17 @@ endif - if WITH_AMP_NS_DECODE - rasdaemon_SOURCES += non-standard-ampere.c - endif -+if WITH_OPENBMC_UNIFIED_SEL -+ rasdaemon_SOURCES += unified-sel.c -+endif - rasdaemon_LDADD = -lpthread $(SQLITE3_LIBS) libtrace/libtrace.a - - include_HEADERS = config.h ras-events.h ras-logger.h ras-mc-handler.h \ - ras-aer-handler.h ras-mce-handler.h ras-record.h bitfield.h ras-report.h \ - ras-extlog-handler.h ras-arm-handler.h ras-non-standard-handler.h \ - ras-devlink-handler.h ras-diskerror-handler.h rbtree.h ras-page-isolation.h \ -- non-standard-hisilicon.h non-standard-ampere.h ras-memory-failure-handler.h -+ non-standard-hisilicon.h non-standard-ampere.h ras-memory-failure-handler.h \ -+ unified-sel.h - - # This rule can't be called with more than one Makefile job (like make -j8) - # I can't figure out a way to fix that -diff --git a/configure.ac b/configure.ac -index f7d1947..4a534b7 100644 ---- a/configure.ac -+++ b/configure.ac -@@ -161,6 +161,16 @@ AS_IF([test "x$enable_amp_ns_decode" = "xyes" || test "x$enable_all" == "xyes"], - AM_CONDITIONAL([WITH_AMP_NS_DECODE], [test x$enable_amp_ns_decode = xyes || test x$enable_all == xyes]) - AM_COND_IF([WITH_AMP_NS_DECODE], [USE_AMP_NS_DECODE="yes"], [USE_AMP_NS_DECODE="no"]) - -+AC_ARG_ENABLE([openbmc_unified_sel], -+ AS_HELP_STRING([--enable-openbmc-unified-sel], [enable 
OPENBMC_UNIFIED_SEL events (currently exprimental)])) -+ -+AS_IF([test "x$enable_openbmc_unified_sel" = "xyes" || test "x$enable_all" = "xyes"], [ -+ AC_DEFINE(HAVE_OPENBMC_UNIFIED_SEL,1,"have OpenBMC unified SEL") -+ AC_SUBST([WITH_OPENBMC_UNIFIED_SEL]) -+]) -+AM_CONDITIONAL([WITH_OPENBMC_UNIFIED_SEL], [test x$enable_openbmc_unified_sel = xyes || test x$enabl_all = xyes]) -+AM_COND_IF([WITH_OPENBMC_UNIFIED_SEL], [USE_OPENBMC_UNIFIED_SEL="yes"], [USE_OPENBMC_UNIFIED_SEL="no"]) -+ - test "$sysconfdir" = '${prefix}/etc' && sysconfdir=/etc - - CFLAGS="$CFLAGS -Wall -Wmissing-prototypes -Wstrict-prototypes" -@@ -201,4 +211,5 @@ compile time options summary - Memory Failure : $USE_MEMORY_FAILURE - Memory CE PFA : $USE_MEMORY_CE_PFA - AMP RAS errors : $USE_AMP_NS_DECODE -+ OpenBMC unified : $USE_OPENBMC_UNIFIED_SEL - EOF -diff --git a/ras-aer-handler.c b/ras-aer-handler.c -index 8ddd439..c03f6ec 100644 ---- a/ras-aer-handler.c -+++ b/ras-aer-handler.c -@@ -25,6 +25,7 @@ - #include "ras-logger.h" - #include "bitfield.h" - #include "ras-report.h" -+#include "unified-sel.h" - - /* bit field meaning for correctable error */ - static const char *aer_cor_errors[32] = { -@@ -35,12 +36,15 @@ static const char *aer_cor_errors[32] = { - [8] = "RELAY_NUM Rollover", - [12] = "Replay Timer Timeout", - [13] = "Advisory Non-Fatal", -+ [14] = "Corrected Internal", -+ [15] = "Header Log Overflow", - }; - - /* bit field meaning for uncorrectable error */ - static const char *aer_uncor_errors[32] = { - /* Uncorrectable errors */ - [4] = "Data Link Protocol", -+ [5] = "Surprise Link Down", - [12] = "Poisoned TLP", - [13] = "Flow Control Protocol", - [14] = "Completion Timeout", -@@ -50,6 +54,12 @@ static const char *aer_uncor_errors[32] = { - [18] = "Malformed TLP", - [19] = "ECRC", - [20] = "Unsupported Request", -+ [21] = "ACS Violation", -+ [22] = "Uncorrected Internal", -+ [23] = "MC Blocked TLP", -+ [24] = "AtomicOp Egress Blocked", -+ [25] = "TLP Prefix Blocked", -+ [26] = "Poisoned 
TLP Egrees Blocked", - }; - - #define BUF_LEN 1024 -@@ -151,5 +161,10 @@ int ras_aer_event_handler(struct trace_seq *s, - ras_report_aer_event(ras, &ev); - #endif - -+#ifdef HAVE_OPENBMC_UNIFIED_SEL -+ if (openbmc_unified_sel_log(severity_val, ev.dev_name, status_val) < 0) -+ return -1; -+#endif -+ - return 0; - } -diff --git a/unified-sel.c b/unified-sel.c -new file mode 100644 -index 0000000..287bb4f ---- /dev/null -+++ b/unified-sel.c -@@ -0,0 +1,114 @@ -+/* -+ * Copyright (c) 2023, Meta Platforms Inc. -+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License as published by -+ * the Free Software Foundation; either version 2 of the License, or -+ * (at your option) any later version. -+ * -+ */ -+ -+#include -+#include -+#include -+#include -+#include "ras-record.h" -+#include "ras-logger.h" -+#include "ras-report.h" -+#include "unified-sel.h" -+ -+/* CPU Root Port Error ID corresponding to each status bit set */ -+static const char *cor_error_ids[32] = { -+ /* Correctable errors */ -+ [0] = "0x00", /* Receiver Error */ -+ [6] = "0x01", /* Bad TLP */ -+ [7] = "0x02", /* Bad DLLP */ -+ [8] = "0x04", /* RELAY_NUM Rollover */ -+ [12] = "0x03", /* Replay Timer Timeout */ -+ [13] = "0x05", /* Advisory Non-Fatal */ -+ [14] = "0x06", /* Corrected Internal */ -+ [15] = "0x07", /* Header Log Overflow */ -+}; -+ -+static const char *uncor_error_ids[32] = { -+ /* Uncorrectable errors */ -+ [4] = "0x20", /* Data Link Protocol */ -+ [5] = "0x21", /* Surprise Link Down */ -+ [12] = "0x22", /* Poisoned TLP */ -+ [13] = "0x23", /* Flow Control Protocol */ -+ [14] = "0x24", /* Completion Timeout */ -+ [15] = "0x25", /* Completer Abort */ -+ [16] = "0x26", /* Unexpected Completion */ -+ [17] = "0x27", /* Receiver Overflow */ -+ [18] = "0x29", /* Malformed TLP */ -+ [19] = "0x29", /* ECRC */ -+ [20] = "0x2A", /* Unsupported Request */ -+ [21] = "0x2B", /* ACS Violation */ -+ [22] = "0x2C", /* 
Uncorrected Internal */ -+ [23] = "0x2D", /* MC Blocked TLP */ -+ [24] = "0x2E", /* AtomicOp Egress Blocked */ -+ [25] = "0x2F", /* TLP Prefix Blocked */ -+ [26] = "0x30", /* Poisoned TLP Egrees Blocked */ -+}; -+ -+static int verify_id_log_sel(uint64_t status, -+ const char **idarray, -+ unsigned bus, -+ unsigned dev_fn) -+{ -+ int i; -+ char openbmc_ipmi_add_sel[105]; -+ -+ /* -+ * Get PCIe AER error source bus/dev/fn and save it to the BMC SEL -+ * as a OpenBMC unified SEL record type. -+ * The IPMI command and record fields are defined in IPMI Specification v2.0 (IPMI Spec) -+ * ipmitool raw 0x0a 0x44 is "Add SEL Entry Command" defined in IPMI spec chapter 31.6 -+ * The 16 byte that follow form the SEL Record -+ * defined in IPMI spec chapter 32.1 "SEL Event Records" -+ * Byte 1~2 are Record ID = 0x00 0x00, unused -+ * Byte 3 is Record Type = 0xFB, OEM non-timestamped record type for OpenBMC unified SEL -+ * Byte 4~16 are OEM defined -+ * Byte 11: -+ * Byte11[7:3] Device# -+ * Byte11[2:0] Function# -+ * Byte 12: Bus number -+ * Byte 13-15: Reserved -+ * Byte 16: ID of the error detected on the PCle device that triggered this SEL record -+ */ -+ -+ /* Potentially all error status bits could be set for a given PCIe device. 
-+ * Therefore, iterate over all 32 bits each of cor and uncor errors -+ */ -+ for (i = 0; i < 32; i++) { -+ if ((status & (1 << i)) && idarray[i]) { -+ sprintf(openbmc_ipmi_add_sel, -+ "ipmitool raw 0x0a 0x44 0x00 0x00 0xFB 0x20 0x00 0x00 0x00 0x00 0x01 0x00 0x%02x 0x%02x 0x01 0x00 0xff %s", -+ dev_fn, bus, idarray[i]); -+ if (system(openbmc_ipmi_add_sel) != 0) -+ return -1; -+ } -+ } -+ return 0; -+} -+ -+int openbmc_unified_sel_log(uint64_t severity, const char *dev_name, uint64_t status) -+{ -+ int bus, dev, dev_fn, fn; -+ -+ sscanf(dev_name, "%*x:%x:%x.%x", &bus, &dev, &fn); -+ dev_fn = (((dev & 0x1f) << 3) | (fn & 0x7)); -+ -+ /* Use the appropriate correctable or uncorrectable error status ID -+ * for a gien severity level -+ */ -+ if (severity == HW_EVENT_AER_CORRECTED) { -+ if (verify_id_log_sel(status, cor_error_ids, bus, dev_fn) < 0) -+ return -1; -+ } -+ else { -+ if (verify_id_log_sel(status, uncor_error_ids, bus, dev_fn) < 0) -+ return -1; -+ } -+ return 0; -+} -diff --git a/unified-sel.h b/unified-sel.h -new file mode 100644 -index 0000000..17458a5 ---- /dev/null -+++ b/unified-sel.h -@@ -0,0 +1,17 @@ -+/* -+ * Copyright (c) 2023, Meta Platforms Inc. -+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License as published by -+ * the Free Software Foundation; either version 2 of the License, or -+ * (at your option) any later version. 
-+ * -+ */ -+ -+ -+#ifndef _UNIFIED_SEL_H -+#define _UNIFIED_SEL_H -+ -+int openbmc_unified_sel_log(uint64_t severity, const char *dev_name, uint64_t status); -+ -+#endif diff --git a/rasdaemon.spec b/rasdaemon.spec index 6ee4c33..991ac49 100644 --- a/rasdaemon.spec +++ b/rasdaemon.spec @@ -1,33 +1,28 @@ Name: rasdaemon -Version: 0.6.7 -Release: 4%{?dist} +Version: 0.8.1 +Release: 1%{?dist} Summary: Utility to receive RAS error tracings -License: GPLv2 +License: GPL-2.0-only URL: http://git.infradead.org/users/mchehab/rasdaemon.git Source0: http://www.infradead.org/~mchehab/rasdaemon/%{name}-%{version}.tar.bz2 -# add unified SEL logging for OpenBMC -# https://github.com/mchehab/rasdaemon/pull/97 -# not downloading the latest as we can't cleanly apply the PR -Patch0: %{name}-openbmc-unified-sel.diff -# Patch1: https://github.com/mchehab/rasdaemon/pull/97/commits/414a8bc15285ab9cad8523b90daf0a86f7843ba0.patch#/%%{name}-cli-logging-argument.diff -Patch2: https://github.com/mchehab/rasdaemon/pull/97/commits/626f7c90ff943be9cdc5ca3d3bbeac644bef2485.patch#/%{name}-log-correctable-errors-only.diff -Patch3: https://github.com/mchehab/rasdaemon/pull/97/commits/bcffec13f53796fe150f6464540d5e324ea6977d.patch#/%{name}-fix-typo.diff +# ipmitool SEL logging for OpenBMC +# https://github.com/mchehab/rasdaemon/pull/180 +# needs slight modification to account for header changes in one source file +Patch0: %{name}-openbmc-ipmitool-sel-logging.diff ExcludeArch: s390 s390x -%if 0%{?facebook} -BuildRequires: autoconf -BuildRequires: automake -BuildRequires: libtool -%endif +BuildRequires: make BuildRequires: gcc +BuildRequires: autoconf automake libtool BuildRequires: gettext-devel -BuildRequires: make BuildRequires: perl-generators BuildRequires: sqlite-devel BuildRequires: systemd +BuildRequires: libtraceevent-devel Provides: bundled(kernel-event-lib) Requires: hwdata Requires: perl-DBD-SQLite +Requires: libtraceevent %ifarch %{ix86} x86_64 Requires: dmidecode %endif @@ -49,37 +44,56 
@@ an utility for reporting current error counts from the EDAC sysfs files. %prep %if 0%{?facebook} %autosetup -p1 -autoreconf -fiv %else -%autosetup -N +%setup -q %endif +autoreconf -vfi %build %ifarch %{arm} aarch64 -%configure --enable-sqlite3 --enable-aer --enable-mce --enable-extlog --enable-devlink --enable-diskerror --enable-abrt-report --enable-non-standard --enable-arm --enable-hisi-ns-decode --enable-openbmc-unified-sel --with-sysconfdefdir="%{_sysconfdir}/sysconfig" +%configure --enable-sqlite3 --enable-aer --enable-non-standard --enable-arm \ + --enable-mce --enable-extlog --enable-devlink --enable-diskerror \ + --enable-memory-failure --enable-abrt-report --enable-hisi-ns-decode \ + --enable-memory-ce-pfa --enable-amp-ns-decode --enable-cpu-fault-isolation \ +%if 0%{?facebook} + --enable-openbmc-unified-sel \ +%endif + --with-sysconfdefdir=%{_sysconfdir}/sysconfig %else -%configure --enable-sqlite3 --enable-aer --enable-mce --enable-extlog --enable-devlink --enable-diskerror --enable-abrt-report --enable-memory-ce-pfa --enable-openbmc-unified-sel --with-sysconfdefdir="%{_sysconfdir}/sysconfig" +%configure --enable-sqlite3 --enable-aer \ + --enable-mce --enable-extlog --enable-devlink --enable-diskerror \ + --enable-memory-failure --enable-abrt-report --enable-cpu-fault-isolation \ +%if 0%{?facebook} + --enable-openbmc-unified-sel \ +%endif + --with-sysconfdefdir=%{_sysconfdir}/sysconfig %endif make %{?_smp_mflags} %install make install DESTDIR=%{buildroot} -install -D -p -m 0644 misc/rasdaemon.service %{buildroot}/%{_unitdir}/rasdaemon.service +install -D -p -m 0644 misc/rasdaemon.service %{buildroot}%{_unitdir}/rasdaemon.service install -D -p -m 0644 misc/ras-mc-ctl.service %{buildroot}%{_unitdir}/ras-mc-ctl.service +install -D -p -m 0655 misc/rasdaemon.env %{buildroot}%{_sysconfdir}/sysconfig/%{name} rm INSTALL %{buildroot}/usr/include/*.h %files -%doc AUTHORS ChangeLog COPYING README TODO +%doc AUTHORS ChangeLog COPYING README.md TODO 
%{_sbindir}/rasdaemon %{_sbindir}/ras-mc-ctl %{_mandir}/*/* %{_unitdir}/*.service %{_sysconfdir}/ras/dimm_labels.d -%ifnarch %{arm} aarch64 -%{_sysconfdir}/sysconfig/rasdaemon -%endif +%{_sysconfdir}/ras/triggers/mc_event_trigger +%{_sysconfdir}/ras/triggers/mem_fail_trigger +%config(noreplace) %{_sysconfdir}/sysconfig/%{name} %changelog +* Thu Sep 19 2024 Michel Lind - 0.8.1-1 +- Update to 0.8.1 +- Update SEL logging patch to PR 180 +- Incorporate changes from Fedora's rasdaemon-0.8.0-5.fc41 + * Thu Oct 05 2023 Michel Lind - 0.6.7-4 - Update unified SEL logging to only log correctable errors - Limit unified SEL logging changes to hs+fb builds for now diff --git a/sources b/sources index 45e925f..8ba99de 100644 --- a/sources +++ b/sources @@ -1 +1 @@ -SHA512 (rasdaemon-0.6.7.tar.bz2) = 15beae5d4964c49b7b7f9e731948b5def9622fba5d7d17ce52a282d7834d256366cdf3cf427b82b2a6a8fd0c99f202f545000bdb06064fbae7ae0296aef0946c +SHA512 (rasdaemon-0.8.1.tar.bz2) = 6bfc6b451e80b01af5a9f4f94841dabeb6c35f6cef4d0b8ecaa9f9c61b737955cea34aceddcefd1ac36025526d2919eefa8564fccc0673518049f1ed4eff9cb1