d77db6
commit 2290d65b97311dd5736838f1e285355f7f357046
d77db6
Author: Shiju Jose <shiju.jose@huawei.com>
d77db6
Date:   Mon Mar 8 16:57:26 2021 +0000
d77db6
d77db6
    rasdaemon: add support for memory_failure events
d77db6
    
d77db6
    Add support to log the memory_failure kernel trace
d77db6
    events.
d77db6
    
d77db6
    Example rasdaemon log and SQLite DB output for the
d77db6
    memory_failure event,
d77db6
    =================================================
d77db6
    rasdaemon: memory_failure_event store: 0x126ce8f8
d77db6
    rasdaemon: register inserted at db
d77db6
    <...>-785   [000]     0.000024: memory_failure_event: 2020-10-02 13:27:13 -0400 pfn=0x204000000 page_type=free buddy page action_result=Delayed
d77db6
    
d77db6
    CREATE TABLE memory_failure_event (id INTEGER PRIMARY KEY, timestamp TEXT, pfn TEXT, page_type TEXT, action_result TEXT);
d77db6
    INSERT INTO memory_failure_event VALUES(1,'2020-10-02 13:27:13 -0400','0x204000000','free buddy page','Delayed');
d77db6
    ==================================================
d77db6
    
d77db6
    Signed-off-by: Shiju Jose <shiju.jose@huawei.com>
d77db6
    Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
d77db6
d77db6
---
d77db6
 Makefile.am                  |    4 
d77db6
 ras-events.c                 |   15 +++
d77db6
 ras-memory-failure-handler.c |  179 +++++++++++++++++++++++++++++++++++++++++++
d77db6
 ras-memory-failure-handler.h |   25 ++++++
d77db6
 ras-record.c                 |   56 +++++++++++++
d77db6
 ras-record.h                 |   13 +++
d77db6
 ras-report.c                 |   68 ++++++++++++++++
d77db6
 ras-report.h                 |    5 -
d77db6
 8 files changed, 364 insertions(+), 1 deletion(-)
d77db6
d77db6
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
d77db6
+++ b/ras-memory-failure-handler.c	2021-10-14 16:31:36.840657728 -0400
d77db6
@@ -0,0 +1,179 @@
d77db6
+/*
d77db6
+ * Copyright (c) Huawei Technologies Co., Ltd. 2020. All rights reserved.
d77db6
+ *
d77db6
+ * This program is free software; you can redistribute it and/or modify
d77db6
+ * it under the terms of the GNU General Public License as published by
d77db6
+ * the Free Software Foundation; either version 2 of the License, or
d77db6
+ * (at your option) any later version.
d77db6
+ *
d77db6
+ * This program is distributed in the hope that it will be useful,
d77db6
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
d77db6
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
d77db6
+ * GNU General Public License for more details.
d77db6
+ */
d77db6
+
d77db6
+#include <stdio.h>
d77db6
+#include <stdlib.h>
d77db6
+#include <string.h>
d77db6
+#include "libtrace/kbuffer.h"
d77db6
+#include "ras-memory-failure-handler.h"
d77db6
+#include "ras-record.h"
d77db6
+#include "ras-logger.h"
d77db6
+#include "ras-report.h"
d77db6
+
d77db6
+/* Memory failure - various types of pages */
d77db6
+enum mf_action_page_type {
d77db6
+	MF_MSG_KERNEL,
d77db6
+	MF_MSG_KERNEL_HIGH_ORDER,
d77db6
+	MF_MSG_SLAB,
d77db6
+	MF_MSG_DIFFERENT_COMPOUND,
d77db6
+	MF_MSG_POISONED_HUGE,
d77db6
+	MF_MSG_HUGE,
d77db6
+	MF_MSG_FREE_HUGE,
d77db6
+	MF_MSG_NON_PMD_HUGE,
d77db6
+	MF_MSG_UNMAP_FAILED,
d77db6
+	MF_MSG_DIRTY_SWAPCACHE,
d77db6
+	MF_MSG_CLEAN_SWAPCACHE,
d77db6
+	MF_MSG_DIRTY_MLOCKED_LRU,
d77db6
+	MF_MSG_CLEAN_MLOCKED_LRU,
d77db6
+	MF_MSG_DIRTY_UNEVICTABLE_LRU,
d77db6
+	MF_MSG_CLEAN_UNEVICTABLE_LRU,
d77db6
+	MF_MSG_DIRTY_LRU,
d77db6
+	MF_MSG_CLEAN_LRU,
d77db6
+	MF_MSG_TRUNCATED_LRU,
d77db6
+	MF_MSG_BUDDY,
d77db6
+	MF_MSG_BUDDY_2ND,
d77db6
+	MF_MSG_DAX,
d77db6
+	MF_MSG_UNSPLIT_THP,
d77db6
+	MF_MSG_UNKNOWN,
d77db6
+};
d77db6
+
d77db6
+/* Action results for various types of pages */
d77db6
+enum mf_action_result {
d77db6
+	MF_IGNORED,     /* Error: cannot be handled */
d77db6
+	MF_FAILED,      /* Error: handling failed */
d77db6
+	MF_DELAYED,     /* Will be handled later */
d77db6
+	MF_RECOVERED,   /* Successfully recovered */
d77db6
+};
d77db6
+
d77db6
+/* memory failure page types */
d77db6
+static const struct {
d77db6
+	int	type;
d77db6
+	const char	*page_type;
d77db6
+} mf_page_type[] = {
d77db6
+	{ MF_MSG_KERNEL, "reserved kernel page" },
d77db6
+	{ MF_MSG_KERNEL_HIGH_ORDER, "high-order kernel page"},
d77db6
+	{ MF_MSG_SLAB, "kernel slab page"},
d77db6
+	{ MF_MSG_DIFFERENT_COMPOUND, "different compound page after locking"},
d77db6
+	{ MF_MSG_POISONED_HUGE, "huge page already hardware poisoned"},
d77db6
+	{ MF_MSG_HUGE, "huge page"},
d77db6
+	{ MF_MSG_FREE_HUGE, "free huge page"},
d77db6
+	{ MF_MSG_NON_PMD_HUGE, "non-pmd-sized huge page"},
d77db6
+	{ MF_MSG_UNMAP_FAILED, "unmapping failed page"},
d77db6
+	{ MF_MSG_DIRTY_SWAPCACHE, "dirty swapcache page"},
d77db6
+	{ MF_MSG_CLEAN_SWAPCACHE, "clean swapcache page"},
d77db6
+	{ MF_MSG_DIRTY_MLOCKED_LRU, "dirty mlocked LRU page"},
d77db6
+	{ MF_MSG_CLEAN_MLOCKED_LRU, "clean mlocked LRU page"},
d77db6
+	{ MF_MSG_DIRTY_UNEVICTABLE_LRU, "dirty unevictable LRU page"},
d77db6
+	{ MF_MSG_CLEAN_UNEVICTABLE_LRU, "clean unevictable LRU page"},
d77db6
+	{ MF_MSG_DIRTY_LRU, "dirty LRU page"},
d77db6
+	{ MF_MSG_CLEAN_LRU, "clean LRU page"},
d77db6
+	{ MF_MSG_TRUNCATED_LRU, "already truncated LRU page"},
d77db6
+	{ MF_MSG_BUDDY, "free buddy page"},
d77db6
+	{ MF_MSG_BUDDY_2ND, "free buddy page (2nd try)"},
d77db6
+	{ MF_MSG_DAX, "dax page"},
d77db6
+	{ MF_MSG_UNSPLIT_THP, "unsplit thp"},
d77db6
+	{ MF_MSG_UNKNOWN, "unknown page"},
d77db6
+};
d77db6
+
d77db6
+/* memory failure action results */
d77db6
+static const struct {
d77db6
+	int result;
d77db6
+	const char *action_result;
d77db6
+} mf_action_result[] = {
d77db6
+	{ MF_IGNORED, "Ignored" },
d77db6
+	{ MF_FAILED, "Failed" },
d77db6
+	{ MF_DELAYED, "Delayed" },
d77db6
+	{ MF_RECOVERED, "Recovered" },
d77db6
+};
d77db6
+
d77db6
+static const char *get_page_type(int page_type)
d77db6
+{
d77db6
+	int i;
d77db6
+
d77db6
+	for (i = 0; i < ARRAY_SIZE(mf_page_type); i++)
d77db6
+		if (mf_page_type[i].type == page_type)
d77db6
+			return mf_page_type[i].page_type;
d77db6
+
d77db6
+	return "unknown page";
d77db6
+}
d77db6
+
d77db6
+static const char *get_action_result(int result)
d77db6
+{
d77db6
+	int i;
d77db6
+
d77db6
+	for (i = 0; i < ARRAY_SIZE(mf_action_result); i++)
d77db6
+		if (mf_action_result[i].result == result)
d77db6
+			return mf_action_result[i].action_result;
d77db6
+
d77db6
+	return "unknown";
d77db6
+}
d77db6
+
d77db6
+
d77db6
+int ras_memory_failure_event_handler(struct trace_seq *s,
d77db6
+				     struct pevent_record *record,
d77db6
+				     struct event_format *event, void *context)
d77db6
+{
d77db6
+	unsigned long long val;
d77db6
+	struct ras_events *ras = context;
d77db6
+	time_t now;
d77db6
+	struct tm *tm;
d77db6
+	struct ras_mf_event ev;
d77db6
+
d77db6
+	/*
d77db6
+	 * Newer kernels (3.10-rc1 or upper) provide an uptime clock.
d77db6
+	 * On previous kernels, the way to properly generate an event would
d77db6
+	 * be to inject a fake one, measure its timestamp and diff it against
d77db6
+	 * gettimeofday. We won't do it here. Instead, let's use uptime,
d77db6
+	 * falling-back to the event report's time, if "uptime" clock is
d77db6
+	 * not available (legacy kernels).
d77db6
+	 */
d77db6
+
d77db6
+	if (ras->use_uptime)
d77db6
+		now = record->ts/user_hz + ras->uptime_diff;
d77db6
+	else
d77db6
+		now = time(NULL);
d77db6
+
d77db6
+	tm = localtime(&now);
d77db6
+	if (tm)
d77db6
+		strftime(ev.timestamp, sizeof(ev.timestamp),
d77db6
+			 "%Y-%m-%d %H:%M:%S %z", tm);
d77db6
+	trace_seq_printf(s, "%s ", ev.timestamp);
d77db6
+
d77db6
+	if (pevent_get_field_val(s,  event, "pfn", record, &val, 1) < 0)
d77db6
+		return -1;
d77db6
+	sprintf(ev.pfn, "0x%llx", val);
d77db6
+	trace_seq_printf(s, "pfn=0x%llx ", val);
d77db6
+
d77db6
+	if (pevent_get_field_val(s, event, "type", record, &val, 1) < 0)
d77db6
+		return -1;
d77db6
+	ev.page_type = get_page_type(val);
d77db6
+	trace_seq_printf(s, "page_type=%s ", ev.page_type);
d77db6
+
d77db6
+	if (pevent_get_field_val(s, event, "result", record, &val, 1) < 0)
d77db6
+		return -1;
d77db6
+	ev.action_result = get_action_result(val);
d77db6
+	trace_seq_printf(s, "action_result=%s ", ev.action_result);
d77db6
+
d77db6
+	/* Store data into the SQLite DB */
d77db6
+#ifdef HAVE_SQLITE3
d77db6
+	ras_store_mf_event(ras, &ev);
d77db6
+#endif
d77db6
+
d77db6
+#ifdef HAVE_ABRT_REPORT
d77db6
+	/* Report event to ABRT */
d77db6
+	ras_report_mf_event(ras, &ev);
d77db6
+#endif
d77db6
+
d77db6
+	return 0;
d77db6
+}
d77db6
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
d77db6
+++ b/ras-memory-failure-handler.h	2021-10-14 16:31:36.840657728 -0400
d77db6
@@ -0,0 +1,25 @@
d77db6
+/*
d77db6
+ * Copyright (c) Huawei Technologies Co., Ltd. 2020. All rights reserved.
d77db6
+ *
d77db6
+ * This program is free software; you can redistribute it and/or modify
d77db6
+ * it under the terms of the GNU General Public License as published by
d77db6
+ * the Free Software Foundation; either version 2 of the License, or
d77db6
+ * (at your option) any later version.
d77db6
+ *
d77db6
+ * This program is distributed in the hope that it will be useful,
d77db6
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
d77db6
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
d77db6
+ * GNU General Public License for more details.
d77db6
+*/
d77db6
+
d77db6
+#ifndef __RAS_MEMORY_FAILURE_HANDLER_H
d77db6
+#define __RAS_MEMORY_FAILURE_HANDLER_H
d77db6
+
d77db6
+#include "ras-events.h"
d77db6
+#include "libtrace/event-parse.h"
d77db6
+
d77db6
+int ras_memory_failure_event_handler(struct trace_seq *s,
d77db6
+				     struct pevent_record *record,
d77db6
+				     struct event_format *event, void *context);
d77db6
+
d77db6
+#endif
d77db6
--- a/ras-record.c	2018-04-25 06:19:03.000000000 -0400
d77db6
+++ b/ras-record.c	2021-10-14 16:31:36.840657728 -0400
d77db6
@@ -404,6 +404,55 @@ sqlite3_bind_text(priv->stmt_mce_record,
d77db6
 }
d77db6
 #endif
d77db6
 
d77db6
+/*
d77db6
+ * Table and functions to handle ras:memory_failure
d77db6
+ */
d77db6
+
d77db6
+#ifdef HAVE_MEMORY_FAILURE
d77db6
+static const struct db_fields mf_event_fields[] = {
d77db6
+	{ .name="id",			.type="INTEGER PRIMARY KEY" },
d77db6
+	{ .name="timestamp",		.type="TEXT" },
d77db6
+	{ .name="pfn",			.type="TEXT" },
d77db6
+	{ .name="page_type",		.type="TEXT" },
d77db6
+	{ .name="action_result",	.type="TEXT" },
d77db6
+};
d77db6
+
d77db6
+static const struct db_table_descriptor mf_event_tab = {
d77db6
+	.name = "memory_failure_event",
d77db6
+	.fields = mf_event_fields,
d77db6
+	.num_fields = ARRAY_SIZE(mf_event_fields),
d77db6
+};
d77db6
+
d77db6
+int ras_store_mf_event(struct ras_events *ras, struct ras_mf_event *ev)
d77db6
+{
d77db6
+	int rc;
d77db6
+	struct sqlite3_priv *priv = ras->db_priv;
d77db6
+
d77db6
+	if (!priv || !priv->stmt_mf_event)
d77db6
+		return 0;
d77db6
+	log(TERM, LOG_INFO, "memory_failure_event store: %p\n", priv->stmt_mf_event);
d77db6
+
d77db6
+	sqlite3_bind_text(priv->stmt_mf_event,  1, ev->timestamp, -1, NULL);
d77db6
+	sqlite3_bind_text(priv->stmt_mf_event,  2, ev->pfn, -1, NULL);
d77db6
+	sqlite3_bind_text(priv->stmt_mf_event,  3, ev->page_type, -1, NULL);
d77db6
+	sqlite3_bind_text(priv->stmt_mf_event,  4, ev->action_result, -1, NULL);
d77db6
+
d77db6
+	rc = sqlite3_step(priv->stmt_mf_event);
d77db6
+	if (rc != SQLITE_OK && rc != SQLITE_DONE)
d77db6
+		log(TERM, LOG_ERR,
d77db6
+		    "Failed to do memory_failure_event step on sqlite: error = %d\n", rc);
d77db6
+
d77db6
+	rc = sqlite3_reset(priv->stmt_mf_event);
d77db6
+	if (rc != SQLITE_OK && rc != SQLITE_DONE)
d77db6
+		log(TERM, LOG_ERR,
d77db6
+		    "Failed reset memory_failure_event on sqlite: error = %d\n",
d77db6
+		    rc);
d77db6
+
d77db6
+	log(TERM, LOG_INFO, "register inserted at db\n");
d77db6
+
d77db6
+	return rc;
d77db6
+}
d77db6
+#endif
d77db6
 
d77db6
 /*
d77db6
  * Generic code
d77db6
@@ -567,6 +616,13 @@ usleep(10000);
d77db6
 		rc = ras_mc_prepare_stmt(priv, &priv->stmt_arm_record,
d77db6
 					&arm_event_tab);
d77db6
 #endif
d77db6
+#ifdef HAVE_MEMORY_FAILURE
d77db6
+	rc = ras_mc_create_table(priv, &mf_event_tab);
d77db6
+	if (rc == SQLITE_OK) {
d77db6
+		rc = ras_mc_prepare_stmt(priv, &priv->stmt_mf_event,
d77db6
+					 &mf_event_tab);
d77db6
+	}
d77db6
+#endif
d77db6
 
d77db6
 		ras->db_priv = priv;
d77db6
 	return 0;
d77db6
--- a/ras-record.h	2018-04-25 06:19:03.000000000 -0400
d77db6
+++ b/ras-record.h	2021-10-14 16:31:36.840657728 -0400
d77db6
@@ -75,12 +75,20 @@ struct ras_arm_event {
d77db6
 	int32_t psci_state;
d77db6
 };
d77db6
 
d77db6
+struct ras_mf_event {
d77db6
+	char timestamp[64];
d77db6
+	char pfn[30];
d77db6
+	const char *page_type;
d77db6
+	const char *action_result;
d77db6
+};
d77db6
+
d77db6
 struct ras_mc_event;
d77db6
 struct ras_aer_event;
d77db6
 struct ras_extlog_event;
d77db6
 struct ras_non_standard_event;
d77db6
 struct ras_arm_event;
d77db6
 struct mce_event;
d77db6
+struct ras_mf_event;
d77db6
 
d77db6
 #ifdef HAVE_SQLITE3
d77db6
 
d77db6
@@ -104,6 +112,9 @@ struct sqlite3_priv {
d77db6
 #ifdef HAVE_ARM
d77db6
 	sqlite3_stmt	*stmt_arm_record;
d77db6
 #endif
d77db6
+#ifdef HAVE_MEMORY_FAILURE
d77db6
+	sqlite3_stmt	*stmt_mf_event;
d77db6
+#endif
d77db6
 };
d77db6
 
d77db6
 int ras_mc_event_opendb(unsigned cpu, struct ras_events *ras);
d77db6
@@ -113,6 +124,7 @@ int ras_store_mce_record(struct ras_even
d77db6
 int ras_store_extlog_mem_record(struct ras_events *ras, struct ras_extlog_event *ev);
d77db6
 int ras_store_non_standard_record(struct ras_events *ras, struct ras_non_standard_event *ev);
d77db6
 int ras_store_arm_record(struct ras_events *ras, struct ras_arm_event *ev);
d77db6
+int ras_store_mf_event(struct ras_events *ras, struct ras_mf_event *ev);
d77db6
 
d77db6
 #else
d77db6
 static inline int ras_mc_event_opendb(unsigned cpu, struct ras_events *ras) { return 0; };
d77db6
@@ -122,6 +134,7 @@ static inline int ras_store_mce_record(s
d77db6
 static inline int ras_store_extlog_mem_record(struct ras_events *ras, struct ras_extlog_event *ev) { return 0; };
d77db6
 static inline int ras_store_non_standard_record(struct ras_events *ras, struct ras_non_standard_event *ev) { return 0; };
d77db6
 static inline int ras_store_arm_record(struct ras_events *ras, struct ras_arm_event *ev) { return 0; };
d77db6
+static inline int ras_store_mf_event(struct ras_events *ras, struct ras_mf_event *ev) { return 0; };
d77db6
 
d77db6
 #endif
d77db6
 
d77db6
--- a/ras-report.c	2017-10-14 05:11:34.000000000 -0400
d77db6
+++ b/ras-report.c	2021-10-14 16:31:36.840657728 -0400
d77db6
@@ -255,6 +255,28 @@ "midr=0x%lx\n"	\
d77db6
 	return 0;
d77db6
 }
d77db6
 
d77db6
+static int set_mf_event_backtrace(char *buf, struct ras_mf_event *ev)
d77db6
+{
d77db6
+	char bt_buf[MAX_BACKTRACE_SIZE];
d77db6
+
d77db6
+	if (!buf || !ev)
d77db6
+		return -1;
d77db6
+
d77db6
+	sprintf(bt_buf, "BACKTRACE="    \
d77db6
+                                                "timestamp=%s\n"	\
d77db6
+                                                "pfn=%s\n"		\
d77db6
+                                                "page_type=%s\n"	\
d77db6
+                                                "action_result=%s\n",	\
d77db6
+                                                ev->timestamp,		\
d77db6
+                                                ev->pfn,		\
d77db6
+                                                ev->page_type,		\
d77db6
+                                                ev->action_result);
d77db6
+
d77db6
+	strcat(buf, bt_buf);
d77db6
+
d77db6
+	return 0;
d77db6
+}
d77db6
+
d77db6
 static int commit_report_backtrace(int sockfd, int type, void *ev){
d77db6
 	char buf[MAX_BACKTRACE_SIZE];
d77db6
 	char *pbuf = buf;
d77db6
@@ -283,6 +305,9 @@ memset(buf, 0, MAX_BACKTRACE_SIZE);
d77db6
 	case ARM_EVENT:
d77db6
 		rc = set_arm_event_backtrace(buf, (struct ras_arm_event *)ev);
d77db6
 		break;
d77db6
+	case MF_EVENT:
d77db6
+		rc = set_mf_event_backtrace(buf, (struct ras_mf_event *)ev);
d77db6
+		break;
d77db6
 	default:
d77db6
 		return -1;
d77db6
 	}
d77db6
@@ -549,3 +574,46 @@ return 0;
d77db6
 		return -1;
d77db6
 	}
d77db6
 }
d77db6
+
d77db6
+int ras_report_mf_event(struct ras_events *ras, struct ras_mf_event *ev)
d77db6
+{
d77db6
+	char buf[MAX_MESSAGE_SIZE];
d77db6
+	int sockfd = 0;
d77db6
+	int done = 0;
d77db6
+	int rc = -1;
d77db6
+
d77db6
+	memset(buf, 0, sizeof(buf));
d77db6
+
d77db6
+	sockfd = setup_report_socket();
d77db6
+	if (sockfd < 0)
d77db6
+		return -1;
d77db6
+
d77db6
+	rc = commit_report_basic(sockfd);
d77db6
+	if (rc < 0)
d77db6
+		goto mf_fail;
d77db6
+
d77db6
+	rc = commit_report_backtrace(sockfd, MF_EVENT, ev);
d77db6
+	if (rc < 0)
d77db6
+		goto mf_fail;
d77db6
+
d77db6
+	sprintf(buf, "ANALYZER=%s", "rasdaemon-memory_failure");
d77db6
+	rc = write(sockfd, buf, strlen(buf) + 1);
d77db6
+	if (rc < strlen(buf) + 1)
d77db6
+		goto mf_fail;
d77db6
+
d77db6
+	sprintf(buf, "REASON=%s", "memory failure problem");
d77db6
+	rc = write(sockfd, buf, strlen(buf) + 1);
d77db6
+	if (rc < strlen(buf) + 1)
d77db6
+		goto mf_fail;
d77db6
+
d77db6
+	done = 1;
d77db6
+
d77db6
+mf_fail:
d77db6
+	if (sockfd > 0)
d77db6
+		close(sockfd);
d77db6
+
d77db6
+	if (done)
d77db6
+		return 0;
d77db6
+	else
d77db6
+		return -1;
d77db6
+}
d77db6
--- a/ras-report.h	2017-10-14 05:11:34.000000000 -0400
d77db6
+++ b/ras-report.h	2021-10-14 16:31:36.840657728 -0400
d77db6
@@ -34,7 +34,8 @@ enum {
d77db6
 	MCE_EVENT,
d77db6
 	AER_EVENT,
d77db6
 	NON_STANDARD_EVENT,
d77db6
-	ARM_EVENT
d77db6
+	ARM_EVENT,
d77db6
+	MF_EVENT,
d77db6
 };
d77db6
 
d77db6
 #ifdef HAVE_ABRT_REPORT
d77db6
@@ -44,6 +45,7 @@ int ras_report_aer_event(struct ras_even
d77db6
 int ras_report_mce_event(struct ras_events *ras, struct mce_event *ev);
d77db6
 int ras_report_non_standard_event(struct ras_events *ras, struct ras_non_standard_event *ev);
d77db6
 int ras_report_arm_event(struct ras_events *ras, struct ras_arm_event *ev);
d77db6
+int ras_report_mf_event(struct ras_events *ras, struct ras_mf_event *ev);
d77db6
 
d77db6
 #else
d77db6
 
d77db6
@@ -52,6 +54,7 @@ static inline int ras_report_aer_event(s
d77db6
 static inline int ras_report_mce_event(struct ras_events *ras, struct mce_event *ev) { return 0; };
d77db6
 static inline int ras_report_non_standard_event(struct ras_events *ras, struct ras_non_standard_event *ev) { return 0; };
d77db6
 static inline int ras_report_arm_event(struct ras_events *ras, struct ras_arm_event *ev) { return 0; };
d77db6
+static inline int ras_report_mf_event(struct ras_events *ras, struct ras_mf_event *ev) { return 0; };
d77db6
 
d77db6
 #endif
d77db6
 
d77db6
--- a/Makefile.am	2018-04-25 06:21:56.000000000 -0400
d77db6
+++ b/Makefile.am	2021-10-14 16:37:42.423639762 -0400
d77db6
@@ -41,12 +41,16 @@ endif
d77db6
 if WITH_EXTLOG
d77db6
    rasdaemon_SOURCES += ras-extlog-handler.c
d77db6
 endif
d77db6
+if WITH_MEMORY_FAILURE
d77db6
+   rasdaemon_SOURCES += ras-memory-failure-handler.c
d77db6
+endif
d77db6
 if WITH_ABRT_REPORT
d77db6
    rasdaemon_SOURCES += ras-report.c
d77db6
 endif
d77db6
 if WITH_HISI_NS_DECODE
d77db6
    rasdaemon_SOURCES += non-standard-hisi_hip07.c
d77db6
 endif
d77db6
+
d77db6
 rasdaemon_LDADD = -lpthread $(SQLITE3_LIBS) libtrace/libtrace.a
d77db6
 
d77db6
 include_HEADERS = config.h  ras-events.h  ras-logger.h  ras-mc-handler.h \
d77db6
--- a/ras-events.c	2021-10-14 16:31:36.730658636 -0400
d77db6
+++ b/ras-events.c	2021-10-14 16:37:11.043898809 -0400
d77db6
@@ -33,6 +33,7 @@ * Foundation, Inc., 51 Franklin Street,
d77db6
 #include "ras-arm-handler.h"
d77db6
 #include "ras-mce-handler.h"
d77db6
 #include "ras-extlog-handler.h"
d77db6
+#include "ras-memory-failure-handler.h"
d77db6
 #include "ras-record.h"
d77db6
 #include "ras-logger.h"
d77db6
 
d77db6
@@ -218,6 +219,10 @@ if (rc < 0) {
d77db6
 	rc |= __toggle_ras_mc_event(ras, "ras", "arm_event", enable);
d77db6
 #endif
d77db6
 
d77db6
+#ifdef HAVE_MEMORY_FAILURE
d77db6
+	rc |= __toggle_ras_mc_event(ras, "ras", "memory_failure_event", enable);
d77db6
+#endif
d77db6
+
d77db6
 free_ras:
d77db6
 	free(ras);
d77db6
 	return rc;
d77db6
@@ -736,6 +741,16 @@ (void)open("/sys/kernel/debug/ras/daemon
d77db6
 		    "ras", "aer_event");
d77db6
 #endif
d77db6
 
d77db6
+#ifdef HAVE_MEMORY_FAILURE
d77db6
+       rc = add_event_handler(ras, pevent, page_size, "ras", "memory_failure_event",
d77db6
+                              ras_memory_failure_event_handler);
d77db6
+       if (!rc)
d77db6
+               num_events++;
d77db6
+       else
d77db6
+               log(ALL, LOG_ERR, "Can't get traces from %s:%s\n",
d77db6
+                   "ras", "memory_failure_event");
d77db6
+#endif
d77db6
+
d77db6
 	if (!num_events) {
d77db6
 		log(ALL, LOG_INFO,
d77db6
 		    "Failed to trace all supported RAS events. Aborting.\n");