Blob Blame History Raw
From c6ed1e1af9356cdce1eaa652061dd6e4eb32d283 Mon Sep 17 00:00:00 2001
From: Junliang Li <lijunliang.dna@gmail.com>
Date: Thu, 13 Feb 2014 10:39:53 +0800
Subject: [PATCH 23/32] add ABRT support for rasdaemon

Adds abrt as another error mechanism for the rasdaemon.
This patch does:

1) read RAS events (mc, mce and aer)

2) set up an abrt-server Unix socket

3) write messages following the ABRT server protocol, placing the
   event info in the BACKTRACE field.

4) commit report.

For now, it depends on ABRT to rate-limit floods of reports.

Signed-off-by: Junliang Li <lijunliang.dna@gmail.com>
Signed-off-by: Mauro Carvalho Chehab <m.chehab@samsung.com>
---
 Makefile.am       |    5 +-
 configure.ac      |    9 +
 ras-aer-handler.c |    6 +
 ras-events.h      |    3 +
 ras-mc-handler.c  |    7 +
 ras-mce-handler.c |    6 +
 ras-report.c      |  429 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 ras-report.h      |   39 +++++
 8 files changed, 503 insertions(+), 1 deletions(-)
 create mode 100644 ras-report.c
 create mode 100644 ras-report.h

diff --git a/Makefile.am b/Makefile.am
index 473ce98..c1668b4 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -17,10 +17,13 @@ if WITH_MCE
 			mce-intel-dunnington.c mce-intel-tulsa.c \
 			mce-intel-sb.c mce-intel-ivb.c
 endif
+if WITH_ABRT_REPORT
+   rasdaemon_SOURCES += ras-report.c
+endif
 rasdaemon_LDADD = -lpthread $(SQLITE3_LIBS) libtrace/libtrace.a
 
 include_HEADERS = config.h  ras-events.h  ras-logger.h  ras-mc-handler.h \
-		  ras-aer-handler.h ras-mce-handler.h ras-record.h bitfield.h
+		  ras-aer-handler.h ras-mce-handler.h ras-record.h bitfield.h ras-report.h
 
 # This rule can't be called with more than one Makefile job (like make -j8)
 # I can't figure out a way to fix that
diff --git a/configure.ac b/configure.ac
index 4fe6ef2..0ea962e 100644
--- a/configure.ac
+++ b/configure.ac
@@ -53,6 +53,15 @@ AS_IF([test "x$enable_mce" = "xyes"], [
 ])
 AM_CONDITIONAL([WITH_MCE], [test x$enable_mce = xyes])
 
+AC_ARG_ENABLE([abrt_report],
+    AS_HELP_STRING([--enable-abrt-report], [enable report event to ABRT (currently experimental)]))
+
+AS_IF([test "x$enable_abrt_report" = "xyes"], [
+  AC_DEFINE(HAVE_ABRT_REPORT,1,"have report event to ABRT")
+  AC_SUBST([WITH_ABRT_REPORT])
+])
+AM_CONDITIONAL([WITH_ABRT_REPORT], [test x$enable_abrt_report = xyes])
+
 test "$sysconfdir" = '${prefix}/etc' && sysconfdir=/etc
 
 CFLAGS="$CFLAGS -Wall -Wmissing-prototypes -Wstrict-prototypes"
diff --git a/ras-aer-handler.c b/ras-aer-handler.c
index e5abaca..50526af 100644
--- a/ras-aer-handler.c
+++ b/ras-aer-handler.c
@@ -24,6 +24,7 @@
 #include "ras-record.h"
 #include "ras-logger.h"
 #include "bitfield.h"
+#include "ras-report.h"
 
 static const char *aer_errors[32] = {
 	/* Correctable errors */
@@ -115,5 +116,10 @@ int ras_aer_event_handler(struct trace_seq *s,
 	ras_store_aer_event(ras, &ev);
 #endif
 
+#ifdef HAVE_ABRT_REPORT
+	/* Report event to ABRT */
+	ras_report_aer_event(ras, &ev);
+#endif
+
 	return 0;
 }
diff --git a/ras-events.h b/ras-events.h
index 554a95e..64e045a 100644
--- a/ras-events.h
+++ b/ras-events.h
@@ -47,6 +47,9 @@ struct ras_events {
 
 	/* For the mce handler */
 	struct mce_priv	*mce_priv;
+
+	/* For ABRT socket*/
+	int socketfd;
 };
 
 struct pthread_data {
diff --git a/ras-mc-handler.c b/ras-mc-handler.c
index 5c24f65..ffb3805 100644
--- a/ras-mc-handler.c
+++ b/ras-mc-handler.c
@@ -23,6 +23,7 @@
 #include "ras-mc-handler.h"
 #include "ras-record.h"
 #include "ras-logger.h"
+#include "ras-report.h"
 
 int ras_mc_event_handler(struct trace_seq *s,
 			 struct pevent_record *record,
@@ -189,6 +190,12 @@ int ras_mc_event_handler(struct trace_seq *s,
 	/* Insert data into the SGBD */
 
 	ras_store_mc_event(ras, &ev);
+
+#ifdef HAVE_ABRT_REPORT
+	/* Report event to ABRT */
+	ras_report_mc_event(ras, &ev);
+#endif
+
 	return 0;
 
 parse_error:
diff --git a/ras-mce-handler.c b/ras-mce-handler.c
index 59e8d05..1431049 100644
--- a/ras-mce-handler.c
+++ b/ras-mce-handler.c
@@ -26,6 +26,7 @@
 #include "ras-mce-handler.h"
 #include "ras-record.h"
 #include "ras-logger.h"
+#include "ras-report.h"
 
 /*
  * The code below were adapted from Andi Kleen/Intel/SuSe mcelog code,
@@ -401,5 +402,10 @@ int ras_mce_event_handler(struct trace_seq *s,
 	ras_store_mce_record(ras, &e);
 #endif
 
+#ifdef HAVE_ABRT_REPORT
+	/* Report event to ABRT */
+	ras_report_mce_event(ras, &e);
+#endif
+
 	return 0;
 }
diff --git a/ras-report.c b/ras-report.c
new file mode 100644
index 0000000..d3e4a79
--- /dev/null
+++ b/ras-report.c
@@ -0,0 +1,429 @@
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/utsname.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+
+#include "ras-report.h"
+
+/* Connect to the ABRT server socket; returns the fd, or -1 on failure. */
+static int setup_report_socket(void){
+	struct sockaddr_un addr;
+	int sockfd;
+
+	sockfd = socket(AF_UNIX, SOCK_STREAM, 0);
+	if (sockfd < 0){
+		return -1;
+	}
+
+	/* Zero-fill first so the bounded copy below stays NUL-terminated. */
+	memset(&addr, 0, sizeof(addr));
+	addr.sun_family = AF_UNIX;
+	strncpy(addr.sun_path, ABRT_SOCKET, sizeof(addr.sun_path) - 1);
+
+	if (connect(sockfd, (struct sockaddr *)&addr, sizeof(addr)) < 0){
+		close(sockfd);	/* do not leak the descriptor on failure */
+		return -1;
+	}
+	return sockfd;
+}
+
+/* Send the request line plus PID, EXECUTABLE and BASENAME; 0 on success, <0 on error. */
+static int commit_report_basic(int sockfd){
+	char buf[INPUT_BUFFER_SIZE];
+	struct utsname un;
+	int rc = -1;
+
+	if(sockfd < 0){
+		return rc;
+	}
+
+	memset(buf, 0, INPUT_BUFFER_SIZE);
+	memset(&un, 0, sizeof(struct utsname));
+	rc = uname(&un);
+	if(rc < 0){
+		return rc;
+	}
+
+	/*
+	 * ABRT server protocol; (int) casts make a failed write() (-1) detectable.
+	 */
+	sprintf(buf, "PUT / HTTP/1.1\r\n\r\n");
+	rc = write(sockfd, buf, strlen(buf) + 1);
+	if(rc < (int)strlen(buf) + 1){
+		return -1;
+	}
+
+	sprintf(buf, "PID=%d", (int)getpid());
+	rc = write(sockfd, buf, strlen(buf) + 1);
+	if(rc < (int)strlen(buf) + 1){
+		return -1;
+	}
+
+	sprintf(buf, "EXECUTABLE=/boot/vmlinuz-%s", un.release);
+	rc = write(sockfd, buf, strlen(buf) + 1);
+	if(rc < (int)strlen(buf) + 1){
+		return -1;
+	}
+
+	sprintf(buf, "BASENAME=%s", "rasdaemon");
+	rc = write(sockfd, buf, strlen(buf) + 1);
+	if(rc < (int)strlen(buf) + 1){
+		return -1;
+	}
+
+	return 0;
+}
+
+/*
+ * Send "DONE" (4 bytes: strlen() stops at the first NUL) to finish the report.
+ * Returns 0 on success, -1 on error.
+ */
+static int commit_report_done(int sockfd){
+	int rc;
+
+	if(sockfd < 0){
+		return -1;
+	}
+
+	rc = write(sockfd, "DONE\0", strlen("DONE\0"));
+	if(rc < (int)strlen("DONE\0")){	/* signed, so a failed write (-1) is seen */
+		return -1;
+	}
+	return 0;
+}
+
+/*
+ * Append the memory-controller event, as an ABRT "BACKTRACE=" field, to the
+ * string in buf. Returns 0 on success, -1 if an argument is NULL.
+ */
+static int set_mc_event_backtrace(char *buf, struct ras_mc_event *ev){
+	char bt_buf[MAX_BACKTRACE_SIZE];
+
+	if(!buf || !ev)
+		return -1;
+
+	/*
+	 * Index/layer fields are printed as numbers (assumes integer types no
+	 * wider than int — TODO confirm); "%c" would emit a raw byte and a
+	 * value of 0 would embed a NUL that truncates the report.
+	 */
+	snprintf(bt_buf, sizeof(bt_buf), "BACKTRACE= "
+		 "timestamp=%s\n"
+		 "error_count=%d\n"
+		 "error_type=%s\n"
+		 "msg=%s\n"
+		 "label=%s\n"
+		 "mc_index=%d\n"
+		 "top_layer=%d\n"
+		 "middle_layer=%d\n"
+		 "lower_layer=%d\n"
+		 "address=%llu\n"
+		 "grain=%llu\n"
+		 "syndrome=%llu\n"
+		 "driver_detail=%s\n",
+		 ev->timestamp, ev->error_count, ev->error_type, ev->msg,
+		 ev->label, ev->mc_index, ev->top_layer, ev->middle_layer,
+		 ev->lower_layer, ev->address, ev->grain, ev->syndrome,
+		 ev->driver_detail);
+
+	strcat(buf, bt_buf);
+
+	return 0;
+}
+
+/*
+ * Append the machine-check event, as an ABRT "BACKTRACE=" field, to the
+ * string already in buf. Returns 0 on success, -1 if either pointer is
+ * NULL.
+ */
+static int set_mce_event_backtrace(char *buf, struct mce_event *ev){
+	char text[MAX_BACKTRACE_SIZE];
+
+	if(buf == NULL || ev == NULL){
+		return -1;
+	}
+
+	sprintf(text,
+		"BACKTRACE="
+		"timestamp=%s\n"
+		"bank_name=%s\n"
+		"error_msg=%s\n"
+		"mcgstatus_msg=%s\n"
+		"mcistatus_msg=%s\n"
+		"mcastatus_msg=%s\n"
+		"user_action=%s\n"
+		"mc_location=%s\n"
+		"mcgcap=%lu\n"
+		"mcgstatus=%lu\n"
+		"status=%lu\n"
+		"addr=%lu\n"
+		"misc=%lu\n"
+		"ip=%lu\n"
+		"tsc=%lu\n"
+		"walltime=%lu\n"
+		"cpu=%u\n"
+		"cpuid=%u\n"
+		"apicid=%u\n"
+		"socketid=%u\n"
+		"cs=%d\n"
+		"bank=%d\n"
+		"cpuvendor=%d\n",
+		/* string fields, printed with %s */
+		ev->timestamp, ev->bank_name,
+		ev->error_msg, ev->mcgstatus_msg,
+		ev->mcistatus_msg, ev->mcastatus_msg,
+		ev->user_action, ev->mc_location,
+		/* values printed with %lu */
+		ev->mcgcap, ev->mcgstatus,
+		ev->status, ev->addr,
+		ev->misc, ev->ip,
+		ev->tsc, ev->walltime,
+		/* values printed with %u */
+		ev->cpu, ev->cpuid,
+		ev->apicid, ev->socketid,
+		/* values printed with %d */
+		ev->cs, ev->bank,
+		ev->cpuvendor);
+
+	strcat(buf, text);
+
+	return 0;
+}
+
+/*
+ * Append the PCIe AER event, as an ABRT "BACKTRACE=" field, to the string
+ * already in buf. Returns 0 on success, -1 if either pointer is NULL.
+ */
+static int set_aer_event_backtrace(char *buf, struct ras_aer_event *ev){
+	char text[MAX_BACKTRACE_SIZE];
+
+	if(buf == NULL || ev == NULL){
+		return -1;
+	}
+
+	sprintf(text,
+		"BACKTRACE=timestamp=%s\nerror_type=%s\n"
+		"dev_name=%s\nmsg=%s\n",
+		ev->timestamp, ev->error_type, ev->dev_name, ev->msg);
+
+	strcat(buf, text);
+
+	return 0;
+}
+
+/*
+ * Format the event selected by type (MC_EVENT, AER_EVENT or MCE_EVENT) as a
+ * "BACKTRACE=" field and write it, NUL included, to sockfd. Returns 0 or -1.
+ */
+static int commit_report_backtrace(int sockfd, int type, void *ev){
+	char buf[MAX_BACKTRACE_SIZE];
+	char *pbuf = buf;
+	int rc = -1;
+	int buf_len = 0;
+
+	if(sockfd < 0 || !ev){
+		return -1;
+	}
+
+	memset(buf, 0, MAX_BACKTRACE_SIZE);
+	switch(type){
+	case MC_EVENT:
+		rc = set_mc_event_backtrace(buf, ev);
+		break;
+	case AER_EVENT:
+		rc = set_aer_event_backtrace(buf, ev);
+		break;
+	case MCE_EVENT:
+		rc = set_mce_event_backtrace(buf, ev);
+		break;
+	default:
+		return -1;
+	}
+	if(rc < 0){
+		return -1;
+	}
+
+	buf_len = strlen(buf);
+	for(;buf_len > INPUT_BUFFER_SIZE - 1; buf_len -= (INPUT_BUFFER_SIZE - 1)){
+		rc = write(sockfd, pbuf, INPUT_BUFFER_SIZE - 1);
+		if(rc < INPUT_BUFFER_SIZE - 1){
+			return -1;
+		}
+		pbuf = pbuf + INPUT_BUFFER_SIZE - 1;
+	}
+
+	/* The last piece carries the NUL, so all buf_len + 1 bytes must go out. */
+	rc = write(sockfd, pbuf, buf_len + 1);
+	if(rc < buf_len + 1){
+		return -1;
+	}
+	return 0;
+}
+
+/*
+ * Report a memory-controller event to ABRT. Opens a new connection, sends
+ * the common fields, the event as a BACKTRACE field, the ANALYZER and REASON
+ * fields and the final "DONE", then closes the connection.
+ * ras is not used. Returns 0 on success, -1 on any failure.
+ */
+int ras_report_mc_event(struct ras_events *ras, struct ras_mc_event *ev){
+	/* Sent straight from constants; sizeof() includes the NUL ending each message. */
+	static const char analyzer[] = "ANALYZER=rasdaemon-mc";
+	static const char reason[] = "REASON=EDAC driver report problem";
+	int sockfd;
+	int done = 0;
+	int rc;
+
+	sockfd = setup_report_socket();
+	if(sockfd < 0){
+		return -1;
+	}
+
+	rc = commit_report_basic(sockfd);
+	if(rc < 0){
+		goto mc_fail;
+	}
+
+	rc = commit_report_backtrace(sockfd, MC_EVENT, ev);
+	if(rc < 0){
+		goto mc_fail;
+	}
+
+	/* Signed comparisons: a failed write() returns -1 and must be caught. */
+	rc = write(sockfd, analyzer, sizeof(analyzer));
+	if(rc < (int)sizeof(analyzer)){
+		goto mc_fail;
+	}
+
+	rc = write(sockfd, reason, sizeof(reason));
+	if(rc < (int)sizeof(reason)){
+		goto mc_fail;
+	}
+
+	rc = commit_report_done(sockfd);
+	if(rc < 0){
+		goto mc_fail;
+	}
+
+	done = 1;
+
+mc_fail:
+
+	/* sockfd is a valid descriptor here (0 included), so always close it. */
+	close(sockfd);
+
+	return done ? 0 : -1;
+}
+
+/*
+ * Report a PCIe AER event to ABRT. Opens a new connection, sends the
+ * common fields, the event as a BACKTRACE field, the ANALYZER and REASON
+ * fields and the final "DONE", then closes the connection.
+ * ras is not used. Returns 0 on success, -1 on any failure.
+ */
+int ras_report_aer_event(struct ras_events *ras, struct ras_aer_event *ev){
+	/* Sent straight from constants; sizeof() includes the NUL ending each message. */
+	static const char analyzer[] = "ANALYZER=rasdaemon-aer";
+	static const char reason[] = "REASON=PCIe AER driver report problem";
+	int sockfd;
+	int done = 0;
+	int rc;
+
+	sockfd = setup_report_socket();
+	if(sockfd < 0){
+		return -1;
+	}
+
+	rc = commit_report_basic(sockfd);
+	if(rc < 0){
+		goto aer_fail;
+	}
+
+	rc = commit_report_backtrace(sockfd, AER_EVENT, ev);
+	if(rc < 0){
+		goto aer_fail;
+	}
+
+	/* Signed comparisons: a failed write() returns -1 and must be caught. */
+	rc = write(sockfd, analyzer, sizeof(analyzer));
+	if(rc < (int)sizeof(analyzer)){
+		goto aer_fail;
+	}
+
+	rc = write(sockfd, reason, sizeof(reason));
+	if(rc < (int)sizeof(reason)){
+		goto aer_fail;
+	}
+
+	rc = commit_report_done(sockfd);
+	if(rc < 0){
+		goto aer_fail;
+	}
+
+	done = 1;
+
+aer_fail:
+
+	/* sockfd is a valid descriptor here (0 included), so always close it. */
+	close(sockfd);
+
+	return done ? 0 : -1;
+}
+
+/*
+ * Report a machine-check event to ABRT. Opens a new connection, sends the
+ * common fields, the event as a BACKTRACE field, the ANALYZER and REASON
+ * fields and the final "DONE", then closes the connection.
+ * ras is not used. Returns 0 on success, -1 on any failure.
+ */
+int ras_report_mce_event(struct ras_events *ras, struct mce_event *ev){
+	/* Sent straight from constants; sizeof() includes the NUL ending each message. */
+	static const char analyzer[] = "ANALYZER=rasdaemon-mce";
+	static const char reason[] = "REASON=Machine Check driver report problem";
+	int sockfd;
+	int done = 0;
+	int rc;
+
+	sockfd = setup_report_socket();
+	if(sockfd < 0){
+		return -1;
+	}
+
+	rc = commit_report_basic(sockfd);
+	if(rc < 0){
+		goto mce_fail;
+	}
+
+	rc = commit_report_backtrace(sockfd, MCE_EVENT, ev);
+	if(rc < 0){
+		goto mce_fail;
+	}
+
+	/* Signed comparisons: a failed write() returns -1 and must be caught. */
+	rc = write(sockfd, analyzer, sizeof(analyzer));
+	if(rc < (int)sizeof(analyzer)){
+		goto mce_fail;
+	}
+
+	rc = write(sockfd, reason, sizeof(reason));
+	if(rc < (int)sizeof(reason)){
+		goto mce_fail;
+	}
+
+	rc = commit_report_done(sockfd);
+	if(rc < 0){
+		goto mce_fail;
+	}
+
+	done = 1;
+
+mce_fail:
+
+	/* sockfd is a valid descriptor here (0 included), so always close it. */
+	close(sockfd);
+
+	return done ? 0 : -1;
+}
diff --git a/ras-report.h b/ras-report.h
new file mode 100644
index 0000000..7920cdf
--- /dev/null
+++ b/ras-report.h
@@ -0,0 +1,39 @@
+#ifndef __RAS_REPORT_H
+#define __RAS_REPORT_H
+
+#include "ras-record.h"
+#include "ras-events.h"
+#include "ras-mc-handler.h"
+#include "ras-mce-handler.h"
+#include "ras-aer-handler.h"
+
+/* Maximal length of backtrace. */
+#define MAX_BACKTRACE_SIZE (1024*1024)
+/* Amount of data received from one client for a message before reporting error. */
+#define MAX_MESSAGE_SIZE (4*MAX_BACKTRACE_SIZE)
+/* Maximal number of characters read from socket at once. */
+#define INPUT_BUFFER_SIZE (8*1024)
+/* ABRT socket file */
+#define ABRT_SOCKET "/var/run/abrt/abrt.socket"
+
+enum {	/* kind of event being reported */
+	MC_EVENT,
+	MCE_EVENT,
+	AER_EVENT
+};
+
+#ifdef HAVE_ABRT_REPORT
+/* Each returns 0 when the report was sent and -1 otherwise. */
+int ras_report_mc_event(struct ras_events *ras, struct ras_mc_event *ev);
+int ras_report_aer_event(struct ras_events *ras, struct ras_aer_event *ev);
+int ras_report_mce_event(struct ras_events *ras, struct mce_event *ev);
+
+#else
+/* Reporting disabled at build time: no-op stubs that always return 0. */
+static inline int ras_report_mc_event(struct ras_events *ras, struct ras_mc_event *ev) { return 0; }
+static inline int ras_report_aer_event(struct ras_events *ras, struct ras_aer_event *ev) { return 0; }
+static inline int ras_report_mce_event(struct ras_events *ras, struct mce_event *ev) { return 0; }
+
+#endif /* HAVE_ABRT_REPORT */
+
+#endif /* __RAS_REPORT_H */
-- 
1.7.1