diff --git a/SOURCES/mcelog-patch-cfa11588ad8b.patch b/SOURCES/mcelog-patch-cfa11588ad8b.patch new file mode 100644 index 0000000..f821414 --- /dev/null +++ b/SOURCES/mcelog-patch-cfa11588ad8b.patch @@ -0,0 +1,45 @@ +From cfa11588ad8b95b81b272e6fcec41b788455e8ec Mon Sep 17 00:00:00 2001 +From: Tony Luck +Date: Fri, 3 Feb 2017 16:51:04 -0800 +Subject: [PATCH] Intel Xeons from Ivy Bridge onwards support a processor + identification number. Kernels v4.9 and higher include it + in the "mce" record. + +Signed-off-by: Tony Luck +Signed-off-by: Andi Kleen +--- + mcelog.c | 3 +++ + mcelog.h | 3 +++ + 2 files changed, 6 insertions(+) + +diff --git a/mcelog.c b/mcelog.c +index 3ae230dc7ef3..507f11bdbccb 100644 +--- a/mcelog.c ++++ b/mcelog.c +@@ -445,6 +445,9 @@ static void dump_mce(struct mce *m, unsigned recordlen) + if (n > 0) + Wprintf("\n"); + ++ if (recordlen >= offsetof(struct mce, ppin) && m->ppin) ++ n += Wprintf("PPIN %llx\n", m->ppin); ++ + if (recordlen >= offsetof(struct mce, cpuid) && m->cpuid) { + u32 fam, mod; + parse_cpuid(m->cpuid, &fam, &mod); +diff --git a/mcelog.h b/mcelog.h +index 6e175fede0f4..1f9453459b5d 100644 +--- a/mcelog.h ++++ b/mcelog.h +@@ -31,6 +31,9 @@ struct mce { + __u32 socketid; /* CPU socket ID */ + __u32 apicid; /* CPU initial apic ID */ + __u64 mcgcap; /* MCGCAP MSR: machine check capabilities of CPU */ ++ __u64 synd; /* MCA_SYND MSR: only valid on SMCA systems */ ++ __u64 ipid; /* MCA_IPID MSR: only valid on SMCA systems */ ++ __u64 ppin; /* Protected Processor Inventory Number */ + }; + + #define X86_VENDOR_INTEL 0 +-- +1.7.9.3 + diff --git a/SOURCES/mcelog-patch-e9aeed03f3d1.patch b/SOURCES/mcelog-patch-e9aeed03f3d1.patch new file mode 100644 index 0000000..60b048d --- /dev/null +++ b/SOURCES/mcelog-patch-e9aeed03f3d1.patch @@ -0,0 +1,39 @@ +From 4e9aeed03f3d17fb92662ff656566b0afb2ec99f Mon Sep 17 00:00:00 2001 +From: Prarit Bhargava +Date: Wed, 11 Jan 2017 10:01:40 -0500 +Subject: [PATCH] mcelog: is_cpu_supported() error message 
must be printed + Eprintf + +SYSERRprintf outputs a ": Success" message so the error message looks like + +mcelog: ERROR: AMD Processor family 21: mcelog does not support this +processor. Please use the edac_mce_amd module instead. +: Success + +which is confusing for end-users. + +I changed this to do Eprintf which keeps the error return to userspace +but drops the ": Success" line. + +Signed-off-by: Prarit Bhargava +Signed-off-by: Andi Kleen +--- + mcelog.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/mcelog.c b/mcelog.c +index 96bebee14afe..37c0af624870 100644 +--- a/mcelog.c ++++ b/mcelog.c +@@ -542,7 +542,7 @@ int is_cpu_supported(void) + if (family == 15) { + cputype = CPU_K8; + } else if (family >= 16) { +- SYSERRprintf("ERROR: AMD Processor family %d: mcelog does not support this processor. Please use the edac_mce_amd module instead.\n", family); ++ Eprintf("ERROR: AMD Processor family %d: mcelog does not support this processor. Please use the edac_mce_amd module instead.\n", family); + return 0; + } + } else if (!strcmp(vendor,"GenuineIntel")) +-- +1.7.9.3 + diff --git a/SOURCES/mcelog-update-94d853b2ea81.patch b/SOURCES/mcelog-update-94d853b2ea81.patch new file mode 100644 index 0000000..3b6eecc --- /dev/null +++ b/SOURCES/mcelog-update-94d853b2ea81.patch @@ -0,0 +1,2086 @@ +diff -urNp mcelog-d2e13bf0.orig/broadwell_epex.c mcelog-d2e13bf0/broadwell_epex.c +--- mcelog-d2e13bf0.orig/broadwell_epex.c 2016-11-30 11:23:54.542909636 -0500 ++++ mcelog-d2e13bf0/broadwell_epex.c 2016-11-30 11:24:12.203619329 -0500 +@@ -23,6 +23,11 @@ + #include "broadwell_epex.h" + #include "memdb.h" + ++/* Memory error was corrected by mirroring with channel failover */ ++#define BDW_MCI_MISC_FO (1ULL<<41) ++/* Memory error was corrected by mirroring and primary channel scrubbed successfully */ ++#define BDW_MCI_MISC_MC (1ULL<<42) ++ + /* See IA32 SDM Vol3B Table 16-20 */ + + static char *pcu_1[] = { +@@ -147,3 +152,23 @@ void bdw_epex_decode_model(int 
cputype, + break; + } + } ++ ++/* ++ * return: 0 - CE by normal ECC ++ * 1 - CE by mirroring with channel failover ++ * 2 - CE by mirroring and primary channel scrubbed successfully ++ */ ++int bdw_epex_ce_type(int bank, u64 status, u64 misc) ++{ ++ if (!(bank == 7 || bank == 8)) ++ return 0; ++ ++ if (status & MCI_STATUS_MISCV) { ++ if (misc & BDW_MCI_MISC_FO) ++ return 1; ++ if (misc & BDW_MCI_MISC_MC) ++ return 2; ++ } ++ ++ return 0; ++} +diff -urNp mcelog-d2e13bf0.orig/broadwell_epex.h mcelog-d2e13bf0/broadwell_epex.h +--- mcelog-d2e13bf0.orig/broadwell_epex.h 2016-11-30 11:23:54.542909636 -0500 ++++ mcelog-d2e13bf0/broadwell_epex.h 2016-11-30 11:24:12.203619329 -0500 +@@ -1 +1,2 @@ + void bdw_epex_decode_model(int cputype, int bank, u64 status, u64 misc); ++int bdw_epex_ce_type(int bank, u64 status, u64 misc); +diff -urNp mcelog-d2e13bf0.orig/client.c mcelog-d2e13bf0/client.c +--- mcelog-d2e13bf0.orig/client.c 2016-11-30 11:23:54.530909154 -0500 ++++ mcelog-d2e13bf0/client.c 2016-11-30 11:24:12.203619329 -0500 +@@ -67,3 +67,11 @@ void ask_server(char *command) + + SYSERRprintf("client read"); + } ++ ++void client_cleanup(void) ++{ ++ char *path = config_string("server", "socket-path"); ++ if (!path) ++ path = SOCKET_PATH; ++ unlink(path); ++} +diff -urNp mcelog-d2e13bf0.orig/client.h mcelog-d2e13bf0/client.h +--- mcelog-d2e13bf0.orig/client.h 2016-11-30 11:23:54.531909194 -0500 ++++ mcelog-d2e13bf0/client.h 2016-11-30 11:24:12.203619329 -0500 +@@ -1 +1,2 @@ + void ask_server(char *command); ++void client_cleanup(void); +diff -urNp mcelog-d2e13bf0.orig/db.c mcelog-d2e13bf0/db.c +--- mcelog-d2e13bf0.orig/db.c 2016-11-30 11:23:54.531909194 -0500 ++++ mcelog-d2e13bf0/db.c 1969-12-31 19:00:00.000000000 -0500 +@@ -1,599 +0,0 @@ +-/* Copyright (C) 2006 Andi Kleen, SuSE Labs. +- Dumb database manager. +- not suitable for large datasets, but human readable files and simple. +- assumes groups and entries-per-group are max low double digits. 
+- the in memory presentation could be easily optimized with a few +- hashes, but that shouldn't be needed for now. +- Note: obsolete, new design uses in memory databases only +- +- mcelog is free software; you can redistribute it and/or +- modify it under the terms of the GNU General Public +- License as published by the Free Software Foundation; version +- 2. +- +- mcelog is distributed in the hope that it will be useful, +- but WITHOUT ANY WARRANTY; without even the implied warranty of +- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +- General Public License for more details. +- +- You should find a copy of v2 of the GNU General Public License somewhere +- on your Linux system; if not, write to the Free Software Foundation, +- Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ +- +-/* TBD: +- add lock file to protect final rename +- timeout for locks +-*/ +- +-#define _GNU_SOURCE 1 +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +- +-#include "db.h" +-#include "memutil.h" +- +-/* file format +- +-# comment +-[group1] +-entry1: value +-entry2: value +- +-# comment +-# comment2 +-[group2] +-entry: value +- +-value is anything before new line, but first will be skipped +-spaces are allowed in entry names or groups +-comments are preserved, but moved in front of the group +-blank lines allowed. +- +-code doesnt check for unique records/entries right now. first wins. +- +-*/ +- +-struct entry { +- char *name; +- char *val; +-}; +- +-struct group { +- struct group *next; +- char *name; +- struct entry *entries; +- char *comment; +- int numentries; +-}; +- +-#define ENTRY_CHUNK (128 / sizeof(struct entry)) +- +-struct database { +- struct group *groups; +- FILE *fh; +- char *fn; +- int dirty; +-}; +- +-static int read_db(struct database *db); +-static FILE *open_file(char *fn, int wr); +-static void free_group(struct group *g); +- +-static void DBerror(char *fmt, ...) 
+-{ +- va_list ap; +- va_start(ap,fmt); +- vfprintf(stderr, fmt, ap); +- va_end(ap); +- exit(1); +-} +- +-#define DB_NEW(p) ((p) = xalloc(sizeof(*(p)))) +- +-static struct group *alloc_group(char *name) +-{ +- struct group *g; +- DB_NEW(g); +- g->entries = xalloc(ENTRY_CHUNK * sizeof(struct entry)); +- g->name = name; +- return g; +-} +- +-static char *cleanline(char *s) +-{ +- char *p; +- while (isspace(*s)) +- s++; +- if (*s == 0) +- return NULL; +- p = strchr(s, '\n'); +- if (p) +- *p = 0; +- return s; +-} +- +-struct database *open_db(char *fn, int wr) +-{ +- struct database *db; +- +- DB_NEW(db); +- db->fh = open_file(fn, wr); +- if (!db->fh) { +- DBerror("Cannot open database %s\n", fn); +- free(db); +- return NULL; +- } +- db->fn = xstrdup(fn); +- if (read_db(db) < 0) { +- free(db->fn); +- free(db); +- return NULL; +- } +- return db; +-} +- +-static int read_db(struct database *db) +-{ +- char *line = NULL; +- size_t linesz = 0; +- struct group *group = NULL, **pgroup = &db->groups; +- int linenr = 0; +- +- while (getline(&line, &linesz, db->fh) > 0) { +- char *s; +- s = strchr(line, '#'); +- if (s) { +- struct group *cmt; +- DB_NEW(cmt); +- *pgroup = cmt; +- pgroup = &cmt->next; +- cmt->comment = xstrdup(s + 1); +- *s = 0; +- } +- s = cleanline(line); +- linenr++; +- if (!s) +- continue; +- if (*s == '[') { +- int n; +- char *name; +- ++s; +- n = strcspn(s, "]"); +- if (s[n] == 0) +- goto parse_error; +- name = xalloc(n + 1); +- memcpy(name, s, n); +- group = alloc_group(name); +- *pgroup = group; +- pgroup = &group->next; +- } else { +- char *p; +- if (!group) +- goto parse_error; +- p = s + strcspn(s, ":"); +- if (*p != ':') +- goto parse_error; +- *p++ = 0; +- if (*p == ' ') +- p++; +- else +- goto parse_error; +- change_entry(db, group, line, p); +- } +- } +- +- if (ferror(db->fh)) { +- DBerror("IO error while reading database %s: %s\n", db->fn, +- strerror(errno)); +- goto error; +- } +- +- free(line); +- return 0; +- +-parse_error: +- DBerror("Parse 
error in database %s at line %d\n", db->fn, linenr); +-error: +- free(line); +- return -1; +-} +- +-/* +-Crash safety strategy: +- +-While the database is opened hold a exclusive flock on the file +-When writing write to a temporary file (.out). Only when the file +-is written rename to another temporary file (.complete). +- +-Then sync and swap tmp file with main file, then sync directory +-(later is linux specific) +- +-During open if the main file doesn't exist and a .complete file does +-rename the .complete file to main first; or open the .complete +-file if the file system is read only. +- +-*/ +- +-/* Flush directory. Useful on ext2, on journaling file systems +- the later fsync would usually force earlier transactions on the +- metadata too. */ +-static int flush_dir(char *fn) +-{ +- int err, fd; +- char *p; +- char dir[strlen(fn) + 1]; +- strcpy(dir, fn); +- p = strrchr(dir, '/'); +- if (p) +- *p = 0; +- else +- strcpy(dir, "."); +- fd = open(dir, O_DIRECTORY|O_RDONLY); +- if (fd < 0) +- return -1; +- err = 0; +- if (fsync(fd) < 0) +- err = -1; +- if (close(fd) < 0) +- err = -1; +- return err; +-} +- +-static int force_rename(char *a, char *b) +-{ +- unlink(b); /* ignore error */ +- return rename(a, b); +-} +- +-static int rewrite_db(struct database *db) +-{ +- FILE *fhtmp; +- int err; +- +- int tmplen = strlen(db->fn) + 10; +- char fn_complete[tmplen], fn_old[tmplen], fn_out[tmplen]; +- +- sprintf(fn_complete, "%s.complete", db->fn); +- sprintf(fn_old, "%s~", db->fn); +- sprintf(fn_out, "%s.out", db->fn); +- +- fhtmp = fopen(fn_out, "w"); +- if (!fhtmp) { +- DBerror("Cannot open `%s' output file: %s\n", fn_out, +- strerror(errno)); +- return -1; +- } +- +- dump_database(db, fhtmp); +- +- err = 0; +- /* Finish the output file */ +- if (ferror(fhtmp) || fflush(fhtmp) != 0 || fsync(fileno(fhtmp)) != 0 || +- fclose(fhtmp)) +- err = -1; +- /* Rename to .complete */ +- else if (force_rename(fn_out, fn_complete)) +- err = -1; +- /* RED-PEN: need to do retry for 
race */ +- /* Move to final name */ +- else if (force_rename(db->fn, fn_old) || rename(fn_complete, db->fn)) +- err = -1; +- /* Hit disk */ +- else if (flush_dir(db->fn)) +- err = -1; +- +- if (err) { +- DBerror("Error writing to database %s: %s\n", db->fn, +- strerror(errno)); +- } +- +- return err; +-} +- +-int sync_db(struct database *db) +-{ +- if (!db->dirty) +- return 0; +- /* RED-PEN window without lock */ +- if (rewrite_db(db)) +- return -1; +- fclose(db->fh); +- db->dirty = 0; +- db->fh = open_file(db->fn, 1); +- if (!db->fh) +- return -1; +- return 0; +-} +- +-static void free_group(struct group *g) +-{ +- free(g->entries); +- free(g->name); +- free(g->comment); +- free(g); +-} +- +-static void free_data(struct database *db) +-{ +- struct group *g, *gnext; +- for (g = db->groups; g; g = gnext) { +- gnext = g->next; +- free_group(g); +- } +-} +- +-int close_db(struct database *db) +-{ +- if (db->dirty && rewrite_db(db)) +- return -1; +- if (fclose(db->fh)) +- return -1; +- free_data(db); +- free(db->fn); +- free(db); +- return 0; +-} +- +-static FILE *open_file(char *fn, int wr) +-{ +- char tmp[strlen(fn) + 10]; +- FILE *fh; +- if (access(fn, wr ? (R_OK|W_OK) : R_OK)) { +- switch (errno) { +- case EROFS: +- wr = 0; +- break; +- case ENOENT: +- /* No main DB file */ +- sprintf(tmp, "%s.complete", fn); +- /* Handle race */ +- if (!access(tmp, R_OK)) { +- if (rename(tmp, fn) < 0 && errno == EEXIST) +- return open_file(fn, wr); +- } else +- creat(fn, 0644); +- break; +- } +- } +- fh = fopen(fn, wr ? "r+" : "r"); +- if (fh) { +- if (flock(fileno(fh), wr ? 
LOCK_EX : LOCK_SH) < 0) { +- fclose(fh); +- return NULL; +- } +- } +- return fh; +-} +- +-void dump_group(struct group *g, FILE *out) +-{ +- struct entry *e; +- fprintf(out, "[%s]\n", g->name); +- for (e = &g->entries[0]; e->name && !ferror(out); e++) +- fprintf(out, "%s: %s\n", e->name, e->val); +-} +- +-void dump_database(struct database *db, FILE *out) +-{ +- struct group *g; +- for (g = db->groups; g && !ferror(out); g = g->next) { +- if (g->comment) { +- fprintf(out, "#%s", g->comment); +- continue; +- } +- dump_group(g, out); +- } +-} +- +-struct group *find_group(struct database *db, char *name) +-{ +- struct group *g; +- for (g = db->groups; g; g = g->next) +- if (g->name && !strcmp(g->name, name)) +- return g; +- return NULL; +-} +- +-int delete_group(struct database *db, struct group *group) +-{ +- struct group *g, **gprev; +- gprev = &db->groups; +- for (g = *gprev; g; gprev = &g->next, g = g->next) { +- if (g == group) { +- *gprev = g->next; +- free_group(g); +- return 0; +- } +- } +- db->dirty = 1; +- return -1; +-} +- +-char *entry_val(struct group *g, char *entry) +-{ +- struct entry *e; +- for (e = &g->entries[0]; e->name; e++) +- if (!strcmp(e->name, entry)) +- return e->val; +- return NULL; +-} +- +-struct group *add_group(struct database *db, char *name, int *existed) +-{ +- struct group *g, **gprev = &db->groups; +- for (g = *gprev; g; gprev = &g->next, g = g->next) +- if (g->name && !strcmp(g->name, name)) +- break; +- if (existed) +- *existed = (g != NULL); +- if (!g) { +- g = alloc_group(xstrdup(name)); +- g->next = *gprev; +- *gprev = g; +- } +- db->dirty = 1; +- return g; +- +-} +- +-void change_entry(struct database *db, struct group *g, +- char *entry, char *newval) +-{ +- int i; +- struct entry *e, *entries; +- db->dirty = 1; +- entries = &g->entries[0]; +- for (e = entries; e->name; e++) { +- if (!strcmp(e->name, entry)) { +- free(e->val); +- e->val = xstrdup(newval); +- return; +- } +- } +- i = e - entries; +- assert(i == 
g->numentries); +- if (i > 0 && (i % ENTRY_CHUNK) == 0) { +- int new = (i + ENTRY_CHUNK) * sizeof(struct entry); +- g->entries = xrealloc(g->entries, new); +- } +- entries = &g->entries[0]; +- e = &entries[i]; +- e->name = xstrdup(entry); +- e->val = xstrdup(newval); +- g->numentries++; +-} +- +-void delete_entry(struct database *db, struct group *g, char *entry) +-{ +- struct entry *e; +- for (e = &g->entries[0]; e->name; e++) +- if (!strcmp(e->name, entry)) +- break; +- if (e->name == NULL) +- return; +- while ((++e)->name) +- e[-1] = e[0]; +- g->numentries--; +-} +- +-struct group * +-clone_group(struct database *db, struct group *gold, char *newname) +-{ +- struct entry *e; +- struct group *gnew = add_group(db, newname, NULL); +- for (e = &gold->entries[0]; e->name; e++) +- change_entry(db, gnew, e->name, e->val); +- return gnew; +-} +- +-static char *save_comment(char *c) +-{ +- int len = strlen(c); +- char *s = xalloc(len + 2); +- strcpy(s, c); +- if (len == 0 || c[len - 1] != '\n') +- s[len] = '\n'; +- return s; +-} +- +-void add_comment(struct database *db, struct group *group, char *comment) +-{ +- struct group *g; +- struct group **gprev = &db->groups; +- for (g = *gprev; g; gprev = &g->next, g = g->next) { +- if ((group && g == group) || (!group && g->comment == NULL)) +- break; +- } +- DB_NEW(g); +- g->comment = save_comment(comment); +- g->next = *gprev; +- *gprev = g; +- db->dirty = 1; +-} +- +-struct group *first_group(struct database *db) +-{ +- return next_group(db->groups); +-} +- +-struct group *next_group(struct group *g) +-{ +- struct group *n; +- if (!g) +- return NULL; +- n = g->next; +- while (n && n->comment) +- n = n->next; +- return n; +-} +- +-char *group_name(struct group *g) +-{ +- return g->name; +-} +- +-struct group *find_entry(struct database *db, struct group *prev, +- char *entry, char *value) +-{ +- int previ = 0; +- struct entry *e; +- struct group *g; +- if (prev) +- g = prev->next; +- else +- g = db->groups; +- for (; g; g = 
g->next) { +- if (g->comment) +- continue; +- /* Short cut when entry is at the same place as previous */ +- if (previ < g->numentries) { +- e = &g->entries[previ]; +- if (!strcmp(e->name, entry)) { +- if (!strcmp(e->val, value)) +- return g; +- continue; +- } +- } +- for (e = &g->entries[0]; e->name; e++) { +- if (strcmp(e->name, entry)) +- continue; +- if (!strcmp(e->val, value)) +- return g; +- previ = e - &g->entries[0]; +- break; +- } +- } +- return NULL; +-} +- +-void rename_group(struct database *db, struct group *g, char *newname) +-{ +- free(g->name); +- g->name = xstrdup(newname); +- db->dirty = 1; +-} +- +-unsigned long entry_num(struct group *g, char *entry) +-{ +- char *e = entry_val(g, entry); +- unsigned long val = 0; +- if (e) +- sscanf(e, "%lu", &val); +- return val; +-} +- +-void change_entry_num(struct database *db, struct group *g, +- char *entry, unsigned long val) +-{ +- char buf[20]; +- sprintf(buf, "%lu", val); +- change_entry(db, g, entry, buf); +-} +diff -urNp mcelog-d2e13bf0.orig/db.h mcelog-d2e13bf0/db.h +--- mcelog-d2e13bf0.orig/db.h 2016-11-30 11:23:54.531909194 -0500 ++++ mcelog-d2e13bf0/db.h 1969-12-31 19:00:00.000000000 -0500 +@@ -1,29 +0,0 @@ +-#include +-struct database; +-struct group; +- +-struct database *open_db(char *fn, int wr); +-int sync_db(struct database *db); +-int close_db(struct database *db); +-struct group *find_group(struct database *db, char *name); +-char *entry_val(struct group *g, char *entry); +-struct group *add_group(struct database *db, char *name, int *existed); +-int delete_group(struct database *db, struct group *g); +-void change_entry(struct database *db, struct group *g, +- char *entry, char *newval); +-void add_comment(struct database *db, struct group *group, char *comment); +-struct group *first_group(struct database *db); +-struct group *next_group(struct group *g); +-void dump_group(struct group *g, FILE *out); +-void dump_database(struct database *db, FILE *out); +-struct group 
*find_entry(struct database *db, struct group *prev, +- char *entry, char *value); +-void rename_group(struct database *db, struct group *group, char *newname); +-char *group_name(struct group *g); +-unsigned long entry_num(struct group *g, char *entry); +-void change_entry_num(struct database *db, struct group *g, char *entry, +- unsigned long val); +-void delete_entry(struct database *db, struct group *g, char *entry); +-struct group * +-clone_group(struct database *db, struct group *gold, char *newname); +- +diff -urNp mcelog-d2e13bf0.orig/dbquery.c mcelog-d2e13bf0/dbquery.c +--- mcelog-d2e13bf0.orig/dbquery.c 2016-11-30 11:23:54.531909194 -0500 ++++ mcelog-d2e13bf0/dbquery.c 1969-12-31 19:00:00.000000000 -0500 +@@ -1,130 +0,0 @@ +-/* Access db files. This is for testing and debugging only. */ +-#define _GNU_SOURCE 1 +-#include +-#include +-#include +-#include +-#include +-#include +-#include "db.h" +- +-#define C(x) if (x) printf(#x " failed: %s\n", strerror(errno)) +-#define NEEDGROUP if (!group) { printf("need group first\n"); break; } +- +-void Eprintf(char *fmt, ...) 
+-{ +- va_list ap; +- va_start(ap, fmt); +- vfprintf(stderr, fmt, ap); +- va_end(ap); +-} +- +-void usage(void) +-{ +- printf( +- "s sync\n" +- "q close/quit\n" +- "ggroup find group\n" +- "G delete group\n" +- "agroup add group\n" +- "ventry dump entry\n" +- "centry,val change entry to val\n" +- "fentry,val find entry with value and dump its group\n" +- "Ccomment add comment\n" +- "Lnewname clone group to newname\n" +- "d dump group\n" +- "D dump database\n"); +-} +- +-int main(int ac, char **av) +-{ +- struct database *db; +- struct group *group = NULL; +- char *line = NULL; +- size_t linesz = 0; +- if (!av[1]) { +- printf("%s database\n", av[0]); +- exit(1); +- } +- printf("dbtest\n"); +- db = open_db(av[1], 1); +- while (printf("> "), +- fflush(stdout), +- getline(&line, &linesz, stdin) > 0) { +- char *p = line + strlen(line) - 1; +- while (p >= line && isspace(*p)) +- *p-- = 0; +- switch (line[0]) { +- case 's': +- C(sync_db(db)); +- break; +- case 'q': +- C(close_db(db)); +- exit(0); +- case 'g': +- group = find_group(db, line + 1); +- if (group) +- printf("found\n"); +- break; +- case 'G': +- NEEDGROUP; +- C(delete_group(db, group)); +- group = NULL; +- break; +- case 'a': { +- int existed = 0; +- group = add_group(db, line + 1, &existed); +- if (existed) +- printf("existed\n"); +- break; +- } +- case 'v': +- NEEDGROUP; +- printf("%s\n", entry_val(group, line + 1)); +- break; +- case 'c': { +- p = line + 1; +- char *entry = strsep(&p, ","); +- NEEDGROUP; +- change_entry(db, group, entry, strsep(&p, "")); +- break; +- } +- case 'L': +- NEEDGROUP; +- clone_group(db, group, line + 1); +- break; +- case 'f': { +- struct group *g; +- p = line + 1; +- char *entry = strsep(&p, ","); +- char *val = strsep(&p, ""); +- g = NULL; +- int nr = 0; +- while ((g = find_entry(db, g, entry, val)) != NULL) { +- if (nr == 0) +- group = g; +- nr++; +- dump_group(group, stdout); +- } +- if (nr == 0) +- printf("not found\n"); +- break; +- } +- case 'C': +- NEEDGROUP; +- 
add_comment(db, group, line + 1); +- break; +- case 'd': +- NEEDGROUP; +- dump_group(group, stdout); +- break; +- case 'D': +- dump_database(db, stdout); +- break; +- default: +- usage(); +- break; +- } +- } +- return 0; +-} +diff -urNp mcelog-d2e13bf0.orig/denverton.c mcelog-d2e13bf0/denverton.c +--- mcelog-d2e13bf0.orig/denverton.c 1969-12-31 19:00:00.000000000 -0500 ++++ mcelog-d2e13bf0/denverton.c 2016-11-30 11:24:12.204619369 -0500 +@@ -0,0 +1,45 @@ ++/* Copyright (C) 2016 Intel Corporation ++ Decode Intel Denverton specific machine check errors. ++ ++ mcelog is free software; you can redistribute it and/or ++ modify it under the terms of the GNU General Public ++ License as published by the Free Software Foundation; version ++ 2. ++ ++ mcelog is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ General Public License for more details. ++ ++ You should find a copy of v2 of the GNU General Public License somewhere ++ on your Linux system; if not, write to the Free Software Foundation, ++ Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA ++ ++ Author: Tony Luck ++*/ ++ ++#include "mcelog.h" ++#include "bitfield.h" ++#include "denverton.h" ++#include "memdb.h" ++ ++/* See IA32 SDM Vol3B Table 16-33 */ ++ ++static struct field mc_bits[] = { ++ SBITFIELD(16, "Cmd/Addr parity"), ++ SBITFIELD(17, "Corrected Demand/Patrol Scrub Error"), ++ SBITFIELD(18, "Uncorrected patrol scrub error"), ++ SBITFIELD(19, "Uncorrected demand read error"), ++ SBITFIELD(20, "WDB read ECC"), ++ {} ++}; ++ ++void denverton_decode_model(int cputype, int bank, u64 status, u64 misc) ++{ ++ switch (bank) { ++ case 6: case 7: ++ Wprintf("MemCtrl: "); ++ decode_bitfield(status, mc_bits); ++ break; ++ } ++} +diff -urNp mcelog-d2e13bf0.orig/denverton.h mcelog-d2e13bf0/denverton.h +--- mcelog-d2e13bf0.orig/denverton.h 1969-12-31 19:00:00.000000000 -0500 
++++ mcelog-d2e13bf0/denverton.h 2016-11-30 11:24:12.204619369 -0500 +@@ -0,0 +1 @@ ++void denverton_decode_model(int cputype, int bank, u64 status, u64 misc); +diff -urNp mcelog-d2e13bf0.orig/diskdb.c mcelog-d2e13bf0/diskdb.c +--- mcelog-d2e13bf0.orig/diskdb.c 2016-11-30 11:23:54.531909194 -0500 ++++ mcelog-d2e13bf0/diskdb.c 1969-12-31 19:00:00.000000000 -0500 +@@ -1,96 +0,0 @@ +-/* High level interface to disk based DIMM database */ +-/* Note: obsolete: new design is in memdb.c */ +-#include +-#include +-#include +-#include "mcelog.h" +-#include "diskdb.h" +-#include "paths.h" +-#include "dimm.h" +-#include "dmi.h" +- +-char *error_trigger; +-unsigned error_thresh = 20; +-char *dimm_db_fn = DIMM_DB_FILENAME; +- +-static void checkdimmdb(void) +-{ +- if (open_dimm_db(dimm_db_fn) < 0) +- exit(1); +-} +- +-int diskdb_modifier(int opt) +-{ +- char *end; +- +- switch (opt) { +- case O_DATABASE: +- dimm_db_fn = optarg; +- checkdmi(); +- checkdimmdb(); +- break; +- case O_ERROR_TRIGGER: +- checkdmi(); +- open_dimm_db(dimm_db_fn); +- error_thresh = strtoul(optarg, &end, 0); +- if (end == optarg || *end != ',') +- usage(); +- error_trigger = end + 1; +- break; +- default: +- return 0; +- } +- return 1; +-} +- +-void diskdb_resolve_addr(u64 addr) +-{ +- if (open_dimm_db(dimm_db_fn) >= 0) +- new_error(addr, error_thresh, error_trigger); +-} +- +- +-void diskdb_usage(void) +-{ +- fprintf(stderr, +- "Manage disk DIMM error database\n" +- " mcelog [options] --drop-old-memory|--reset-memory locator\n" +- " mcelog --dump-memory locator\n" +- " old can be either locator or name\n" +- "Disk database options:" +- "--database fn Set filename of DIMM database (default " DIMM_DB_FILENAME ")\n" +- "--error-trigger cmd,thresh Run cmd on exceeding thresh errors per DIMM\n"); +-} +- +- +-static void dimm_common(int ac, char **av) +-{ +- no_syslog(); +- checkdmi(); +- checkdimmdb(); +- argsleft(ac, av); +-} +- +-int diskdb_cmd(int opt, int ac, char **av) +-{ +- char *arg = optarg; +- +- 
switch (opt) { +- case O_DUMP_MEMORY: +- dimm_common(ac, av); +- if (arg) +- dump_dimm(arg); +- else +- dump_all_dimms(); +- return 1; +- case O_RESET_MEMORY: +- dimm_common(ac, av); +- reset_dimm(arg); +- return 1; +- case O_DROP_OLD_MEMORY: +- dimm_common(ac, av); +- gc_dimms(); +- return 1; +- } +- return 0; +-} +diff -urNp mcelog-d2e13bf0.orig/diskdb.h mcelog-d2e13bf0/diskdb.h +--- mcelog-d2e13bf0.orig/diskdb.h 2016-11-30 11:23:54.531909194 -0500 ++++ mcelog-d2e13bf0/diskdb.h 1969-12-31 19:00:00.000000000 -0500 +@@ -1,32 +0,0 @@ +- +-#ifdef CONFIG_DISKDB +-enum diskdb_options { +- O_DATABASE = O_DISKDB, +- O_ERROR_TRIGGER, +- O_DUMP_MEMORY, +- O_RESET_MEMORY, +- O_DROP_OLD_MEMORY, +-}; +- +-void diskdb_resolve_addr(u64 addr); +-int diskdb_modifier(int opt); +-int diskdb_cmd(int opt, int ac, char **av); +-void diskdb_usage(void); +- +-#define DISKDB_OPTIONS \ +- { "database", 1, NULL, O_DATABASE }, \ +- { "error-trigger", 1, NULL, O_ERROR_TRIGGER }, \ +- { "dump-memory", 2, NULL, O_DUMP_MEMORY }, \ +- { "reset-memory", 2, NULL, O_RESET_MEMORY }, \ +- { "drop-old-memory", 0, NULL, O_DROP_OLD_MEMORY }, +- +-#else +- +-static inline void diskdb_resolve_addr(u64 addr) {} +-static inline int diskdb_modifier(int opt) { return 0; } +-static inline int diskdb_cmd(int opt, int ac, char **av) { return 0; } +-static inline void diskdb_usage(void) {} +- +-#define DISKDB_OPTIONS +- +-#endif +diff -urNp mcelog-d2e13bf0.orig/dmi.h mcelog-d2e13bf0/dmi.h +--- mcelog-d2e13bf0.orig/dmi.h 2016-11-30 11:23:54.534909314 -0500 ++++ mcelog-d2e13bf0/dmi.h 2016-11-30 11:24:12.205619409 -0500 +@@ -3,7 +3,7 @@ struct dmi_entry { + unsigned char type; + unsigned char length; + unsigned short handle; +-}; ++} __attribute__((packed)); + + enum { + DMI_MEMORY_ARRAY = 16, +diff -urNp mcelog-d2e13bf0.orig/.gitignore mcelog-d2e13bf0/.gitignore +--- mcelog-d2e13bf0.orig/.gitignore 2016-11-30 11:23:54.530909154 -0500 ++++ mcelog-d2e13bf0/.gitignore 2016-11-30 11:24:12.202619289 -0500 +@@ -8,3 +8,5 
@@ dbquery + .depend + tsc + core ++version.c ++version.tmp +diff -urNp mcelog-d2e13bf0.orig/input/bdw_mirror1 mcelog-d2e13bf0/input/bdw_mirror1 +--- mcelog-d2e13bf0.orig/input/bdw_mirror1 1969-12-31 19:00:00.000000000 -0500 ++++ mcelog-d2e13bf0/input/bdw_mirror1 2016-11-30 11:24:12.205619409 -0500 +@@ -0,0 +1,6 @@ ++# Broadwell mirror corrected with mirror failover ++CPU 0 7 ++PROCESSOR 0:0x406f0 ++STATUS 0x8800000000000080 ++MISC 20000000000 ++ +diff -urNp mcelog-d2e13bf0.orig/input/bdw_mirror2 mcelog-d2e13bf0/input/bdw_mirror2 +--- mcelog-d2e13bf0.orig/input/bdw_mirror2 1969-12-31 19:00:00.000000000 -0500 ++++ mcelog-d2e13bf0/input/bdw_mirror2 2016-11-30 11:24:12.205619409 -0500 +@@ -0,0 +1,6 @@ ++# Broadwell mirror corrected with successful scrub ++CPU 0 7 ++PROCESSOR 0:0x406f0 ++STATUS 0x8800000000000080 ++MISC 40000000000 ++ +diff -urNp mcelog-d2e13bf0.orig/input/GENMEM mcelog-d2e13bf0/input/GENMEM +--- mcelog-d2e13bf0.orig/input/GENMEM 2016-11-30 11:23:54.532909234 -0500 ++++ mcelog-d2e13bf0/input/GENMEM 2016-11-30 11:24:12.205619409 -0500 +@@ -11,7 +11,7 @@ dimm=${3:-0} + corr_err_cnt=${4:-0} + + if [ ! 
-z "$5" ] ; then +- ucflag=$[1 << (61-32)] ++ ucflag=$[(1 << (61-32)) | (1 << (60-32)) | (1 << (56-32))] + else + ucflag=0 + fi +diff -urNp mcelog-d2e13bf0.orig/input/skx_mirror1 mcelog-d2e13bf0/input/skx_mirror1 +--- mcelog-d2e13bf0.orig/input/skx_mirror1 1969-12-31 19:00:00.000000000 -0500 ++++ mcelog-d2e13bf0/input/skx_mirror1 2016-11-30 11:24:12.205619409 -0500 +@@ -0,0 +1,6 @@ ++# Skylake mirror corrected with mirror failover ++CPU 0 7 ++PROCESSOR 0:0x50650 ++STATUS 0x8800000000000080 ++MISC 8000000000000000 ++ +diff -urNp mcelog-d2e13bf0.orig/input/skx_mirror2 mcelog-d2e13bf0/input/skx_mirror2 +--- mcelog-d2e13bf0.orig/input/skx_mirror2 1969-12-31 19:00:00.000000000 -0500 ++++ mcelog-d2e13bf0/input/skx_mirror2 2016-11-30 11:24:12.205619409 -0500 +@@ -0,0 +1,6 @@ ++# Skylake mirror corrected with successful scrub ++CPU 0 7 ++PROCESSOR 0:0x50650 ++STATUS 0x8800000000000080 ++MISC 4000000000000000 ++ +diff -urNp mcelog-d2e13bf0.orig/intel.c mcelog-d2e13bf0/intel.c +--- mcelog-d2e13bf0.orig/intel.c 2016-11-30 11:23:54.538909475 -0500 ++++ mcelog-d2e13bf0/intel.c 2016-11-30 11:24:12.206619450 -0500 +@@ -25,7 +25,6 @@ + #include "sandy-bridge.h" + #include "ivy-bridge.h" + #include "haswell.h" +-#include "xeon75xx.h" + + int memory_error_support; + +@@ -36,7 +35,9 @@ void intel_cpu_init(enum cputype cpu) + cpu == CPU_IVY_BRIDGE || cpu == CPU_IVY_BRIDGE_EPEX || + cpu == CPU_HASWELL || cpu == CPU_HASWELL_EPEX || cpu == CPU_BROADWELL || + cpu == CPU_BROADWELL_DE || cpu == CPU_BROADWELL_EPEX || +- cpu == CPU_KNIGHTS_LANDING || cpu == CPU_SKYLAKE || cpu == CPU_SKYLAKE_XEON) ++ cpu == CPU_KNIGHTS_LANDING || cpu == CPU_KNIGHTS_MILL || ++ cpu == CPU_SKYLAKE || cpu == CPU_SKYLAKE_XEON || ++ cpu == CPU_KABYLAKE || cpu == CPU_DENVERTON) + memory_error_support = 1; + } + +@@ -82,6 +83,8 @@ enum cputype select_intel_cputype(int fa + return CPU_BROADWELL_DE; + else if (model == 0x57) + return CPU_KNIGHTS_LANDING; ++ else if (model == 0x85) ++ return CPU_KNIGHTS_MILL; + else if 
(model == 0x1c || model == 0x26 || model == 0x27 || + model == 0x35 || model == 0x36 || model == 0x36 || + model == 0x37 || model == 0x4a || model == 0x4c || +@@ -91,18 +94,22 @@ enum cputype select_intel_cputype(int fa + return CPU_SKYLAKE; + else if (model == 0x55) + return CPU_SKYLAKE_XEON; ++ else if (model == 0x8E || model == 0x9E) ++ return CPU_KABYLAKE; ++ else if (model == 0x5f) ++ return CPU_DENVERTON; + if (model > 0x1a) { +- Eprintf("Family 6 Model %x CPU: only decoding architectural errors\n", ++ Eprintf("Family 6 Model %u CPU: only decoding architectural errors\n", + model); + return CPU_INTEL; + } + } + if (family > 6) { +- Eprintf("Family %u Model %x CPU: only decoding architectural errors\n", ++ Eprintf("Family %u Model %u CPU: only decoding architectural errors\n", + family, model); + return CPU_INTEL; + } +- Eprintf("Unknown Intel CPU type family %x model %x\n", family, model); ++ Eprintf("Unknown Intel CPU type family %u model %u\n", family, model); + return family == 6 ? 
CPU_P6OLD : CPU_GENERIC; + } + +@@ -127,9 +134,6 @@ static int intel_memory_error(struct mce + case CPU_NEHALEM: + nehalem_memerr_misc(m, channel, dimm); + break; +- case CPU_XEON75XX: +- xeon75xx_memory_error(m, recordlen, channel, dimm); +- break; + case CPU_SANDY_BRIDGE_EP: + sandy_bridge_ep_memerr_misc(m, channel, dimm); + break; +diff -urNp mcelog-d2e13bf0.orig/intel.c.orig mcelog-d2e13bf0/intel.c.orig +--- mcelog-d2e13bf0.orig/intel.c.orig 2016-11-30 11:23:54.539909515 -0500 ++++ mcelog-d2e13bf0/intel.c.orig 2016-11-30 11:23:40.052327334 -0500 +@@ -35,7 +35,8 @@ void intel_cpu_init(enum cputype cpu) + cpu == CPU_SANDY_BRIDGE || cpu == CPU_SANDY_BRIDGE_EP || + cpu == CPU_IVY_BRIDGE || cpu == CPU_IVY_BRIDGE_EPEX || + cpu == CPU_HASWELL || cpu == CPU_HASWELL_EPEX || cpu == CPU_BROADWELL || +- cpu == CPU_KNIGHTS_LANDING) ++ cpu == CPU_BROADWELL_DE || cpu == CPU_BROADWELL_EPEX || ++ cpu == CPU_KNIGHTS_LANDING || cpu == CPU_SKYLAKE || cpu == CPU_SKYLAKE_XEON) + memory_error_support = 1; + } + +@@ -73,15 +74,23 @@ enum cputype select_intel_cputype(int fa + return CPU_HASWELL; + else if (model == 0x3f) + return CPU_HASWELL_EPEX; +- else if (model == 0x3d || model == 0x56) ++ else if (model == 0x3d) + return CPU_BROADWELL; +- else if (model == 0x57) ++ else if (model == 0x4f) ++ return CPU_BROADWELL_EPEX; ++ else if (model == 0x56) ++ return CPU_BROADWELL_DE; ++ else if (model == 0x57) + return CPU_KNIGHTS_LANDING; + else if (model == 0x1c || model == 0x26 || model == 0x27 || + model == 0x35 || model == 0x36 || model == 0x36 || + model == 0x37 || model == 0x4a || model == 0x4c || + model == 0x4d || model == 0x5a || model == 0x5d) + return CPU_ATOM; ++ else if (model == 0x4e || model == 0x5e) ++ return CPU_SKYLAKE; ++ else if (model == 0x55) ++ return CPU_SKYLAKE_XEON; + if (model > 0x1a) { + Eprintf("Family 6 Model %x CPU: only decoding architectural errors\n", + model); +diff -urNp mcelog-d2e13bf0.orig/intel.h mcelog-d2e13bf0/intel.h +--- mcelog-d2e13bf0.orig/intel.h 
2016-11-30 11:23:54.530909154 -0500 ++++ mcelog-d2e13bf0/intel.h 2016-11-30 11:24:12.206619450 -0500 +@@ -25,6 +25,9 @@ extern int memory_error_support; + case CPU_BROADWELL_EPEX: \ + case CPU_ATOM: \ + case CPU_KNIGHTS_LANDING: \ ++ case CPU_KNIGHTS_MILL: \ + case CPU_SKYLAKE: \ +- case CPU_SKYLAKE_XEON ++ case CPU_SKYLAKE_XEON: \ ++ case CPU_KABYLAKE: \ ++ case CPU_DENVERTON + +diff -urNp mcelog-d2e13bf0.orig/leaky-bucket.c mcelog-d2e13bf0/leaky-bucket.c +--- mcelog-d2e13bf0.orig/leaky-bucket.c 2016-11-30 11:23:54.537909435 -0500 ++++ mcelog-d2e13bf0/leaky-bucket.c 2016-11-30 11:24:12.206619450 -0500 +@@ -72,7 +72,9 @@ static int timeconv(char unit, int *out) + case 'h': corr *= 60; + case 'm': corr *= 60; + case 0: break; +- default: return -1; ++ default: ++ *out = 1; ++ return -1; + } + *out = corr; + return 0; +diff -urNp mcelog-d2e13bf0.orig/Makefile mcelog-d2e13bf0/Makefile +--- mcelog-d2e13bf0.orig/Makefile 2016-11-30 11:23:54.538909475 -0500 ++++ mcelog-d2e13bf0/Makefile 2016-11-30 11:24:12.202619289 -0500 +@@ -17,11 +17,6 @@ WARNINGS := -Wall -Wextra -Wno-missing-f + -Wstrict-prototypes -Wformat-security -Wmissing-declarations \ + -Wdeclaration-after-statement + +-# The on disk database has still many problems (partly in this code and partly +-# due to missing support from BIOS), so it's disabled by default. 
You can +-# enable it here by uncommenting the following line +-# CONFIG_DISKDB = 1 +- + TRIGGERS=cache-error-trigger dimm-error-trigger page-error-trigger \ + socket-memory-error-trigger \ + bus-error-trigger \ +@@ -36,23 +31,16 @@ OBJ := p4.o k8.o mcelog.o dmi.o tsc.o co + nehalem.o dunnington.o tulsa.o config.o memutil.o msg.o \ + eventloop.o leaky-bucket.o memdb.o server.o trigger.o \ + client.o cache.o sysfs.o yellow.o page.o rbtree.o \ +- xeon75xx.o sandy-bridge.o ivy-bridge.o haswell.o \ ++ sandy-bridge.o ivy-bridge.o haswell.o \ + broadwell_de.o broadwell_epex.o skylake_xeon.o \ ++ denverton.o \ + msr.o bus.o unknown.o +-DISKDB_OBJ := diskdb.o dimm.o db.o +-CLEAN := mcelog dmi tsc dbquery .depend .depend.X dbquery.o ${DISKDB_OBJ} \ ++CLEAN := mcelog dmi tsc dbquery .depend .depend.X dbquery.o \ + version.o version.c version.tmp + DOC := mce.pdf + + ADD_DEFINES := + +-ifdef CONFIG_DISKDB +-ADD_DEFINES := -DCONFIG_DISKDB=1 +-OBJ += ${DISKDB_OBJ} +- +-all: dbquery +-endif +- + SRC := $(OBJ:.o=.c) + + mcelog: ${OBJ} version.o +diff -urNp mcelog-d2e13bf0.orig/mcelog.c mcelog-d2e13bf0/mcelog.c +--- mcelog-d2e13bf0.orig/mcelog.c 2016-11-30 11:23:54.531909194 -0500 ++++ mcelog-d2e13bf0/mcelog.c 2016-11-30 11:25:24.563516902 -0500 +@@ -48,7 +48,6 @@ + #include "tsc.h" + #include "version.h" + #include "config.h" +-#include "diskdb.h" + #include "memutil.h" + #include "eventloop.h" + #include "memdb.h" +@@ -236,9 +235,12 @@ static char *cputype_name[] = { + [CPU_BROADWELL_DE] = "Intel Xeon (Broadwell) D family", + [CPU_BROADWELL_EPEX] = "Intel Xeon v4 (Broadwell) EP/EX", + [CPU_KNIGHTS_LANDING] = "Knights Landing", ++ [CPU_KNIGHTS_MILL] = "Knights Mill", + [CPU_ATOM] = "ATOM", + [CPU_SKYLAKE] = "Skylake", + [CPU_SKYLAKE_XEON] = "Skylake server", ++ [CPU_KABYLAKE] = "Kabylake", ++ [CPU_DENVERTON] = "Denverton", + }; + + static struct config_choice cpu_choices[] = { +@@ -282,10 +284,13 @@ static struct config_choice cpu_choices[ + { "broadwell-ep", CPU_BROADWELL_EPEX 
}, + { "broadwell-ex", CPU_BROADWELL_EPEX }, + { "knightslanding", CPU_KNIGHTS_LANDING }, ++ { "knightsmill", CPU_KNIGHTS_MILL }, + { "xeon-v4", CPU_BROADWELL_EPEX }, + { "atom", CPU_ATOM }, + { "skylake", CPU_SKYLAKE }, + { "skylake_server", CPU_SKYLAKE_XEON }, ++ { "kabylake", CPU_KABYLAKE }, ++ { "denverton", CPU_DENVERTON }, + { NULL } + }; + +@@ -356,7 +361,7 @@ static enum cputype setup_cpuid(u32 cpuv + return CPU_K8; + /* FALL THROUGH */ + default: +- Eprintf("Unknown CPU type vendor %u family %x model %x", ++ Eprintf("Unknown CPU type vendor %u family %u model %u", + cpuvendor, family, model); + return CPU_GENERIC; + } +@@ -449,12 +454,10 @@ static void dump_mce(struct mce *m, unsi + if (cputype != CPU_SANDY_BRIDGE_EP && cputype != CPU_IVY_BRIDGE_EPEX && + cputype != CPU_HASWELL_EPEX && cputype != CPU_BROADWELL && + cputype != CPU_BROADWELL_DE && cputype != CPU_BROADWELL_EPEX && +- cputype != CPU_KNIGHTS_LANDING && cputype != CPU_SKYLAKE && +- cputype != CPU_SKYLAKE_XEON) ++ cputype != CPU_KNIGHTS_LANDING && cputype != CPU_KNIGHTS_MILL && ++ cputype != CPU_SKYLAKE && cputype != CPU_SKYLAKE_XEON && ++ cputype != CPU_KABYLAKE && cputype != CPU_DENVERTON) + resolveaddr(m->addr); +- if (!ascii_mode && ismemerr && (m->status & MCI_STATUS_ADDRV)) { +- diskdb_resolve_addr(m->addr); +- } + } + + static void dump_mce_raw_ascii(struct mce *m, unsigned recordlen) +@@ -889,6 +892,7 @@ static void remove_pidfile(void) + static void signal_exit(int sig) + { + remove_pidfile(); ++ client_cleanup(); + _exit(sig); + } + +@@ -974,7 +978,6 @@ void usage(void) + "--no-imc-log Disable extended iMC logging\n" + "--is-cpu-supported Exit with return code indicating whether the CPU is supported\n" + ); +- diskdb_usage(); + printf("\n"); + print_cputypes(); + exit(1); +@@ -1043,7 +1046,6 @@ static struct option options[] = { + { "debug-numerrors", 0, NULL, O_DEBUG_NUMERRORS }, /* undocumented: for testing */ + { "no-imc-log", 0, NULL, O_NO_IMC_LOG }, + { "is-cpu-supported", 0, NULL, 
O_IS_CPU_SUPPORTED }, +- DISKDB_OPTIONS + {} + }; + +@@ -1191,8 +1193,6 @@ void no_syslog(void) + static int combined_modifier(int opt) + { + int r = modifier(opt); +- if (r == 0) +- r = diskdb_modifier(opt); + return r; + } + +@@ -1369,8 +1369,6 @@ int main(int ac, char **av) + noargs(ac, av); + fprintf(stderr, "mcelog %s\n", MCELOG_VERSION); + exit(0); +- } else if (diskdb_cmd(opt, ac, av)) { +- exit(0); + } else if (opt == 0) + break; + } +diff -urNp mcelog-d2e13bf0.orig/mcelog.c.orig mcelog-d2e13bf0/mcelog.c.orig +--- mcelog-d2e13bf0.orig/mcelog.c.orig 2016-11-30 11:23:54.540909556 -0500 ++++ mcelog-d2e13bf0/mcelog.c.orig 2016-11-30 11:23:40.054327414 -0500 +@@ -85,6 +85,7 @@ static char *pidfile = pidfile_default; + static char *logfile; + static int debug_numerrors; + int imc_log = -1; ++static int check_only = 0; + + static int is_cpu_supported(void); + +@@ -131,7 +132,7 @@ static char *bankname(unsigned bank) + } + } + +-static void resolveaddr(unsigned long addr) ++static void resolveaddr(unsigned long long addr) + { + if (addr && do_dmi && dmi_forced) + dmi_decodeaddr(addr); +@@ -232,8 +233,12 @@ static char *cputype_name[] = { + [CPU_HASWELL] = "Haswell", /* Fill in better name */ + [CPU_HASWELL_EPEX] = "Haswell EP/EX", /* Fill in better name */ + [CPU_BROADWELL] = "Broadwell", ++ [CPU_BROADWELL_DE] = "Intel Xeon (Broadwell) D family", ++ [CPU_BROADWELL_EPEX] = "Intel Xeon v4 (Broadwell) EP/EX", + [CPU_KNIGHTS_LANDING] = "Knights Landing", + [CPU_ATOM] = "ATOM", ++ [CPU_SKYLAKE] = "Skylake", ++ [CPU_SKYLAKE_XEON] = "Skylake server", + }; + + static struct config_choice cpu_choices[] = { +@@ -273,8 +278,14 @@ static struct config_choice cpu_choices[ + { "haswell-ep", CPU_HASWELL_EPEX }, /* Fill in better name */ + { "haswell-ex", CPU_HASWELL_EPEX }, /* Fill in better name */ + { "broadwell", CPU_BROADWELL }, ++ { "broadwell-d", CPU_BROADWELL_DE }, ++ { "broadwell-ep", CPU_BROADWELL_EPEX }, ++ { "broadwell-ex", CPU_BROADWELL_EPEX }, + { "knightslanding", 
CPU_KNIGHTS_LANDING }, ++ { "xeon-v4", CPU_BROADWELL_EPEX }, + { "atom", CPU_ATOM }, ++ { "skylake", CPU_SKYLAKE }, ++ { "skylake_server", CPU_SKYLAKE_XEON }, + { NULL } + }; + +@@ -437,7 +448,9 @@ static void dump_mce(struct mce *m, unsi + } + if (cputype != CPU_SANDY_BRIDGE_EP && cputype != CPU_IVY_BRIDGE_EPEX && + cputype != CPU_HASWELL_EPEX && cputype != CPU_BROADWELL && +- cputype != CPU_KNIGHTS_LANDING) ++ cputype != CPU_BROADWELL_DE && cputype != CPU_BROADWELL_EPEX && ++ cputype != CPU_KNIGHTS_LANDING && cputype != CPU_SKYLAKE && ++ cputype != CPU_SKYLAKE_XEON) + resolveaddr(m->addr); + if (!ascii_mode && ismemerr && (m->status & MCI_STATUS_ADDRV)) { + diskdb_resolve_addr(m->addr); +@@ -916,22 +929,35 @@ void usage(void) + { + fprintf(stderr, + "Usage:\n" ++"\n" + " mcelog [options] [mcelogdevice]\n" + "Decode machine check error records from current kernel.\n" ++"\n" + " mcelog [options] --daemon\n" + "Run mcelog in daemon mode, waiting for errors from the kernel.\n" ++"\n" + " mcelog [options] --client\n" + "Query a currently running mcelog daemon for errors\n" ++"\n" + " mcelog [options] --ascii < log\n" + " mcelog [options] --ascii --file log\n" + "Decode machine check ASCII output from kernel logs\n" ++"\n" + "Options:\n" ++"--version Show the version of mcelog and exit\n" + "--cpu CPU Set CPU type CPU to decode (see below for valid types)\n" ++"--intel-cpu FAMILY,MODEL Set CPU type for an Intel CPU based on family and model from cpuid\n" ++"--k8 Set the CPU to be an AMD K8\n" ++"--p4 Set the CPU to be an Intel Pentium4\n" ++"--core2 Set the CPU to be an Intel Core2\n" ++"--generic Set the CPU to a generic version\n" + "--cpumhz MHZ Set CPU Mhz to decode time (output unreliable, not needed on new kernels)\n" + "--raw (with --ascii) Dump in raw ASCII format for machine processing\n" + "--daemon Run in background waiting for events (needs newer kernel)\n" ++"--client Query a currently running mcelog daemon for errors\n" + "--ignorenodev Exit silently when 
the device cannot be opened\n" + "--file filename With --ascii read machine check log from filename instead of stdin\n" ++"--logfile filename Log decoded machine checks in file filename\n" + "--syslog Log decoded machine checks in syslog (default stdout or syslog for daemon)\n" + "--syslog-error Log decoded machine checks in syslog with error level\n" + "--no-syslog Never log anything to syslog\n" +@@ -946,8 +972,10 @@ void usage(void) + "--num-errors N Only process N errors (for testing)\n" + "--pidfile file Write pid of daemon into file\n" + "--no-imc-log Disable extended iMC logging\n" ++"--is-cpu-supported Exit with return code indicating whether the CPU is supported\n" + ); + diskdb_usage(); ++ printf("\n"); + print_cputypes(); + exit(1); + } +@@ -980,6 +1008,7 @@ enum options { + O_PIDFILE, + O_DEBUG_NUMERRORS, + O_NO_IMC_LOG, ++ O_IS_CPU_SUPPORTED, + }; + + static struct option options[] = { +@@ -1013,6 +1042,7 @@ static struct option options[] = { + { "pidfile", 1, NULL, O_PIDFILE }, + { "debug-numerrors", 0, NULL, O_DEBUG_NUMERRORS }, /* undocumented: for testing */ + { "no-imc-log", 0, NULL, O_NO_IMC_LOG }, ++ { "is-cpu-supported", 0, NULL, O_IS_CPU_SUPPORTED }, + DISKDB_OPTIONS + {} + }; +@@ -1115,6 +1145,9 @@ static int modifier(int opt) + case O_NO_IMC_LOG: + imc_log = 0; + break; ++ case O_IS_CPU_SUPPORTED: ++ check_only = 1; ++ break; + case 0: + break; + default: +@@ -1344,15 +1377,19 @@ int main(int ac, char **av) + + /* before doing anything else let's see if the CPUs are supported */ + if (!cpu_forced && !is_cpu_supported()) { +- fprintf(stderr, "CPU is unsupported\n"); ++ if (!check_only) ++ fprintf(stderr, "CPU is unsupported\n"); + exit(1); + } ++ if (check_only) ++ exit(0); + + /* If the user didn't tell us not to use iMC logging, check if CPU supports it */ + if (imc_log == -1) { + switch (cputype) { + case CPU_SANDY_BRIDGE_EP: + case CPU_IVY_BRIDGE_EPEX: ++ case CPU_HASWELL_EPEX: + imc_log = 1; + break; + default: +diff -urNp 
mcelog-d2e13bf0.orig/mcelog.h mcelog-d2e13bf0/mcelog.h +--- mcelog-d2e13bf0.orig/mcelog.h 2016-11-30 11:23:54.539909515 -0500 ++++ mcelog-d2e13bf0/mcelog.h 2016-11-30 11:24:12.207619490 -0500 +@@ -127,9 +127,12 @@ enum cputype { + CPU_BROADWELL_DE, + CPU_BROADWELL_EPEX, + CPU_KNIGHTS_LANDING, ++ CPU_KNIGHTS_MILL, + CPU_ATOM, + CPU_SKYLAKE, + CPU_SKYLAKE_XEON, ++ CPU_KABYLAKE, ++ CPU_DENVERTON, + }; + + enum option_ranges { +diff -urNp mcelog-d2e13bf0.orig/mcelog.service mcelog-d2e13bf0/mcelog.service +--- mcelog-d2e13bf0.orig/mcelog.service 2016-11-30 11:23:54.540909556 -0500 ++++ mcelog-d2e13bf0/mcelog.service 2016-11-30 11:24:12.207619490 -0500 +@@ -5,6 +5,7 @@ After=syslog.target + [Service] + ExecStart=/usr/sbin/mcelog --ignorenodev --daemon --foreground + StandardOutput=syslog ++SuccessExitStatus=0 15 + + [Install] + WantedBy=multi-user.target +diff -urNp mcelog-d2e13bf0.orig/msr.c mcelog-d2e13bf0/msr.c +--- mcelog-d2e13bf0.orig/msr.c 2016-11-30 11:23:54.538909475 -0500 ++++ mcelog-d2e13bf0/msr.c 2016-11-30 11:24:12.207619490 -0500 +@@ -25,19 +25,20 @@ static void domsr(int cpu, int msr, int + } + if (pread(fd, &data, sizeof data, msr) != sizeof data) { + SYSERRprintf("Cannot read MSR_ERROR_CONTROL from %s\n", fpath); +- return; ++ goto out; + } + data |= bit; + if (pwrite(fd, &data, sizeof data, msr) != sizeof data) { + SYSERRprintf("Cannot write MSR_ERROR_CONTROL to %s\n", fpath); +- return; ++ goto out; + } + if (pread(fd, &data, sizeof data, msr) != sizeof data) { + SYSERRprintf("Cannot re-read MSR_ERROR_CONTROL from %s\n", fpath); +- return; ++ goto out; + } + if ((data & bit) == 0) + Lprintf("No DIMM detection available on cpu %d (normal in virtual environments)\n", cpu); ++out: + close(fd); + } + +diff -urNp mcelog-d2e13bf0.orig/nehalem.c mcelog-d2e13bf0/nehalem.c +--- mcelog-d2e13bf0.orig/nehalem.c 2016-11-30 11:23:54.537909435 -0500 ++++ mcelog-d2e13bf0/nehalem.c 2016-11-30 11:24:12.207619490 -0500 +@@ -24,7 +24,6 @@ + #include "nehalem.h" + #include 
"bitfield.h" + #include "memdb.h" +-#include "xeon75xx.h" + + /* See IA32 SDM Vol3B Appendix E.3.2 ff */ + +@@ -130,7 +129,8 @@ void decode_memory_controller(u32 status + if ((status & 0xf) == 0xf) + strcpy(channel, "unspecified"); + else { +- if (cputype == CPU_KNIGHTS_LANDING) /* Fix for Knights Landing MIC */ ++ /* Fix for Knights Landing/Mill MIC */ ++ if (cputype == CPU_KNIGHTS_LANDING || cputype == CPU_KNIGHTS_MILL) + sprintf(channel, "%u", (status & 0xf) + 3 * (bank == 15)); + else + sprintf(channel, "%u", status & 0xf); +@@ -170,7 +170,6 @@ void xeon75xx_decode_model(struct mce *m + decode_bitfield(status, internal_error_status); + decode_numfield(status, internal_error_numbers); + } +- xeon75xx_decode_dimm(m, msize); + } + + /* Nehalem-EP specific DIMM decoding */ +diff -urNp mcelog-d2e13bf0.orig/p4.c mcelog-d2e13bf0/p4.c +--- mcelog-d2e13bf0.orig/p4.c 2016-11-30 11:23:54.534909314 -0500 ++++ mcelog-d2e13bf0/p4.c 2016-11-30 11:24:12.208619530 -0500 +@@ -39,6 +39,7 @@ + #include "broadwell_de.h" + #include "broadwell_epex.h" + #include "skylake_xeon.h" ++#include "denverton.h" + + /* decode mce for P4/Xeon and Core2 family */ + +@@ -289,10 +290,29 @@ static const char *arstate[4] = { + [3] = "SRAR" + }; + ++static const char *ce_types[] = { ++ [0] = "ecc", ++ [1] = "mirroring with channel failover", ++ [2] = "mirroring. 
Primary channel scrubbed successfully" ++}; ++ ++static int check_for_mirror(__u8 bank, __u64 status, __u64 misc) ++{ ++ switch (cputype) { ++ case CPU_BROADWELL_EPEX: ++ return bdw_epex_ce_type(bank, status, misc); ++ case CPU_SKYLAKE_XEON: ++ return skylake_s_ce_type(bank, status, misc); ++ default: ++ return 0; ++ } ++} ++ + static int decode_mci(__u64 status, __u64 misc, int cpu, unsigned mcgcap, int *ismemerr, + int socket, __u8 bank) + { + u64 track = 0; ++ int i; + + Wprintf("MCi status:\n"); + if (!(status & MCI_STATUS_VAL)) +@@ -303,6 +323,8 @@ static int decode_mci(__u64 status, __u6 + + if (status & MCI_STATUS_UC) + Wprintf("Uncorrected error\n"); ++ else if ((i = check_for_mirror(bank, status, misc))) ++ Wprintf("Corrected error by %s\n", ce_types[i]); + else + Wprintf("Corrected error\n"); + +@@ -428,6 +450,9 @@ void decode_intel_mc(struct mce *log, in + case CPU_SKYLAKE_XEON: + skylake_s_decode_model(cputype, log->bank, log->status, log->misc); + break; ++ case CPU_DENVERTON: ++ denverton_decode_model(cputype, log->bank, log->status, log->misc); ++ break; + } + } + +diff -urNp mcelog-d2e13bf0.orig/README mcelog-d2e13bf0/README +--- mcelog-d2e13bf0.orig/README 2016-11-30 11:23:54.538909475 -0500 ++++ mcelog-d2e13bf0/README 1969-12-31 19:00:00.000000000 -0500 +@@ -1,119 +0,0 @@ +-mcelog is the user space backend for logging machine check errors +-reported by the hardware to the kernel. The kernel does the immediate +-actions (like killing processes etc.) and mcelog decodes the errors +-and manages various other advanced error responses like +-offlining memory, CPUs or triggering events. In addition +-mcelog also handles corrected errors, by logging and accounting them. +- +-It primarily handles machine checks and thermal events, which +-are reported for errors detected by the CPU. 
+- +-For more details on what mcelog can do and the underlying theory +-see http://www.mcelog.org +- +-It is recommended that mcelog runs on all x86 machines, both +-64bit (since early 2.6) and 32bit (since 2.6.32) +- +-mcelog can run in several modi: cronjob, trigger, daemon +- +-cronjob is the old method. mcelog runs every 5 minutes from cron and checks +-for errors. Disadvantage of this is that it can delay error reporting +-significantly (upto 10 minutes) and does not allow mcelog to keep extended state. +- +-trigger is a newer method where the kernel runs mcelog on a error. +-This is configured with +-echo /usr/sbin/mcelog > /sys/devices/system/machinecheck/machinecheck0/trigger +-This is faster, but still doesn't allow mcelog to keep state, +-and has relatively high overhead for each error because a program has +-to be initialized from scratch. +- +-In daemon mode mcelog runs continuously as a daemon in the background +-and wait for errors. It is enabled by running mcelog --daemon & +-from a init script. This is the fastest and most feature-ful. +- +-The recommended mode is daemon, because several new functions (like page error +-predictive failure analysis) require a continuously running daemon. +- +-Documentation: +- +-The primary reference documentation are the man pages. +-lk10-mcelog.pdf has a overview over the errors mcelog handles +-(originally from Linux Kongress 2010) +-mce.pdf is a very old paper describing the first releases of mcelog +-(some parts are obsolete) +- +-For distributors: +- +-You can run mcelog from systemd or similar daemons. An example +-systemd unit file is in mcelog.service. +- +-For older distributions using init scripts: +- +-Please install a init script by default that runs mcelog in daemon mode. +-The mcelog.init script is a good starting point. +- +-Also install a logrotated file (mcelog.logrotate) or equivalent +-when mcelog is running in daemon mode. +- +-These two are not in make install. 
+- +-The installation also requires a config file (/etc/mcelog.conf) and +-the default triggers. These are all installed by "make install" +- +-/dev/mcelog is needed for mcelog operation +-If it's not there it can be created with mknod /dev/mcelog c 10 227 +-Normally it should be created automatically in udev. +- +-Security: +- +-mcelog needs to run as root because it might trigger actions like +-page-offlining, which require CAP_SYS_ADMIN. Also it opens /dev/mcelog +-and a unix socket for client support. +- +-It also opens /dev/mem to parse the BIOS DMI tables. It is careful +-to close the file descriptor and unmap any mappings after using them. +- +-There is support for changing the user in daemon mode after opening +-the device and the sockets, but that would stop triggers from +-doing corrective action that require root. +- +-In principle it would be possible to only keep CAP_SYS_ADMIN +-for page-offling, but that would prevent triggers from doing root +-only actions not covered by it (and CAP_SYS_ADMIN is not that different +-from full root) +- +-In daemon mode mcelog listens to a unix socket and processes +-requests from mcelog --client. This can be disabled in the configuration file. +-The uid/gid of the requestor is checked on access and is configurable +-(default 0/0 only). The command parsing code is very straight forward +-(server.c) The client parsing/reply is currently done with full privileges +-of the daemon. +- +-Testing: +- +-There is a simple test suite in tests/. The test suite requires root to +-run and access to mce-inject and a kernel with MCE injection support +-(CONFIG_X86_MCE_INJECT). It will kill any running mcelog daemon. +- +-Run it with "make test" +- +-The test suite requires the mce-inject tool, available from +-git://git.kernel.org/pub/utils/cpu/mce/mce-inject.git +-The mce-inject executable must be either in $PATH or in the +-../mce-inject directory. +- +-You can also test under valgrind with "make valgrind-test". 
For +-this valgrind needs to be installed of course. Advanced +-valgrind options can be specified with +-make VALGRIND="valgrind --option" valgrind-test +- +-Other checks: +- +-make iccverify and make clangverify run the static verifiers +-in clang and icc respectively. +- +-License: +- +-This program is licensed under the subject of the GNU Public General +-License, v.2 +- +diff -urNp mcelog-d2e13bf0.orig/README.md mcelog-d2e13bf0/README.md +--- mcelog-d2e13bf0.orig/README.md 1969-12-31 19:00:00.000000000 -0500 ++++ mcelog-d2e13bf0/README.md 2016-11-30 11:24:12.202619289 -0500 +@@ -0,0 +1,129 @@ ++# mcelog ++ ++mcelog is the user space backend for logging machine check errors reported ++by the hardware to the kernel. The kernel does the immediate actions ++(like killing processes etc.) and mcelog decodes the errors and manages ++various other advanced error responses like offlining memory, CPUs or triggering ++events. In addition mcelog also handles corrected errors, by logging and ++accounting them. ++It primarily handles machine checks and thermal events, which are reported ++for errors detected by the CPU. ++ ++For more details on what mcelog can do and the underlying theory ++see [mcelog.org](http://www.mcelog.org). ++ ++It is recommended that mcelog runs on all x86 machines, both 64bit ++(since early 2.6) and 32bit (since 2.6.32). ++ ++mcelog can run in several modes: ++ ++- cronjob ++- trigger ++- daemon ++ ++**cronjob** is the old method. mcelog runs every 5 minutes from cron and checks ++for errors. Disadvantage of this is that it can delay error reporting ++significantly (upto 10 minutes) and does not allow mcelog to keep extended state. ++ ++**trigger** is a newer method where the kernel runs mcelog on a error. 
++ ++This is configured with: ++```sh ++echo /usr/sbin/mcelog > /sys/devices/system/machinecheck/machinecheck0/trigger ++``` ++This is faster, but still doesn't allow mcelog to keep state, ++and has relatively high overhead for each error because a program has ++to be initialized from scratch. ++ ++In **daemon** mode mcelog runs continuously as a daemon in the background and ++wait for errors. It is enabled by running `mcelog --daemon &` ++from a init script. This is the fastest and most feature-ful. ++ ++The recommended mode is **daemon**, because several new functions (like page ++error predictive failure analysis) require a continuously running daemon. ++ ++## Documentation ++ ++- The primary reference documentation are the man pages. ++- [lk10-mcelog.pdf](https://github.com/andikleen/mcelog/blob/master/lk10-mcelog.pdf) ++ has a overview over the errors mcelog handles (originally from Linux Kongress 2010). ++- [mce.pdf](https://github.com/mjtrangoni/mcelog/blob/README.md/mce.pdf) ++ is a very old paper describing the first releases of mcelog (some parts are obsolete). ++ ++## For distributors ++ ++You can run mcelog from systemd or similar daemons. An example systemd unit ++file is in `mcelog.service`. ++ ++### For older distributions using init scripts ++ ++Please install an init script by default that runs mcelog in daemon mode. ++The `mcelog.init` script is a good starting point. Also install a ++logrotated file (mcelog.logrotate) or equivalent when mcelog is running ++in daemon mode. ++These two are not in make install. ++ ++The installation also requires a config file `/etc/mcelog.conf` and the default ++triggers. These are all installed by `make install` ++ ++`/dev/mcelog` is needed for mcelog operation. If it's not there it can be ++created with: ++```sh ++mknod /dev/mcelog c 10 227 ++``` ++ ++Normally it should be created automatically in udev. 
++ ++## Security ++ ++mcelog needs to run as root because it might trigger actions like ++page-offlining, which require `CAP_SYS_ADMIN`. Also it opens `/dev/mcelog` ++and an UNIX socket for client support. ++ ++It also opens `/dev/mem` to parse the BIOS DMI tables. It is careful to close ++the file descriptor and unmap any mappings after using them. ++ ++There is support for changing the user in daemon mode after opening the device ++and the sockets, but that would stop triggers from doing corrective action ++that require `root`. ++ ++In principle it would be possible to only keep `CAP_SYS_ADMIN` for page-offling, ++but that would prevent triggers from doing root-only actions not covered by ++it (and `CAP_SYS_ADMIN` is not that different from full root) ++ ++In `daemon` mode mcelog listens to a UNIX socket and processes requests from ++`sh mcelog --client`. This can be disabled in the configuration file. ++The uid/gid of the requestor is checked on access and is configurable ++(default 0/0 only). The command parsing code is very straight forward ++(server.c). The client parsing/reply is currently done with full privileges ++of the `daemon`. ++ ++## Testing ++ ++There is a simple test suite in `sh tests/`. The test suite requires root to ++run and access to mce-inject and a kernel with MCE injection support ++`CONFIG_X86_MCE_INJECT`. It will kill any running mcelog daemon. ++ ++Run it with `sh make test`. ++ ++The test suite requires the ++[mce-inject](git://git.kernel.org/pub/utils/cpu/mce/mce-inject.git) tool. ++The `mce-inject` executable must be either in `$PATH` or in the ++`../mce-inject` directory. ++ ++You can also test under **valgrind** with `sh make valgrind-test`. For this ++valgrind needs to be installed of course. 
Advanced valgrind options can be ++specified with: ++```sh ++make VALGRIND="valgrind --option" valgrind-test ++``` ++ ++### Other checks ++ ++`make iccverify` and `make clangverify` run the static verifiers in *clang* ++and *icc* respectively. ++ ++## License ++ ++This program is licensed under the subject of the GNU Public General ++License, v.2 +diff -urNp mcelog-d2e13bf0.orig/skylake_xeon.c mcelog-d2e13bf0/skylake_xeon.c +--- mcelog-d2e13bf0.orig/skylake_xeon.c 2016-11-30 11:23:54.538909475 -0500 ++++ mcelog-d2e13bf0/skylake_xeon.c 2016-11-30 11:24:12.208619530 -0500 +@@ -23,6 +23,11 @@ + #include "skylake_xeon.h" + #include "memdb.h" + ++/* Memory error was corrected by mirroring with channel failover */ ++#define SKX_MCI_MISC_FO (1ULL<<63) ++/* Memory error was corrected by mirroring and primary channel scrubbed successfully */ ++#define SKX_MCI_MISC_MC (1ULL<<62) ++ + /* See IA32 SDM Vol3B Table 16-27 */ + + static char *pcu_1[] = { +@@ -208,3 +213,18 @@ void skylake_s_decode_model(int cputype, + break; + } + } ++ ++int skylake_s_ce_type(int bank, u64 status, u64 misc) ++{ ++ if (!(bank == 7 || bank == 8)) ++ return 0; ++ ++ if (status & MCI_STATUS_MISCV) { ++ if (misc & SKX_MCI_MISC_FO) ++ return 1; ++ if (misc & SKX_MCI_MISC_MC) ++ return 2; ++ } ++ ++ return 0; ++} +diff -urNp mcelog-d2e13bf0.orig/skylake_xeon.h mcelog-d2e13bf0/skylake_xeon.h +--- mcelog-d2e13bf0.orig/skylake_xeon.h 2016-11-30 11:23:54.539909515 -0500 ++++ mcelog-d2e13bf0/skylake_xeon.h 2016-11-30 11:24:12.208619530 -0500 +@@ -1 +1,2 @@ + void skylake_s_decode_model(int cputype, int bank, u64 status, u64 misc); ++int skylake_s_ce_type(int bank, u64 status, u64 misc); +diff -urNp mcelog-d2e13bf0.orig/sysfs.c mcelog-d2e13bf0/sysfs.c +--- mcelog-d2e13bf0.orig/sysfs.c 2016-11-30 11:23:54.534909314 -0500 ++++ mcelog-d2e13bf0/sysfs.c 2016-11-30 11:24:12.208619530 -0500 +@@ -37,10 +37,10 @@ char *read_field(char *base, char *name) + + asprintf(&fn, "%s/%s", base, name); + fd = open(fn, O_RDONLY); 
++ free(fn); + if (fstat(fd, &st) < 0) + goto bad; + buf = xalloc(st.st_size); +- free(fn); + if (fd < 0) + goto bad; + n = read(fd, buf, st.st_size); +@@ -81,10 +81,12 @@ unsigned read_field_map(char *base, char + if (!strcmp(val, map->name)) + break; + } +- free(val); +- if (map->name) ++ if (map->name) { ++ free(val); + return map->value; ++ } + Eprintf("sysfs field %s/%s has unknown string value `%s'\n", base, name, val); ++ free(val); + return -1; + } + +diff -urNp mcelog-d2e13bf0.orig/TODO-diskdb mcelog-d2e13bf0/TODO-diskdb +--- mcelog-d2e13bf0.orig/TODO-diskdb 2016-11-30 11:23:54.530909154 -0500 ++++ mcelog-d2e13bf0/TODO-diskdb 1969-12-31 19:00:00.000000000 -0500 +@@ -1,31 +0,0 @@ +- +-diskdb was a experimental attempt to track errors per DIMM +-on disk. It ran into problems unfortunately. +- +-diskdb is not compiled by default now. It can be enabled with +-make CONFIG_DISKDB=1 +- +-It is replaced with a new memory only database now that +-relies on daemon mode. +- +-Open fundamental issues: +-- DIMM tracking over boot doesn't work due to SMBIOS not reporting +-serial numbers +- +-Code problems: +-- Missing aging +-- For Intel Nehalem CE errors need reverse smbios translation +-- SMBIOS interleaving decoding missing +-- Some crash races in db.c (see comments there) +-- Need lock timeout +-- Default enable/disable heuristics (smbios check etc.) +-- write db test suite (with crash) +- +-General: +-- Missing CPU database +- +-Missing: +-- rename to different name without memory +- +-Old: +-- add ifdef for memory because it's broken +diff -urNp mcelog-d2e13bf0.orig/xeon75xx.c mcelog-d2e13bf0/xeon75xx.c +--- mcelog-d2e13bf0.orig/xeon75xx.c 2016-11-30 11:23:54.537909435 -0500 ++++ mcelog-d2e13bf0/xeon75xx.c 1969-12-31 19:00:00.000000000 -0500 +@@ -1,39 +0,0 @@ +-/* Copyright (C) 2009/2010 Intel Corporation +- +- Decode Intel Xeon75xx memory errors. Requires the mce-75xx.ko driver +- load. The core errors are the same as Nehalem. 
+- +- mcelog is free software; you can redistribute it and/or +- modify it under the terms of the GNU General Public +- License as published by the Free Software Foundation; version +- 2. +- +- mcelog is distributed in the hope that it will be useful, +- but WITHOUT ANY WARRANTY; without even the implied warranty of +- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +- General Public License for more details. +- +- You should find a copy of v2 of the GNU General Public License somewhere +- on your Linux system; if not, write to the Free Software Foundation, +- Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +- +- Author: Andi Kleen +-*/ +- +-#include +-#include +-#include "mcelog.h" +-#include "xeon75xx.h" +- +-/* This used to decode the old xeon 75xx memory error aux format. But that has never +- been merged into mainline kernels, so removed it again. */ +- +-void +-xeon75xx_memory_error(struct mce *m, unsigned msize, int *channel, int *dimm) +-{ +-} +- +- +-void xeon75xx_decode_dimm(struct mce *m, unsigned msize) +-{ +-} +diff -urNp mcelog-d2e13bf0.orig/xeon75xx.h mcelog-d2e13bf0/xeon75xx.h +--- mcelog-d2e13bf0.orig/xeon75xx.h 2016-11-30 11:23:54.537909435 -0500 ++++ mcelog-d2e13bf0/xeon75xx.h 1969-12-31 19:00:00.000000000 -0500 +@@ -1,2 +0,0 @@ +-void xeon75xx_memory_error(struct mce *m, unsigned msize, int *channel, int *dimm); +-void xeon75xx_decode_dimm(struct mce *m, unsigned msize); diff --git a/SPECS/mcelog.spec b/SPECS/mcelog.spec index 29a5566..952b3eb 100644 --- a/SPECS/mcelog.spec +++ b/SPECS/mcelog.spec @@ -1,10 +1,10 @@ %define last_tar_git_commit d2e13bf0 -%define last_git_commit e4aca63 +%define last_git_commit 94d853b2ea81 Summary: Tool to translate x86-64 CPU Machine Check Exception data Name: mcelog -Version: 136 -Release: 2.%{last_git_commit}%{?dist} +Version: 144 +Release: 3.%{last_git_commit}%{?dist} Epoch: 3 Group: System Environment/Base License: GPLv2 @@ -23,6 +23,9 @@ Patch4: mcelog-update-9de4924.patch 
Patch5: mcelog-update-e7e0ac1.patch Patch6: mcelog-patch-1bd2984.patch Patch7: mcelog-update-e4aca63.patch +Patch8: mcelog-update-94d853b2ea81.patch +Patch9: mcelog-patch-e9aeed03f3d1.patch +Patch10: mcelog-patch-cfa11588ad8b.patch URL: https://github.com/andikleen/mcelog.git Buildroot: %{_tmppath}/%{name}-%{version}-root ExclusiveArch: i686 x86_64 @@ -44,7 +47,10 @@ on x86-32 and x86-64 systems. It can be run either as a daemon, or by cron. %patch4 -p1 -b .mcelog-update-9de4924 %patch5 -p1 -b .mcelog-update-e7e0ac1 %patch6 -p1 -b .mcelog-patch-1bd2984 -%patch7 -p1 -b .mcelog-patch-e4aca63 +%patch7 -p1 -b .mcelog-update-e4aca63 +%patch8 -p1 -b .mcelog-update-94d853b2ea81 +%patch9 -p1 -b .mcelog-patch-e9aeed03f3d1 +%patch10 -p1 -b .mcelog-patch-cfa11588ad8b %build mkdir -p $RPM_BUILD_ROOT/%{_sysconfdir} @@ -96,7 +102,7 @@ fi %files %defattr(-,root,root,-) -%doc README CHANGES +%doc README.md CHANGES %{_sbindir}/mcelog %dir %{_sysconfdir}/mcelog %{_sysconfdir}/mcelog/triggers @@ -107,8 +113,19 @@ fi %attr(0644,root,root) %{_mandir}/*/* %changelog -* Mon Oct 31 2016 Prarit Bhargava - 3:136.2.e4aca63 -- fix post-uninstall script warning during upgrade [1388427] +* Thu Apr 27 2017 Prarit Bhargava - 3:144.3.94d853b2ea81 +- Fix "warning: 16 bytes ignored in each record" warning [1445809] +* Thu Feb 2 2017 Prarit Bhargava - 3:144.2.94d853b2ea81 +- mcelog: is_cpu_supported() error message must be printed Eprintf [1406626] +* Wed Nov 30 2016 Prarit Bhargava - 3:144.1.94d853b2ea81 +- update NVR to 144 to match upstream +- add Denverton SoC support [1273768] +- add Kabylake U/Y support, 0x8E [1310954] +- add Kabylake H/S support, 0x9E [1310955] +- add Knights Mill support [1381316] +- mcelog didn't remove /var/run/mcelog-client when exiting [1362123] +* Mon Oct 24 2016 Prarit Bhargava - 3:136.2.e4aca63 +- fix post-uninstall script warning during upgrade [1257116] * Fri May 13 2016 Prarit Bhargava - 3:136-1.e4aca63 - update NVR to 136 to match upstream [1336431] - additional
general fixes [1336431]