krishnanadh / rpms / rasdaemon

Forked from rpms/rasdaemon a year ago
Clone

Blame SOURCES/9acef39f13833f7d53ef96abc5a72e79384260f4.patch

df8b6a
commit 9acef39f13833f7d53ef96abc5a72e79384260f4
df8b6a
Author: Naveen Krishna Chatradhi <nchatrad@amd.com>
df8b6a
Date:   Tue Jun 1 11:01:17 2021 +0530
df8b6a
df8b6a
    rasdaemon: Add new SMCA bank types with error decoding
df8b6a
    
df8b6a
    Upcoming systems with Scalable Machine Check Architecture (SMCA) have
df8b6a
    new MCA banks added.
df8b6a
    
df8b6a
    This patch adds the (HWID, MCATYPE) tuple, name and error decoding for
df8b6a
    those new SMCA banks.
df8b6a
    While at it, merge duplicate names in smca_names[] via range designators.
df8b6a
    
df8b6a
    Signed-off-by: Muralidhara M K <muralimk@amd.com>
df8b6a
    Signed-off-by: Naveen Krishna Chatradhi <nchatrad@amd.com>
df8b6a
    Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
df8b6a
df8b6a
diff --git a/mce-amd-smca.c b/mce-amd-smca.c
df8b6a
index 7c619fd..e0cf512 100644
df8b6a
--- a/mce-amd-smca.c
df8b6a
+++ b/mce-amd-smca.c
df8b6a
@@ -47,7 +47,7 @@
df8b6a
 /* These may be used by multiple smca_hwid_mcatypes */
df8b6a
 enum smca_bank_types {
df8b6a
 	SMCA_LS = 0,    /* Load Store */
df8b6a
-	SMCA_LS_V2,	/* Load Store */
df8b6a
+	SMCA_LS_V2,
df8b6a
 	SMCA_IF,        /* Instruction Fetch */
df8b6a
 	SMCA_L2_CACHE,  /* L2 Cache */
df8b6a
 	SMCA_DE,        /* Decoder Unit */
df8b6a
@@ -56,17 +56,22 @@ enum smca_bank_types {
df8b6a
 	SMCA_FP,        /* Floating Point */
df8b6a
 	SMCA_L3_CACHE,  /* L3 Cache */
df8b6a
 	SMCA_CS,        /* Coherent Slave */
df8b6a
-	SMCA_CS_V2,     /* Coherent Slave V2 */
df8b6a
+	SMCA_CS_V2,
df8b6a
 	SMCA_PIE,       /* Power, Interrupts, etc. */
df8b6a
 	SMCA_UMC,       /* Unified Memory Controller */
df8b6a
+	SMCA_UMC_V2,
df8b6a
 	SMCA_PB,        /* Parameter Block */
df8b6a
 	SMCA_PSP,       /* Platform Security Processor */
df8b6a
-	SMCA_PSP_V2,    /* Platform Security Processor V2 */
df8b6a
+	SMCA_PSP_V2,
df8b6a
 	SMCA_SMU,       /* System Management Unit */
df8b6a
-	SMCA_SMU_V2,    /* System Management Unit V2 */
df8b6a
+	SMCA_SMU_V2,
df8b6a
 	SMCA_MP5,	/* Microprocessor 5 Unit */
df8b6a
 	SMCA_NBIO,	/* Northbridge IO Unit */
df8b6a
 	SMCA_PCIE,	/* PCI Express Unit */
df8b6a
+	SMCA_PCIE_V2,
df8b6a
+	SMCA_XGMI_PCS,	/* xGMI PCS Unit */
df8b6a
+	SMCA_XGMI_PHY,	/* xGMI PHY Unit */
df8b6a
+	SMCA_WAFL_PHY,	/* WAFL PHY Unit */
df8b6a
 	N_SMCA_BANK_TYPES
df8b6a
 };
df8b6a
 
df8b6a
@@ -237,6 +242,22 @@ static const char * const smca_umc_mce_desc[] = {
df8b6a
 	"Command/address parity error",
df8b6a
 	"Write data CRC error",
df8b6a
 };
df8b6a
+
df8b6a
+static const char * const smca_umc2_mce_desc[] = {
df8b6a
+	"DRAM ECC error",
df8b6a
+	"Data poison error",
df8b6a
+	"SDP parity error",
df8b6a
+	"Reserved",
df8b6a
+	"Address/Command parity error",
df8b6a
+	"Write data parity error",
df8b6a
+	"DCQ SRAM ECC error",
df8b6a
+	"Reserved",
df8b6a
+	"Read data parity error",
df8b6a
+	"Rdb SRAM ECC error",
df8b6a
+	"RdRsp SRAM ECC error",
df8b6a
+	"LM32 MP errors",
df8b6a
+};
df8b6a
+
df8b6a
 /* Parameter Block */
df8b6a
 static const char * const smca_pb_mce_desc[] = {
df8b6a
 	"Parameter Block RAM ECC error",
df8b6a
@@ -314,6 +335,55 @@ static const char * const smca_pcie_mce_desc[] = {
df8b6a
 	"CCIX Non-okay write response with data error",
df8b6a
 };
df8b6a
 
df8b6a
+static const char * const smca_pcie2_mce_desc[] = {
df8b6a
+	"SDP Parity Error logging",
df8b6a
+};
df8b6a
+
df8b6a
+static const char * const smca_xgmipcs_mce_desc[] = {
df8b6a
+	"Data Loss Error",
df8b6a
+	"Training Error",
df8b6a
+	"Flow Control Acknowledge Error",
df8b6a
+	"Rx Fifo Underflow Error",
df8b6a
+	"Rx Fifo Overflow Error",
df8b6a
+	"CRC Error",
df8b6a
+	"BER Exceeded Error",
df8b6a
+	"Tx Vcid Data Error",
df8b6a
+	"Replay Buffer Parity Error",
df8b6a
+	"Data Parity Error",
df8b6a
+	"Replay Fifo Overflow Error",
df8b6a
+	"Replay Fifo Underflow Error",
df8b6a
+	"Elastic Fifo Overflow Error",
df8b6a
+	"Deskew Error",
df8b6a
+	"Flow Control CRC Error",
df8b6a
+	"Data Startup Limit Error",
df8b6a
+	"FC Init Timeout Error",
df8b6a
+	"Recovery Timeout Error",
df8b6a
+	"Ready Serial Timeout Error",
df8b6a
+	"Ready Serial Attempt Error",
df8b6a
+	"Recovery Attempt Error",
df8b6a
+	"Recovery Relock Attempt Error",
df8b6a
+	"Replay Attempt Error",
df8b6a
+	"Sync Header Error",
df8b6a
+	"Tx Replay Timeout Error",
df8b6a
+	"Rx Replay Timeout Error",
df8b6a
+	"LinkSub Tx Timeout Error",
df8b6a
+	"LinkSub Rx Timeout Error",
df8b6a
+	"Rx CMD Pocket Error",
df8b6a
+};
df8b6a
+
df8b6a
+static const char * const smca_xgmiphy_mce_desc[] = {
df8b6a
+	"RAM ECC Error",
df8b6a
+	"ARC instruction buffer parity error",
df8b6a
+	"ARC data buffer parity error",
df8b6a
+	"PHY APB error",
df8b6a
+};
df8b6a
+
df8b6a
+static const char * const smca_waflphy_mce_desc[] = {
df8b6a
+	"RAM ECC Error",
df8b6a
+	"ARC instruction buffer parity error",
df8b6a
+	"ARC data buffer parity error",
df8b6a
+	"PHY APB error",
df8b6a
+};
df8b6a
 
df8b6a
 struct smca_mce_desc {
df8b6a
 	const char * const *descs;
df8b6a
@@ -333,6 +403,7 @@ static struct smca_mce_desc smca_mce_descs[] = {
df8b6a
 	[SMCA_CS_V2]    = { smca_cs2_mce_desc,  ARRAY_SIZE(smca_cs2_mce_desc) },
df8b6a
 	[SMCA_PIE]      = { smca_pie_mce_desc,  ARRAY_SIZE(smca_pie_mce_desc) },
df8b6a
 	[SMCA_UMC]      = { smca_umc_mce_desc,  ARRAY_SIZE(smca_umc_mce_desc) },
df8b6a
+	[SMCA_UMC_V2]	= { smca_umc2_mce_desc,	ARRAY_SIZE(smca_umc2_mce_desc)	},
df8b6a
 	[SMCA_PB]       = { smca_pb_mce_desc,   ARRAY_SIZE(smca_pb_mce_desc)  },
df8b6a
 	[SMCA_PSP]      = { smca_psp_mce_desc,  ARRAY_SIZE(smca_psp_mce_desc) },
df8b6a
 	[SMCA_PSP_V2]   = { smca_psp2_mce_desc, ARRAY_SIZE(smca_psp2_mce_desc)},
df8b6a
@@ -341,6 +412,10 @@ static struct smca_mce_desc smca_mce_descs[] = {
df8b6a
 	[SMCA_MP5]      = { smca_mp5_mce_desc,  ARRAY_SIZE(smca_mp5_mce_desc) },
df8b6a
 	[SMCA_NBIO]     = { smca_nbio_mce_desc, ARRAY_SIZE(smca_nbio_mce_desc)},
df8b6a
 	[SMCA_PCIE]     = { smca_pcie_mce_desc, ARRAY_SIZE(smca_pcie_mce_desc)},
df8b6a
+	[SMCA_PCIE_V2]	= { smca_pcie2_mce_desc,   ARRAY_SIZE(smca_pcie2_mce_desc)	},
df8b6a
+	[SMCA_XGMI_PCS]	= { smca_xgmipcs_mce_desc, ARRAY_SIZE(smca_xgmipcs_mce_desc)	},
df8b6a
+	[SMCA_XGMI_PHY]	= { smca_xgmiphy_mce_desc, ARRAY_SIZE(smca_xgmiphy_mce_desc)	},
df8b6a
+	[SMCA_WAFL_PHY]	= { smca_waflphy_mce_desc, ARRAY_SIZE(smca_waflphy_mce_desc)	},
df8b6a
 };
df8b6a
 
df8b6a
 struct smca_hwid {
df8b6a
@@ -369,6 +444,8 @@ static struct smca_hwid smca_hwid_mcatypes[] = {
df8b6a
 
df8b6a
 	/* Unified Memory Controller MCA type */
df8b6a
 	{ SMCA_UMC,      0x00000096 },
df8b6a
+	/* Heterogeneous systems may have both UMC and UMC_v2 types on the same node. */
df8b6a
+	{ SMCA_UMC_V2,   0x00010096 },
df8b6a
 
df8b6a
 	/* Parameter Block MCA type */
df8b6a
 	{ SMCA_PB,       0x00000005 },
df8b6a
@@ -389,6 +466,16 @@ static struct smca_hwid smca_hwid_mcatypes[] = {
df8b6a
 
df8b6a
 	/* PCI Express Unit MCA type */
df8b6a
 	{ SMCA_PCIE,     0x00000046 },
df8b6a
+	{ SMCA_PCIE_V2,  0x00010046 },
df8b6a
+
df8b6a
+	/* Ext Global Memory Interconnect PCS MCA type */
df8b6a
+	{ SMCA_XGMI_PCS, 0x00000050 },
df8b6a
+
df8b6a
+	/* Ext Global Memory Interconnect PHY MCA type */
df8b6a
+	{ SMCA_XGMI_PHY, 0x00000259 },
df8b6a
+
df8b6a
+	/* WAFL PHY MCA type */
df8b6a
+	{ SMCA_WAFL_PHY, 0x00000267 },
df8b6a
 };
df8b6a
 
df8b6a
 struct smca_bank_name {
df8b6a
@@ -396,27 +483,28 @@ struct smca_bank_name {
df8b6a
 };
df8b6a
 
df8b6a
 static struct smca_bank_name smca_names[] = {
df8b6a
-	[SMCA_LS]       = { "Load Store Unit" },
df8b6a
-	[SMCA_LS_V2]    = { "Load Store Unit" },
df8b6a
-	[SMCA_IF]       = { "Instruction Fetch Unit" },
df8b6a
-	[SMCA_L2_CACHE] = { "L2 Cache" },
df8b6a
-	[SMCA_DE]       = { "Decode Unit" },
df8b6a
-	[SMCA_RESERVED] = { "Reserved" },
df8b6a
-	[SMCA_EX]       = { "Execution Unit" },
df8b6a
-	[SMCA_FP]       = { "Floating Point Unit" },
df8b6a
-	[SMCA_L3_CACHE] = { "L3 Cache" },
df8b6a
-	[SMCA_CS]       = { "Coherent Slave" },
df8b6a
-	[SMCA_CS_V2]    = { "Coherent Slave" },
df8b6a
-	[SMCA_PIE]      = { "Power, Interrupts, etc." },
df8b6a
-	[SMCA_UMC]      = { "Unified Memory Controller" },
df8b6a
-	[SMCA_PB]       = { "Parameter Block" },
df8b6a
-	[SMCA_PSP]      = { "Platform Security Processor" },
df8b6a
-	[SMCA_PSP_V2]   = { "Platform Security Processor" },
df8b6a
-	[SMCA_SMU]      = { "System Management Unit" },
df8b6a
-	[SMCA_SMU_V2]   = { "System Management Unit" },
df8b6a
-	[SMCA_MP5]	= { "Microprocessor 5 Unit" },
df8b6a
-	[SMCA_NBIO]     = { "Northbridge IO Unit" },
df8b6a
-	[SMCA_PCIE]     = { "PCI Express Unit" },
df8b6a
+	[SMCA_LS ... SMCA_LS_V2]	= { "Load Store Unit" },
df8b6a
+	[SMCA_IF]			= { "Instruction Fetch Unit" },
df8b6a
+	[SMCA_L2_CACHE]			= { "L2 Cache" },
df8b6a
+	[SMCA_DE]			= { "Decode Unit" },
df8b6a
+	[SMCA_RESERVED]			= { "Reserved" },
df8b6a
+	[SMCA_EX]			= { "Execution Unit" },
df8b6a
+	[SMCA_FP]			= { "Floating Point Unit" },
df8b6a
+	[SMCA_L3_CACHE]			= { "L3 Cache" },
df8b6a
+	[SMCA_CS ... SMCA_CS_V2]	= { "Coherent Slave" },
df8b6a
+	[SMCA_PIE]			= { "Power, Interrupts, etc." },
df8b6a
+	[SMCA_UMC]			= { "Unified Memory Controller" },
df8b6a
+	[SMCA_UMC_V2]			= { "Unified Memory Controller V2" },
df8b6a
+	[SMCA_PB]			= { "Parameter Block" },
df8b6a
+	[SMCA_PSP ... SMCA_PSP_V2]	= { "Platform Security Processor" },
df8b6a
+	[SMCA_SMU ... SMCA_SMU_V2]	= { "System Management Unit" },
df8b6a
+	[SMCA_MP5]			= { "Microprocessor 5 Unit" },
df8b6a
+	[SMCA_NBIO]			= { "Northbridge IO Unit" },
df8b6a
+	[SMCA_PCIE ... SMCA_PCIE_V2]	= { "PCI Express Unit" },
df8b6a
+	[SMCA_XGMI_PCS]			= { "Ext Global Memory Interconnect PCS Unit" },
df8b6a
+	[SMCA_XGMI_PHY]			= { "Ext Global Memory Interconnect PHY Unit" },
df8b6a
+	[SMCA_WAFL_PHY]			= { "WAFL PHY Unit" },
df8b6a
+
df8b6a
 };
df8b6a
 
df8b6a
 static void amd_decode_errcode(struct mce_event *e)