diff --git a/.rasdaemon.metadata b/.rasdaemon.metadata index a3cbaea..cfbe66d 100644 --- a/.rasdaemon.metadata +++ b/.rasdaemon.metadata @@ -1 +1 @@ -667c81ed421c236c910e8e6a51ca1fff8775f5a8 SOURCES/rasdaemon-0.6.6.tar.bz2 +8ae34f40b676a0843be6647854b950f45161e7d4 SOURCES/rasdaemon-0.6.7.tar.bz2 diff --git a/SOURCES/059a901e97f4091e31c50ce55027daf707638f8d.patch b/SOURCES/059a901e97f4091e31c50ce55027daf707638f8d.patch deleted file mode 100644 index 8ac97ad..0000000 --- a/SOURCES/059a901e97f4091e31c50ce55027daf707638f8d.patch +++ /dev/null @@ -1,50 +0,0 @@ -commit 059a901e97f4091e31c50ce55027daf707638f8d -Author: dann frazier -Date: Tue Apr 21 15:56:04 2020 -0600 - - ras-mc-ctl: PCIe AER: display PCIe dev name - - Storage of PCIe dev name was added in commit 8e96ca2c1c59 ("rasdaemon: - store PCIe dev name and TLP header for the aer event"). This makes - ras-mc-ctl extract and emit it like so: - - PCIe AER events: - 1 2020-04-16 22:09:48 +0000 0000:0b:00.0 Corrected error: Receiver Error - 2 2020-04-16 22:23:24 +0000 0000:0b:00.0 Corrected error: Receiver Error - 3 2020-04-17 23:00:37 +0000 0000:d9:01.0 Corrected error: Advisory Non-Fatal, BIT15 - 4 2020-04-17 23:21:52 +0000 0000:d9:01.0 Corrected error: Advisory Non-Fatal - 5 2020-04-18 02:04:24 +0000 0000:5e:00.0 Corrected error: Receiver Error - - Signed-off-by: Dann Frazier - Tested-by: Shiju Jose - -diff --git a/util/ras-mc-ctl.in b/util/ras-mc-ctl.in -index 8d6d866..665a042 100755 ---- a/util/ras-mc-ctl.in -+++ b/util/ras-mc-ctl.in -@@ -1230,7 +1230,7 @@ sub summary - sub errors - { - require DBI; -- my ($query, $query_handle, $id, $time, $count, $type, $msg, $label, $mc, $top, $mid, $low, $addr, $grain, $syndrome, $detail, $out); -+ my ($query, $query_handle, $id, $time, $devname, $count, $type, $msg, $label, $mc, $top, $mid, $low, $addr, $grain, $syndrome, $detail, $out); - my ($mcgcap,$mcgstatus, $status, $misc, $ip, $tsc, $walltime, $cpu, $cpuid, $apicid, $socketid, $cs, $bank, $cpuvendor, $bank_name, 
$mcgstatus_msg, $mcistatus_msg, $user_action, $mc_location); - my ($timestamp, $etype, $severity, $etype_string, $severity_string, $fru_id, $fru_text, $cper_data); - my ($bus_name, $dev_name, $driver_name, $reporter_name); -@@ -1259,13 +1259,13 @@ sub errors - $query_handle->finish; - - # PCIe AER aer_event errors -- $query = "select id, timestamp, err_type, err_msg from aer_event order by id"; -+ $query = "select id, timestamp, dev_name, err_type, err_msg from aer_event order by id"; - $query_handle = $dbh->prepare($query); - $query_handle->execute(); -- $query_handle->bind_columns(\($id, $time, $type, $msg)); -+ $query_handle->bind_columns(\($id, $time, $devname, $type, $msg)); - $out = ""; - while($query_handle->fetch()) { -- $out .= "$id $time $type error: $msg\n"; -+ $out .= "$id $time $devname $type error: $msg\n"; - } - if ($out ne "") { - print "PCIe AER events:\n$out\n"; diff --git a/SOURCES/0862a096c3a1d0f993703ab3299f1ddfadf53d7f.patch b/SOURCES/0862a096c3a1d0f993703ab3299f1ddfadf53d7f.patch deleted file mode 100644 index c49aea5..0000000 --- a/SOURCES/0862a096c3a1d0f993703ab3299f1ddfadf53d7f.patch +++ /dev/null @@ -1,83 +0,0 @@ -commit 0862a096c3a1d0f993703ab3299f1ddfadf53d7f -Author: Shiju Jose -Date: Tue Aug 11 13:31:46 2020 +0100 - - rasdaemon: ras-mc-ctl: Add ARM processor error information - - Add supporting ARM processor error in the ras-mc-ctl tool. 
- - Signed-off-by: Shiju Jose - Signed-off-by: Mauro Carvalho Chehab - -diff --git a/util/ras-mc-ctl.in b/util/ras-mc-ctl.in -index ff38143..dd7d56f 100755 ---- a/util/ras-mc-ctl.in -+++ b/util/ras-mc-ctl.in -@@ -1123,6 +1123,7 @@ sub summary - my ($err_type, $label, $mc, $top, $mid, $low, $count, $msg); - my ($etype, $severity, $etype_string, $severity_string); - my ($dev_name, $dev); -+ my ($affinity, $mpidr); - - my $dbh = DBI->connect("dbi:SQLite:dbname=$dbname", "", "", {}); - -@@ -1158,6 +1159,22 @@ sub summary - } - $query_handle->finish; - -+ # ARM processor arm_event errors -+ $query = "select affinity, mpidr, count(*) from arm_event group by affinity, mpidr"; -+ $query_handle = $dbh->prepare($query); -+ $query_handle->execute(); -+ $query_handle->bind_columns(\($affinity, $mpidr, $count)); -+ $out = ""; -+ while($query_handle->fetch()) { -+ $out .= "\t$count errors\n"; -+ } -+ if ($out ne "") { -+ print "ARM processor events summary:\n$out\n"; -+ } else { -+ print "No ARM processor errors.\n\n"; -+ } -+ $query_handle->finish; -+ - # extlog errors - $query = "select etype, severity, count(*) from extlog_event group by etype, severity"; - $query_handle = $dbh->prepare($query); -@@ -1235,6 +1252,7 @@ sub errors - my ($timestamp, $etype, $severity, $etype_string, $severity_string, $fru_id, $fru_text, $cper_data); - my ($bus_name, $dev_name, $driver_name, $reporter_name); - my ($dev, $sector, $nr_sector, $error, $rwbs, $cmd); -+ my ($error_count, $affinity, $mpidr, $r_state, $psci_state); - - my $dbh = DBI->connect("dbi:SQLite:dbname=$dbname", "", "", {}); - -@@ -1274,6 +1292,28 @@ sub errors - } - $query_handle->finish; - -+ # ARM processor arm_event errors -+ $query = "select id, timestamp, error_count, affinity, mpidr, running_state, psci_state from arm_event order by id"; -+ $query_handle = $dbh->prepare($query); -+ $query_handle->execute(); -+ $query_handle->bind_columns(\($id, $timestamp, $error_count, $affinity, $mpidr, $r_state, $psci_state)); -+ $out 
= ""; -+ while($query_handle->fetch()) { -+ $out .= "$id $timestamp error: "; -+ $out .= "error_count=$error_count, " if ($error_count); -+ $out .= "affinity_level=$affinity, "; -+ $out .= sprintf "mpidr=0x%x, ", $mpidr; -+ $out .= sprintf "running_state=0x%x, ", $r_state; -+ $out .= sprintf "psci_state=0x%x", $psci_state; -+ $out .= "\n"; -+ } -+ if ($out ne "") { -+ print "ARM processor events:\n$out\n"; -+ } else { -+ print "No ARM processor errors.\n\n"; -+ } -+ $query_handle->finish; -+ - # Extlog errors - $query = "select id, timestamp, etype, severity, address, fru_id, fru_text, cper_data from extlog_event order by id"; - $query_handle = $dbh->prepare($query); diff --git a/SOURCES/546cf713f667437fb6e283cc3dc090679eb47d08.patch b/SOURCES/546cf713f667437fb6e283cc3dc090679eb47d08.patch deleted file mode 100644 index 068fbdb..0000000 --- a/SOURCES/546cf713f667437fb6e283cc3dc090679eb47d08.patch +++ /dev/null @@ -1,525 +0,0 @@ -commit 546cf713f667437fb6e283cc3dc090679eb47d08 -Author: Subhendu Saha -Date: Tue Jan 12 03:29:55 2021 -0500 - - Fix ras-mc-ctl script. - - When rasdaemon is compiled without enabling aer, mce, devlink, - etc., those tables are not created in the database file. Then - ras-mc-ctl script breaks trying to query data from non-existent - tables. 
- - Signed-off-by: Subhendu Saha subhends@akamai.com - Signed-off-by: Mauro Carvalho Chehab - -diff --git a/util/ras-mc-ctl.in b/util/ras-mc-ctl.in -index dd7d56f..1fbeb63 100755 ---- a/util/ras-mc-ctl.in -+++ b/util/ras-mc-ctl.in -@@ -41,6 +41,20 @@ my $sysconfdir = "@sysconfdir@"; - my $dmidecode = find_prog ("dmidecode"); - my $modprobe = find_prog ("modprobe") or exit (1); - -+my $has_aer = 0; -+my $has_arm = 0; -+my $has_devlink = 0; -+my $has_disk_errors = 0; -+my $has_extlog = 0; -+my $has_mce = 0; -+ -+@WITH_AER_TRUE@$has_aer = 1; -+@WITH_ARM_TRUE@$has_arm = 1; -+@WITH_DEVLINK_TRUE@$has_devlink = 1; -+@WITH_DISKERROR_TRUE@$has_disk_errors = 1; -+@WITH_EXTLOG_TRUE@$has_extlog = 1; -+@WITH_MCE_TRUE@$has_mce = 1; -+ - my %conf = (); - my %bus = (); - my %dimm_size = (); -@@ -1144,102 +1158,114 @@ sub summary - $query_handle->finish; - - # PCIe AER aer_event errors -- $query = "select err_type, err_msg, count(*) from aer_event group by err_type, err_msg"; -- $query_handle = $dbh->prepare($query); -- $query_handle->execute(); -- $query_handle->bind_columns(\($err_type, $msg, $count)); -- $out = ""; -- while($query_handle->fetch()) { -- $out .= "\t$count $err_type errors: $msg\n"; -- } -- if ($out ne "") { -- print "PCIe AER events summary:\n$out\n"; -- } else { -- print "No PCIe AER errors.\n\n"; -+ if ($has_aer == 1) { -+ $query = "select err_type, err_msg, count(*) from aer_event group by err_type, err_msg"; -+ $query_handle = $dbh->prepare($query); -+ $query_handle->execute(); -+ $query_handle->bind_columns(\($err_type, $msg, $count)); -+ $out = ""; -+ while($query_handle->fetch()) { -+ $out .= "\t$count $err_type errors: $msg\n"; -+ } -+ if ($out ne "") { -+ print "PCIe AER events summary:\n$out\n"; -+ } else { -+ print "No PCIe AER errors.\n\n"; -+ } -+ $query_handle->finish; - } -- $query_handle->finish; - - # ARM processor arm_event errors -- $query = "select affinity, mpidr, count(*) from arm_event group by affinity, mpidr"; -- $query_handle = 
$dbh->prepare($query); -- $query_handle->execute(); -- $query_handle->bind_columns(\($affinity, $mpidr, $count)); -- $out = ""; -- while($query_handle->fetch()) { -- $out .= "\t$count errors\n"; -- } -- if ($out ne "") { -- print "ARM processor events summary:\n$out\n"; -- } else { -- print "No ARM processor errors.\n\n"; -+ if ($has_arm == 1) { -+ $query = "select affinity, mpidr, count(*) from arm_event group by affinity, mpidr"; -+ $query_handle = $dbh->prepare($query); -+ $query_handle->execute(); -+ $query_handle->bind_columns(\($affinity, $mpidr, $count)); -+ $out = ""; -+ while($query_handle->fetch()) { -+ $out .= "\t$count errors\n"; -+ } -+ if ($out ne "") { -+ print "ARM processor events summary:\n$out\n"; -+ } else { -+ print "No ARM processor errors.\n\n"; -+ } -+ $query_handle->finish; - } -- $query_handle->finish; - - # extlog errors -- $query = "select etype, severity, count(*) from extlog_event group by etype, severity"; -- $query_handle = $dbh->prepare($query); -- $query_handle->execute(); -- $query_handle->bind_columns(\($etype, $severity, $count)); -- $out = ""; -- while($query_handle->fetch()) { -- $etype_string = get_extlog_type($etype); -- $severity_string = get_extlog_severity($severity); -- $out .= "\t$count $etype_string $severity_string errors\n"; -- } -- if ($out ne "") { -- print "Extlog records summary:\n$out"; -- } else { -- print "No Extlog errors.\n\n"; -+ if ($has_extlog == 1) { -+ $query = "select etype, severity, count(*) from extlog_event group by etype, severity"; -+ $query_handle = $dbh->prepare($query); -+ $query_handle->execute(); -+ $query_handle->bind_columns(\($etype, $severity, $count)); -+ $out = ""; -+ while($query_handle->fetch()) { -+ $etype_string = get_extlog_type($etype); -+ $severity_string = get_extlog_severity($severity); -+ $out .= "\t$count $etype_string $severity_string errors\n"; -+ } -+ if ($out ne "") { -+ print "Extlog records summary:\n$out"; -+ } else { -+ print "No Extlog errors.\n\n"; -+ } -+ 
$query_handle->finish; - } -- $query_handle->finish; - - # devlink errors -- $query = "select dev_name, count(*) from devlink_event group by dev_name"; -- $query_handle = $dbh->prepare($query); -- $query_handle->execute(); -- $query_handle->bind_columns(\($dev_name, $count)); -- $out = ""; -- while($query_handle->fetch()) { -- $out .= "\t$dev_name has $count errors\n"; -- } -- if ($out ne "") { -- print "Devlink records summary:\n$out"; -- } else { -- print "No devlink errors.\n"; -+ if ($has_devlink == 1) { -+ $query = "select dev_name, count(*) from devlink_event group by dev_name"; -+ $query_handle = $dbh->prepare($query); -+ $query_handle->execute(); -+ $query_handle->bind_columns(\($dev_name, $count)); -+ $out = ""; -+ while($query_handle->fetch()) { -+ $out .= "\t$dev_name has $count errors\n"; -+ } -+ if ($out ne "") { -+ print "Devlink records summary:\n$out"; -+ } else { -+ print "No devlink errors.\n"; -+ } -+ $query_handle->finish; - } -- $query_handle->finish; - - # Disk errors -- $query = "select dev, count(*) from disk_errors group by dev"; -- $query_handle = $dbh->prepare($query); -- $query_handle->execute(); -- $query_handle->bind_columns(\($dev, $count)); -- $out = ""; -- while($query_handle->fetch()) { -- $out .= "\t$dev has $count errors\n"; -- } -- if ($out ne "") { -- print "Disk errors summary:\n$out"; -- } else { -- print "No disk errors.\n"; -+ if ($has_disk_errors == 1) { -+ $query = "select dev, count(*) from disk_errors group by dev"; -+ $query_handle = $dbh->prepare($query); -+ $query_handle->execute(); -+ $query_handle->bind_columns(\($dev, $count)); -+ $out = ""; -+ while($query_handle->fetch()) { -+ $out .= "\t$dev has $count errors\n"; -+ } -+ if ($out ne "") { -+ print "Disk errors summary:\n$out"; -+ } else { -+ print "No disk errors.\n"; -+ } -+ $query_handle->finish; - } -- $query_handle->finish; - - # MCE mce_record errors -- $query = "select error_msg, count(*) from mce_record group by error_msg"; -- $query_handle = 
$dbh->prepare($query); -- $query_handle->execute(); -- $query_handle->bind_columns(\($msg, $count)); -- $out = ""; -- while($query_handle->fetch()) { -- $out .= "\t$count $msg errors\n"; -- } -- if ($out ne "") { -- print "MCE records summary:\n$out"; -- } else { -- print "No MCE errors.\n"; -+ if ($has_mce == 1) { -+ $query = "select error_msg, count(*) from mce_record group by error_msg"; -+ $query_handle = $dbh->prepare($query); -+ $query_handle->execute(); -+ $query_handle->bind_columns(\($msg, $count)); -+ $out = ""; -+ while($query_handle->fetch()) { -+ $out .= "\t$count $msg errors\n"; -+ } -+ if ($out ne "") { -+ print "MCE records summary:\n$out"; -+ } else { -+ print "No MCE errors.\n"; -+ } -+ $query_handle->finish; - } -- $query_handle->finish; - - undef($dbh); - } -@@ -1277,150 +1303,162 @@ sub errors - $query_handle->finish; - - # PCIe AER aer_event errors -- $query = "select id, timestamp, dev_name, err_type, err_msg from aer_event order by id"; -- $query_handle = $dbh->prepare($query); -- $query_handle->execute(); -- $query_handle->bind_columns(\($id, $time, $devname, $type, $msg)); -- $out = ""; -- while($query_handle->fetch()) { -- $out .= "$id $time $devname $type error: $msg\n"; -- } -- if ($out ne "") { -- print "PCIe AER events:\n$out\n"; -- } else { -- print "No PCIe AER errors.\n\n"; -+ if ($has_aer == 1) { -+ $query = "select id, timestamp, dev_name, err_type, err_msg from aer_event order by id"; -+ $query_handle = $dbh->prepare($query); -+ $query_handle->execute(); -+ $query_handle->bind_columns(\($id, $time, $devname, $type, $msg)); -+ $out = ""; -+ while($query_handle->fetch()) { -+ $out .= "$id $time $devname $type error: $msg\n"; -+ } -+ if ($out ne "") { -+ print "PCIe AER events:\n$out\n"; -+ } else { -+ print "No PCIe AER errors.\n\n"; -+ } -+ $query_handle->finish; - } -- $query_handle->finish; - - # ARM processor arm_event errors -- $query = "select id, timestamp, error_count, affinity, mpidr, running_state, psci_state from 
arm_event order by id"; -- $query_handle = $dbh->prepare($query); -- $query_handle->execute(); -- $query_handle->bind_columns(\($id, $timestamp, $error_count, $affinity, $mpidr, $r_state, $psci_state)); -- $out = ""; -- while($query_handle->fetch()) { -- $out .= "$id $timestamp error: "; -- $out .= "error_count=$error_count, " if ($error_count); -- $out .= "affinity_level=$affinity, "; -- $out .= sprintf "mpidr=0x%x, ", $mpidr; -- $out .= sprintf "running_state=0x%x, ", $r_state; -- $out .= sprintf "psci_state=0x%x", $psci_state; -- $out .= "\n"; -- } -- if ($out ne "") { -- print "ARM processor events:\n$out\n"; -- } else { -- print "No ARM processor errors.\n\n"; -+ if ($has_arm == 1) { -+ $query = "select id, timestamp, error_count, affinity, mpidr, running_state, psci_state from arm_event order by id"; -+ $query_handle = $dbh->prepare($query); -+ $query_handle->execute(); -+ $query_handle->bind_columns(\($id, $timestamp, $error_count, $affinity, $mpidr, $r_state, $psci_state)); -+ $out = ""; -+ while($query_handle->fetch()) { -+ $out .= "$id $timestamp error: "; -+ $out .= "error_count=$error_count, " if ($error_count); -+ $out .= "affinity_level=$affinity, "; -+ $out .= sprintf "mpidr=0x%x, ", $mpidr; -+ $out .= sprintf "running_state=0x%x, ", $r_state; -+ $out .= sprintf "psci_state=0x%x", $psci_state; -+ $out .= "\n"; -+ } -+ if ($out ne "") { -+ print "ARM processor events:\n$out\n"; -+ } else { -+ print "No ARM processor errors.\n\n"; -+ } -+ $query_handle->finish; - } -- $query_handle->finish; - - # Extlog errors -- $query = "select id, timestamp, etype, severity, address, fru_id, fru_text, cper_data from extlog_event order by id"; -- $query_handle = $dbh->prepare($query); -- $query_handle->execute(); -- $query_handle->bind_columns(\($id, $timestamp, $etype, $severity, $addr, $fru_id, $fru_text, $cper_data)); -- $out = ""; -- while($query_handle->fetch()) { -- $etype_string = get_extlog_type($etype); -- $severity_string = get_extlog_severity($severity); 
-- $out .= "$id $timestamp error: "; -- $out .= "type=$etype_string, "; -- $out .= "severity=$severity_string, "; -- $out .= sprintf "address=0x%08x, ", $addr; -- $out .= sprintf "fru_id=%s, ", get_uuid_le($fru_id); -- $out .= "fru_text='$fru_text', "; -- $out .= get_cper_data_text($cper_data) if ($cper_data); -- $out .= "\n"; -- } -- if ($out ne "") { -- print "Extlog events:\n$out\n"; -- } else { -- print "No Extlog errors.\n\n"; -+ if ($has_extlog == 1) { -+ $query = "select id, timestamp, etype, severity, address, fru_id, fru_text, cper_data from extlog_event order by id"; -+ $query_handle = $dbh->prepare($query); -+ $query_handle->execute(); -+ $query_handle->bind_columns(\($id, $timestamp, $etype, $severity, $addr, $fru_id, $fru_text, $cper_data)); -+ $out = ""; -+ while($query_handle->fetch()) { -+ $etype_string = get_extlog_type($etype); -+ $severity_string = get_extlog_severity($severity); -+ $out .= "$id $timestamp error: "; -+ $out .= "type=$etype_string, "; -+ $out .= "severity=$severity_string, "; -+ $out .= sprintf "address=0x%08x, ", $addr; -+ $out .= sprintf "fru_id=%s, ", get_uuid_le($fru_id); -+ $out .= "fru_text='$fru_text', "; -+ $out .= get_cper_data_text($cper_data) if ($cper_data); -+ $out .= "\n"; -+ } -+ if ($out ne "") { -+ print "Extlog events:\n$out\n"; -+ } else { -+ print "No Extlog errors.\n\n"; -+ } -+ $query_handle->finish; - } -- $query_handle->finish; - - # devlink errors -- $query = "select id, timestamp, bus_name, dev_name, driver_name, reporter_name, msg from devlink_event order by id"; -- $query_handle = $dbh->prepare($query); -- $query_handle->execute(); -- $query_handle->bind_columns(\($id, $timestamp, $bus_name, $dev_name, $driver_name, $reporter_name, $msg)); -- $out = ""; -- while($query_handle->fetch()) { -- $out .= "$id $timestamp error: "; -- $out .= "bus_name=$bus_name, "; -- $out .= "dev_name=$dev_name, "; -- $out .= "driver_name=$driver_name, "; -- $out .= "reporter_name=$reporter_name, "; -- $out .= 
"message='$msg', "; -- $out .= "\n"; -- } -- if ($out ne "") { -- print "Devlink events:\n$out\n"; -- } else { -- print "No devlink errors.\n\n"; -+ if ($has_devlink == 1) { -+ $query = "select id, timestamp, bus_name, dev_name, driver_name, reporter_name, msg from devlink_event order by id"; -+ $query_handle = $dbh->prepare($query); -+ $query_handle->execute(); -+ $query_handle->bind_columns(\($id, $timestamp, $bus_name, $dev_name, $driver_name, $reporter_name, $msg)); -+ $out = ""; -+ while($query_handle->fetch()) { -+ $out .= "$id $timestamp error: "; -+ $out .= "bus_name=$bus_name, "; -+ $out .= "dev_name=$dev_name, "; -+ $out .= "driver_name=$driver_name, "; -+ $out .= "reporter_name=$reporter_name, "; -+ $out .= "message='$msg', "; -+ $out .= "\n"; -+ } -+ if ($out ne "") { -+ print "Devlink events:\n$out\n"; -+ } else { -+ print "No devlink errors.\n\n"; -+ } -+ $query_handle->finish; - } -- $query_handle->finish; - - # Disk errors -- $query = "select id, timestamp, dev, sector, nr_sector, error, rwbs, cmd from disk_errors order by id"; -- $query_handle = $dbh->prepare($query); -- $query_handle->execute(); -- $query_handle->bind_columns(\($id, $timestamp, $dev, $sector, $nr_sector, $error, $rwbs, $cmd)); -- $out = ""; -- while($query_handle->fetch()) { -- $out .= "$id $timestamp error: "; -- $out .= "dev=$dev, "; -- $out .= "sector=$sector, "; -- $out .= "nr_sector=$nr_sector, "; -- $out .= "error='$error', "; -- $out .= "rwbs='$rwbs', "; -- $out .= "cmd='$cmd', "; -- $out .= "\n"; -- } -- if ($out ne "") { -- print "Disk errors\n$out\n"; -- } else { -- print "No disk errors.\n\n"; -+ if ($has_disk_errors == 1) { -+ $query = "select id, timestamp, dev, sector, nr_sector, error, rwbs, cmd from disk_errors order by id"; -+ $query_handle = $dbh->prepare($query); -+ $query_handle->execute(); -+ $query_handle->bind_columns(\($id, $timestamp, $dev, $sector, $nr_sector, $error, $rwbs, $cmd)); -+ $out = ""; -+ while($query_handle->fetch()) { -+ $out .= "$id 
$timestamp error: "; -+ $out .= "dev=$dev, "; -+ $out .= "sector=$sector, "; -+ $out .= "nr_sector=$nr_sector, "; -+ $out .= "error='$error', "; -+ $out .= "rwbs='$rwbs', "; -+ $out .= "cmd='$cmd', "; -+ $out .= "\n"; -+ } -+ if ($out ne "") { -+ print "Disk errors\n$out\n"; -+ } else { -+ print "No disk errors.\n\n"; -+ } -+ $query_handle->finish; - } -- $query_handle->finish; - - # MCE mce_record errors -- $query = "select id, timestamp, mcgcap, mcgstatus, status, addr, misc, ip, tsc, walltime, cpu, cpuid, apicid, socketid, cs, bank, cpuvendor, bank_name, error_msg, mcgstatus_msg, mcistatus_msg, user_action, mc_location from mce_record order by id"; -- $query_handle = $dbh->prepare($query); -- $query_handle->execute(); -- $query_handle->bind_columns(\($id, $time, $mcgcap,$mcgstatus, $status, $addr, $misc, $ip, $tsc, $walltime, $cpu, $cpuid, $apicid, $socketid, $cs, $bank, $cpuvendor, $bank_name, $msg, $mcgstatus_msg, $mcistatus_msg, $user_action, $mc_location)); -- $out = ""; -- while($query_handle->fetch()) { -- $out .= "$id $time error: $msg"; -- $out .= ", CPU $cpuvendor" if ($cpuvendor); -- $out .= ", bank $bank_name" if ($bank_name); -- $out .= ", mcg $mcgstatus_msg" if ($mcgstatus_msg); -- $out .= ", mci $mcistatus_msg" if ($mcistatus_msg); -- $out .= ", $mc_location" if ($mc_location); -- $out .= ", $user_action" if ($user_action); -- $out .= sprintf ", mcgcap=0x%08x", $mcgcap if ($mcgcap); -- $out .= sprintf ", mcgstatus=0x%08x", $mcgstatus if ($mcgstatus); -- $out .= sprintf ", status=0x%08x", $status if ($status); -- $out .= sprintf ", addr=0x%08x", $addr if ($addr); -- $out .= sprintf ", misc=0x%08x", $misc if ($misc); -- $out .= sprintf ", ip=0x%08x", $ip if ($ip); -- $out .= sprintf ", tsc=0x%08x", $tsc if ($tsc); -- $out .= sprintf ", walltime=0x%08x", $walltime if ($walltime); -- $out .= sprintf ", cpu=0x%08x", $cpu if ($cpu); -- $out .= sprintf ", cpuid=0x%08x", $cpuid if ($cpuid); -- $out .= sprintf ", apicid=0x%08x", $apicid if ($apicid); -- 
$out .= sprintf ", socketid=0x%08x", $socketid if ($socketid); -- $out .= sprintf ", cs=0x%08x", $cs if ($cs); -- $out .= sprintf ", bank=0x%08x", $bank if ($bank); -- -- $out .= "\n"; -- } -- if ($out ne "") { -- print "MCE events:\n$out\n"; -- } else { -- print "No MCE errors.\n\n"; -+ if ($has_mce == 1) { -+ $query = "select id, timestamp, mcgcap, mcgstatus, status, addr, misc, ip, tsc, walltime, cpu, cpuid, apicid, socketid, cs, bank, cpuvendor, bank_name, error_msg, mcgstatus_msg, mcistatus_msg, user_action, mc_location from mce_record order by id"; -+ $query_handle = $dbh->prepare($query); -+ $query_handle->execute(); -+ $query_handle->bind_columns(\($id, $time, $mcgcap,$mcgstatus, $status, $addr, $misc, $ip, $tsc, $walltime, $cpu, $cpuid, $apicid, $socketid, $cs, $bank, $cpuvendor, $bank_name, $msg, $mcgstatus_msg, $mcistatus_msg, $user_action, $mc_location)); -+ $out = ""; -+ while($query_handle->fetch()) { -+ $out .= "$id $time error: $msg"; -+ $out .= ", CPU $cpuvendor" if ($cpuvendor); -+ $out .= ", bank $bank_name" if ($bank_name); -+ $out .= ", mcg $mcgstatus_msg" if ($mcgstatus_msg); -+ $out .= ", mci $mcistatus_msg" if ($mcistatus_msg); -+ $out .= ", $mc_location" if ($mc_location); -+ $out .= ", $user_action" if ($user_action); -+ $out .= sprintf ", mcgcap=0x%08x", $mcgcap if ($mcgcap); -+ $out .= sprintf ", mcgstatus=0x%08x", $mcgstatus if ($mcgstatus); -+ $out .= sprintf ", status=0x%08x", $status if ($status); -+ $out .= sprintf ", addr=0x%08x", $addr if ($addr); -+ $out .= sprintf ", misc=0x%08x", $misc if ($misc); -+ $out .= sprintf ", ip=0x%08x", $ip if ($ip); -+ $out .= sprintf ", tsc=0x%08x", $tsc if ($tsc); -+ $out .= sprintf ", walltime=0x%08x", $walltime if ($walltime); -+ $out .= sprintf ", cpu=0x%08x", $cpu if ($cpu); -+ $out .= sprintf ", cpuid=0x%08x", $cpuid if ($cpuid); -+ $out .= sprintf ", apicid=0x%08x", $apicid if ($apicid); -+ $out .= sprintf ", socketid=0x%08x", $socketid if ($socketid); -+ $out .= sprintf ", cs=0x%08x", $cs 
if ($cs); -+ $out .= sprintf ", bank=0x%08x", $bank if ($bank); -+ -+ $out .= "\n"; -+ } -+ if ($out ne "") { -+ print "MCE events:\n$out\n"; -+ } else { -+ print "No MCE errors.\n\n"; -+ } -+ $query_handle->finish; - } -- $query_handle->finish; - - undef($dbh); - } diff --git a/SOURCES/5d00690583860313916825ce891a7b0d8005a0f9.patch b/SOURCES/5d00690583860313916825ce891a7b0d8005a0f9.patch deleted file mode 100644 index f445c23..0000000 --- a/SOURCES/5d00690583860313916825ce891a7b0d8005a0f9.patch +++ /dev/null @@ -1,26 +0,0 @@ -commit 5d00690583860313916825ce891a7b0d8005a0f9 -Author: Cong Wang -Date: Fri Feb 28 12:37:15 2020 -0800 - - Match rankX in ras-mc-ctl - - According to kernel doc: - https://www.kernel.org/doc/html/v4.10/admin-guide/ras.html - mcX directory contains either dimmX or rankX directories. - - Signed-off-by: Cong Wang - Signed-off-by: Mauro Carvalho Chehab - -diff --git a/util/ras-mc-ctl.in b/util/ras-mc-ctl.in -index 54ab180..ff38143 100755 ---- a/util/ras-mc-ctl.in -+++ b/util/ras-mc-ctl.in -@@ -247,7 +247,7 @@ sub parse_dimm_nodes - $mc =~ s,.*mc(\d+).*,$1,; - - my $dimm = $file; -- $dimm =~ s,.*dimm(\d+).*,$1,; -+ $dimm =~ s,.*(rank|dimm)(\d+).*,$2,; - - open IN, $file; - my $location = ; diff --git a/SOURCES/c329012ce4b44af08217f2a8f2b3b9b1b4b1c0d3.patch b/SOURCES/c329012ce4b44af08217f2a8f2b3b9b1b4b1c0d3.patch deleted file mode 100644 index 30c2225..0000000 --- a/SOURCES/c329012ce4b44af08217f2a8f2b3b9b1b4b1c0d3.patch +++ /dev/null @@ -1,38 +0,0 @@ -commit c329012ce4b44af08217f2a8f2b3b9b1b4b1c0d3 -Author: lvying6 -Date: Sat Oct 31 17:57:15 2020 +0800 - - ras-page-isolation: page which is PAGE_OFFLINE_FAILED can be offlined again - - OS may fail to offline page at the previous time. After some time, - this page's state changed, and the page can be offlined by OS. - At this time, Correctable errors on this page reached the threshold. - Rasdaemon should trigger to offline this page again. 
- - Signed-off-by: lvying6 - Signed-off-by: Mauro Carvalho Chehab - -diff --git a/ras-page-isolation.c b/ras-page-isolation.c -index dc07545..fd7bd70 100644 ---- a/ras-page-isolation.c -+++ b/ras-page-isolation.c -@@ -237,12 +237,17 @@ static void page_offline(struct page_record *pr) - int ret; - - /* Offlining page is not required */ -- if (offline <= OFFLINE_ACCOUNT) -+ if (offline <= OFFLINE_ACCOUNT) { -+ log(TERM, LOG_INFO, "PAGE_CE_ACTION=%s, ignore to offline page at %#llx\n", -+ offline_choice[offline].name, addr); - return; -+ } - - /* Ignore offlined pages */ -- if (pr->offlined != PAGE_ONLINE) -+ if (pr->offlined == PAGE_OFFLINE) { -+ log(TERM, LOG_INFO, "page at %#llx is already offlined, ignore\n", addr); - return; -+ } - - /* Time to silence this noisy page */ - if (offline == OFFLINE_SOFT_THEN_HARD) { diff --git a/SOURCES/d98326e8b6b2ac65ca64b1c7a71ba019232d3a7b.patch b/SOURCES/d98326e8b6b2ac65ca64b1c7a71ba019232d3a7b.patch deleted file mode 100644 index 1a1426c..0000000 --- a/SOURCES/d98326e8b6b2ac65ca64b1c7a71ba019232d3a7b.patch +++ /dev/null @@ -1,24 +0,0 @@ -commit d98326e8b6b2ac65ca64b1c7a71ba019232d3a7b -Author: Cong Wang -Date: Thu Feb 27 16:24:06 2020 -0800 - - Fix a typo in ras-mc-ctl - - Signed-off-by: Cong Wang - Signed-off-by: Mauro Carvalho Chehab - -diff --git a/util/ras-mc-ctl.in b/util/ras-mc-ctl.in -index 665a042..54ab180 100755 ---- a/util/ras-mc-ctl.in -+++ b/util/ras-mc-ctl.in -@@ -270,8 +270,8 @@ sub parse_dimm_nodes - for (my $i = 1; $i < scalar(@temp); $i += 2) { - $pos[$i / 2] = $temp[$i]; - -- if ($pos[$i / 2] > $max_pos[$i / 2]) { -- $max_pos[$i / 2 + 1] = $pos[$i / 2]; -+ if ($pos[$i / 2] > $max_pos[$i / 2 + 1]) { -+ $max_pos[$i / 2 + 1] = $pos[$i / 2]; - } - } - if ($mc > $max_pos[0]) { diff --git a/SOURCES/e4d27840e173491ab29c2d97017da9344e2c2526.patch b/SOURCES/e4d27840e173491ab29c2d97017da9344e2c2526.patch deleted file mode 100644 index dc3bc6d..0000000 --- a/SOURCES/e4d27840e173491ab29c2d97017da9344e2c2526.patch +++ 
/dev/null @@ -1,98 +0,0 @@ -commit e4d27840e173491ab29c2d97017da9344e2c2526 -Author: lvying -Date: Sat Oct 31 17:57:14 2020 +0800 - - ras-page-isolation: do_page_offline always considers page offline was successful - - do_page_offline always consider page offline was successful even if - kernel soft/hard offline page failed. - - Calling rasdaemon with: - - /etc/sysconfig/rasdaemon PAGE_CE_THRESHOLD="1" - - i.e when a page's address occurs Corrected Error, rasdaemon should - trigger this page soft offline. - - However, after adding a livepatch into kernel's - store_soft_offline_page to observe this function's return value, - when injecting a CE into address 0x3f7ec30000, the Kernel - lot reports: - - soft_offline: 0x3f7ec30: unknown non LRU page type ffffe0000000000 () - [store_soft_offline_page]return from soft_offline_page: -5 - - While rasdaemon log reports: - - rasdaemon[73711]: cpu 00:rasdaemon: Corrected Errors at 0x3f7ec30000 exceed threshold - rasdaemon[73711]: rasdaemon: Result of offlining page at 0x3f7ec30000: offlined - - using strace to record rasdaemon's system call, it reports: - - strace -p 73711 - openat(AT_FDCWD, "/sys/devices/system/memory/soft_offline_page", - O_WRONLY|O_CREAT|O_TRUNC, 0666) = 28 - fstat(28, {st_mode=S_IFREG|0200, st_size=4096, ...}) = 0 - write(28, "0x3f7ec30000", 12) = -1 EIO (Input/output error) - close(28) = 0 - - So, kernel actually soft offline pfn 0x3f7ec30 failed and - store_soft_offline_page returned -EIO. However, rasdaemon always - considers the page offline to be successful. - - According to strace display, ferror was unable of detecting the - failure of the write syscall. - - This patch changes fopen-fprintf-ferror-fclose process to use - the lower I/O level, by using instead open-write-close, which - can detect such syscall failure. 
- - Signed-off-by: lvying - Signed-off-by: Mauro Carvalho Chehab - -diff --git a/ras-page-isolation.c b/ras-page-isolation.c -index 50e4406..dc07545 100644 ---- a/ras-page-isolation.c -+++ b/ras-page-isolation.c -@@ -17,6 +17,9 @@ - #include - #include - #include -+#include -+#include -+#include - #include "ras-logger.h" - #include "ras-page-isolation.h" - -@@ -210,18 +213,22 @@ void ras_page_account_init(void) - - static int do_page_offline(unsigned long long addr, enum otype type) - { -- FILE *offline_file; -- int err; -+ int fd, rc; -+ char buf[20]; - -- offline_file = fopen(kernel_offline[type], "w"); -- if (!offline_file) -+ fd = open(kernel_offline[type], O_WRONLY); -+ if (fd == -1) { -+ log(TERM, LOG_ERR, "[%s]:open file: %s failed\n", __func__, kernel_offline[type]); - return -1; -+ } - -- fprintf(offline_file, "%#llx", addr); -- err = ferror(offline_file) ? -1 : 0; -- fclose(offline_file); -- -- return err; -+ sprintf(buf, "%#llx", addr); -+ rc = write(fd, buf, strlen(buf)); -+ if (rc < 0) { -+ log(TERM, LOG_ERR, "page offline addr(%s) by %s failed, errno:%d\n", buf, kernel_offline[type], errno); -+ } -+ close(fd); -+ return rc; - } - - static void page_offline(struct page_record *pr) diff --git a/SPECS/rasdaemon.spec b/SPECS/rasdaemon.spec index ef055a2..5848e1a 100644 --- a/SPECS/rasdaemon.spec +++ b/SPECS/rasdaemon.spec @@ -1,6 +1,6 @@ Name: rasdaemon -Version: 0.6.6 -Release: 2%{?dist} +Version: 0.6.7 +Release: 1%{?dist} Summary: Utility to receive RAS error tracings License: GPLv2 URL: http://git.infradead.org/users/mchehab/rasdaemon.git @@ -24,18 +24,6 @@ Requires(post): systemd Requires(preun): systemd Requires(postun): systemd -# twitter patches -Patch1: d98326e8b6b2ac65ca64b1c7a71ba019232d3a7b.patch -Patch2: 5d00690583860313916825ce891a7b0d8005a0f9.patch -# PCIe display fix -Patch3: 0862a096c3a1d0f993703ab3299f1ddfadf53d7f.patch -Patch4: 059a901e97f4091e31c50ce55027daf707638f8d.patch -# Page offlining patches -Patch5: 
e4d27840e173491ab29c2d97017da9344e2c2526.patch -Patch6: c329012ce4b44af08217f2a8f2b3b9b1b4b1c0d3.patch -# Ras-mc-ctl fixes -Patch7: 546cf713f667437fb6e283cc3dc090679eb47d08.patch - %description %{name} is a RAS (Reliability, Availability and Serviceability) logging tool. It currently records memory errors, using the EDAC tracing events. @@ -48,19 +36,12 @@ an utility for reporting current error counts from the EDAC sysfs files. %prep %setup -q -%patch1 -p1 -%patch2 -p1 -%patch3 -p1 -%patch4 -p1 -%patch5 -p1 -%patch6 -p1 -%patch7 -p1 %build %ifarch %{arm} aarch64 -%configure --enable-sqlite3 --enable-aer --enable-mce --enable-extlog --enable-devlink --enable-diskerror --enable-abrt-report --enable-non-standard --enable-arm --enable-hisi-ns-decode +%configure --enable-sqlite3 --enable-aer --enable-mce --enable-extlog --enable-devlink --enable-diskerror --enable-abrt-report --enable-non-standard --enable-arm --enable-hisi-ns-decode --with-sysconfdefdir="%{_sysconfdir}/sysconfig" %else -%configure --enable-sqlite3 --enable-aer --enable-mce --enable-extlog --enable-devlink --enable-diskerror --enable-abrt-report --enable-memory-ce-pfa +%configure --enable-sqlite3 --enable-aer --enable-mce --enable-extlog --enable-devlink --enable-diskerror --enable-abrt-report --enable-memory-ce-pfa --with-sysconfdefdir="%{_sysconfdir}/sysconfig" %endif make %{?_smp_mflags} @@ -76,13 +57,16 @@ rm INSTALL %{buildroot}/usr/include/*.h %{_sbindir}/ras-mc-ctl %{_mandir}/*/* %{_unitdir}/*.service -%{_sharedstatedir}/rasdaemon %{_sysconfdir}/ras/dimm_labels.d %ifnarch %{arm} aarch64 %{_sysconfdir}/sysconfig/rasdaemon %endif %changelog +* Fri Jun 18 2021 David Johansen - 0.6.7-1 +- Build release 0.6.7 and remove backport patches +- sharedstatedir/rasdaemon is now created at runtime. + * Thu Apr 01 2021 David Johansen - 0.6.6-2 - Include later patches for ras-mc-ctl fixes