Blame SOURCES/0071-rasdaemon-ras-mc-ctl-add-option-to-show-error-counts.patch

ac32bf
From 60a91e4da4f2daf2b10143fc148a8043312b61e5 Mon Sep 17 00:00:00 2001
ac32bf
From: Aristeu Rozanski <aris@redhat.com>
ac32bf
Date: Wed, 1 Aug 2018 16:29:58 -0400
ac32bf
Subject: [PATCH] rasdaemon: ras-mc-ctl: add option to show error counts
ac32bf
ac32bf
In some scenarios it might not be desirable to have a daemon running
ac32bf
to parse and store the errors provided by EDAC when having only the
ac32bf
number of CEs and UEs is enough. This patch implements this feature
ac32bf
as a ras-mc-ctl option.
ac32bf
ac32bf
Signed-off-by: Aristeu Rozanski <arozansk@redhat.com>
ac32bf
Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
ac32bf
---
ac32bf
 util/ras-mc-ctl.in | 75 ++++++++++++++++++++++++++++++++++++++++++++++++++++--
ac32bf
 1 file changed, 73 insertions(+), 2 deletions(-)
ac32bf
ac32bf
diff --git a/util/ras-mc-ctl.in b/util/ras-mc-ctl.in
ac32bf
index 38b7824..aee431a 100755
ac32bf
--- a/util/ras-mc-ctl.in
ac32bf
+++ b/util/ras-mc-ctl.in
ac32bf
@@ -50,6 +50,8 @@ my %dimm_location = ();
ac32bf
 my %csrow_size  = ();
ac32bf
 my %rank_size   = ();
ac32bf
 my %csrow_ranks = ();
ac32bf
+my %dimm_ce_count = ();
ac32bf
+my %dimm_ue_count = ();
ac32bf
 
ac32bf
 my @layers;
ac32bf
 my @max_pos;
ac32bf
@@ -76,6 +78,7 @@ Usage: $prog [OPTIONS...]
ac32bf
  --layout           Display the memory layout.
ac32bf
  --summary          Presents a summary of the logged errors.
ac32bf
  --errors           Shows the errors stored at the error database.
ac32bf
+ --error-count      Shows the corrected and uncorrected error counts using sysfs.
ac32bf
  --help             This help message.
ac32bf
 EOF
ac32bf
 
ac32bf
@@ -83,7 +86,7 @@ parse_cmdline();
ac32bf
 
ac32bf
 if (  $conf{opt}{mainboard} || $conf{opt}{print_labels}
ac32bf
    || $conf{opt}{register_labels} || $conf{opt}{display_memory_layout}
ac32bf
-   || $conf{opt}{guess_dimm_label}) {
ac32bf
+   || $conf{opt}{guess_dimm_label} || $conf{opt}{error_count}) {
ac32bf
 
ac32bf
     get_mainboard_info();
ac32bf
 
ac32bf
@@ -105,6 +108,9 @@ if (  $conf{opt}{mainboard} || $conf{opt}{print_labels}
ac32bf
     if ($conf{opt}{guess_dimm_label}) {
ac32bf
         guess_dimm_label ();
ac32bf
     }
ac32bf
+    if ($conf{opt}{error_count}) {
ac32bf
+        display_error_count ();
ac32bf
+    }
ac32bf
 }
ac32bf
 
ac32bf
 if ($conf{opt}{status}) {
ac32bf
@@ -134,6 +140,7 @@ sub parse_cmdline
ac32bf
     $conf{opt}{guess_dimm_label} = 0;
ac32bf
     $conf{opt}{summary} = 0;
ac32bf
     $conf{opt}{errors} = 0;
ac32bf
+    $conf{opt}{error_count} = 0;
ac32bf
 
ac32bf
     my $rref = \$conf{opt}{report};
ac32bf
     my $mref = \$conf{opt}{mainboard};
ac32bf
@@ -150,7 +157,8 @@ sub parse_cmdline
ac32bf
                          "status" =>          \$conf{opt}{status},
ac32bf
                          "layout" =>          \$conf{opt}{display_memory_layout},
ac32bf
                          "summary" =>         \$conf{opt}{summary},
ac32bf
-                         "errors" =>          \$conf{opt}{errors}
ac32bf
+                         "errors" =>          \$conf{opt}{errors},
ac32bf
+                         "error-count" =>     \$conf{opt}{error_count}
ac32bf
             );
ac32bf
 
ac32bf
     usage(1) if !$rc;
ac32bf
@@ -284,6 +292,30 @@ sub parse_dimm_nodes
ac32bf
         $dimm_label_file{$str_loc} = $file;
ac32bf
         $dimm_location{$str_loc} = $location;
ac32bf
 
ac32bf
+        my $count;
ac32bf
+
ac32bf
+        $file =~s/dimm_label/dimm_ce_count/;
ac32bf
+        if (-e $file) {
ac32bf
+                open IN, $file;
ac32bf
+                chomp($count = <IN>);
ac32bf
+                close IN;
ac32bf
+        } else {
ac32bf
+                log_error ("dimm_ce_count not found in sysfs. Old kernel?\n");
ac32bf
+                exit -1;
ac32bf
+        }
ac32bf
+        $dimm_ce_count{$str_loc} = $count;
ac32bf
+
ac32bf
+        $file =~s/dimm_ce_count/dimm_ue_count/;
ac32bf
+        if (-e $file) {
ac32bf
+                open IN, $file;
ac32bf
+                chomp($count = <IN>);
ac32bf
+                close IN;
ac32bf
+        } else {
ac32bf
+                log_error ("dimm_ue_count not found in sysfs. Old kernel?\n");
ac32bf
+                exit -1;
ac32bf
+        }
ac32bf
+        $dimm_ue_count{$str_loc} = $count;
ac32bf
+
ac32bf
         return;
ac32bf
     }
ac32bf
 }
ac32bf
@@ -906,6 +938,45 @@ sub display_memory_layout
ac32bf
     dimm_display_mem();
ac32bf
 }
ac32bf
 
ac32bf
+sub display_error_count
ac32bf
+{
ac32bf
+    my $sysfs_dir = "/sys/devices/system/edac/mc";
ac32bf
+    my $key;
ac32bf
+    my $max_width = 0;
ac32bf
+    my %dimm_labels = ();
ac32bf
+
ac32bf
+    find ({wanted => \&parse_dimm_nodes, no_chdir => 1}, $sysfs_dir);
ac32bf
+
ac32bf
+    if (!scalar(keys %dimm_node)) {
ac32bf
+        log_error ("No DIMMs found in /sys or new sysfs EDAC interface not found.\n");
ac32bf
+        exit -1;
ac32bf
+    }
ac32bf
+
ac32bf
+    foreach $key (keys %dimm_node) {
ac32bf
+        my $label_width;
ac32bf
+
ac32bf
+        open IN, $dimm_label_file{$key};
ac32bf
+        chomp(my $label = <IN>);
ac32bf
+        close IN;
ac32bf
+        $label_width = length $label;
ac32bf
+
ac32bf
+        if ($label_width > $max_width) {
ac32bf
+            $max_width = $label_width;
ac32bf
+        }
ac32bf
+        $dimm_labels{$key} = $label;
ac32bf
+    }
ac32bf
+    my $string = "Label";
ac32bf
+    $string .= " " x ($max_width - length $string);
ac32bf
+    print($string . "\tCE\tUE\n");
ac32bf
+
ac32bf
+    foreach $key (keys %dimm_node) {
ac32bf
+        my $ce_count = $dimm_ce_count{$key};
ac32bf
+        my $ue_count = $dimm_ue_count{$key};
ac32bf
+
ac32bf
+        print("$dimm_labels{$key}\t$ce_count\t$ue_count\n");
ac32bf
+    }
ac32bf
+}
ac32bf
+
ac32bf
 sub find_prog
ac32bf
 {
ac32bf
     my ($file) = @_;
ac32bf
-- 
ac32bf
1.8.3.1
ac32bf