Blame SOURCES/sos-bz1930181-collect-cleaning-consistency.patch

2ed6e8
From fc0218638f3e865c4315823e72aef2f46d012d07 Mon Sep 17 00:00:00 2001
2ed6e8
From: Jake Hunsaker <jhunsake@redhat.com>
2ed6e8
Date: Wed, 14 Apr 2021 11:55:03 -0400
2ed6e8
Subject: [PATCH 1/2] [clean] Load maps from all archives before obfuscation
2ed6e8
 loop
2ed6e8
2ed6e8
Previously, maps were being prepped via archives after extraction. This
2ed6e8
reduced the amount of file IO being done, but made it so that necessary
2ed6e8
obfuscations from later archives in a series would not be applied in
2ed6e8
the archives obfuscated before those later archives were extracted.
2ed6e8
2ed6e8
Fix this by extracting the map prep files into memory for each archive
2ed6e8
to prep the maps entirely before we enter the obfuscation loop.
2ed6e8
2ed6e8
Closes: #2490
2ed6e8
Related: RHBZ#1930181
2ed6e8
Resolves: #2492
2ed6e8
2ed6e8
Signed-off-by: Jake Hunsaker <jhunsake@redhat.com>
2ed6e8
---
2ed6e8
 sos/cleaner/__init__.py                | 69 +++++++++++++++-----------
2ed6e8
 sos/cleaner/parsers/username_parser.py | 13 +++--
2ed6e8
 2 files changed, 45 insertions(+), 37 deletions(-)
2ed6e8
2ed6e8
diff --git a/sos/cleaner/__init__.py b/sos/cleaner/__init__.py
2ed6e8
index b9eb61ef..d10cdc55 100644
2ed6e8
--- a/sos/cleaner/__init__.py
2ed6e8
+++ b/sos/cleaner/__init__.py
2ed6e8
@@ -292,6 +292,7 @@ third party.
2ed6e8
 
2ed6e8
         # we have at least one valid target to obfuscate
2ed6e8
         self.completed_reports = []
2ed6e8
+        self.preload_all_archives_into_maps()
2ed6e8
         self.obfuscate_report_paths()
2ed6e8
 
2ed6e8
         if not self.completed_reports:
2ed6e8
@@ -473,6 +474,44 @@ third party.
2ed6e8
             self.ui_log.info("Exiting on user cancel")
2ed6e8
             os._exit(130)
2ed6e8
 
2ed6e8
+    def preload_all_archives_into_maps(self):
2ed6e8
+        """Before doing the actual obfuscation, if we have multiple archives
2ed6e8
+        to obfuscate then we need to preload each of them into the mappings
2ed6e8
+        to ensure that node1 is obfuscated in node2 as well as node2 being
2ed6e8
+        obfuscated in node1's archive.
2ed6e8
+        """
2ed6e8
+        self.log_info("Pre-loading multiple archives into obfuscation maps")
2ed6e8
+        for _arc in self.report_paths:
2ed6e8
+            is_dir = os.path.isdir(_arc)
2ed6e8
+            if is_dir:
2ed6e8
+                _arc_name = _arc
2ed6e8
+            else:
2ed6e8
+                archive = tarfile.open(_arc)
2ed6e8
+                _arc_name = _arc.split('/')[-1].split('.tar')[0]
2ed6e8
+            # for each parser, load the map_prep_file into memory, and then
2ed6e8
+            # send that for obfuscation. We don't actually obfuscate the file
2ed6e8
+            # here, do that in the normal archive loop
2ed6e8
+            for _parser in self.parsers:
2ed6e8
+                if not _parser.prep_map_file:
2ed6e8
+                    continue
2ed6e8
+                _arc_path = os.path.join(_arc_name, _parser.prep_map_file)
2ed6e8
+                try:
2ed6e8
+                    if is_dir:
2ed6e8
+                        _pfile = open(_arc_path, 'r')
2ed6e8
+                        content = _pfile.read()
2ed6e8
+                    else:
2ed6e8
+                        _pfile = archive.extractfile(_arc_path)
2ed6e8
+                        content = _pfile.read().decode('utf-8')
2ed6e8
+                    _pfile.close()
2ed6e8
+                    if isinstance(_parser, SoSUsernameParser):
2ed6e8
+                        _parser.load_usernames_into_map(content)
2ed6e8
+                    for line in content.splitlines():
2ed6e8
+                        if isinstance(_parser, SoSHostnameParser):
2ed6e8
+                            _parser.load_hostname_into_map(line)
2ed6e8
+                        self.obfuscate_line(line, _parser.prep_map_file)
2ed6e8
+                except Exception as err:
2ed6e8
+                    self.log_debug("Could not prep %s: %s" % (_arc_path, err))
2ed6e8
+
2ed6e8
     def obfuscate_report(self, report):
2ed6e8
         """Individually handle each archive or directory we've discovered by
2ed6e8
         running through each file therein.
2ed6e8
@@ -493,7 +532,6 @@ third party.
2ed6e8
             start_time = datetime.now()
2ed6e8
             arc_md.add_field('start_time', start_time)
2ed6e8
             archive.extract()
2ed6e8
-            self.prep_maps_from_archive(archive)
2ed6e8
             archive.report_msg("Beginning obfuscation...")
2ed6e8
 
2ed6e8
             file_list = archive.get_file_list()
2ed6e8
@@ -542,35 +580,6 @@ third party.
2ed6e8
             self.ui_log.info("Exception while processing %s: %s"
2ed6e8
                              % (report, err))
2ed6e8
 
2ed6e8
-    def prep_maps_from_archive(self, archive):
2ed6e8
-        """Open specific files from an archive and try to load those values
2ed6e8
-        into our mappings before iterating through the entire archive.
2ed6e8
-
2ed6e8
-        Positional arguments:
2ed6e8
-
2ed6e8
-            :param archive SoSObfuscationArchive:   An open archive object
2ed6e8
-        """
2ed6e8
-        for parser in self.parsers:
2ed6e8
-            if not parser.prep_map_file:
2ed6e8
-                continue
2ed6e8
-            prep_file = archive.get_file_path(parser.prep_map_file)
2ed6e8
-            if not prep_file:
2ed6e8
-                self.log_debug("Could not prepare %s: %s does not exist"
2ed6e8
-                               % (parser.name, parser.prep_map_file),
2ed6e8
-                               caller=archive.archive_name)
2ed6e8
-                continue
2ed6e8
-            # this is a bit clunky, but we need to load this particular
2ed6e8
-            # parser in a different way due to how hostnames are validated for
2ed6e8
-            # obfuscation
2ed6e8
-            if isinstance(parser, SoSHostnameParser):
2ed6e8
-                with open(prep_file, 'r') as host_file:
2ed6e8
-                    hostname = host_file.readline().strip()
2ed6e8
-                    parser.load_hostname_into_map(hostname)
2ed6e8
-            if isinstance(parser, SoSUsernameParser):
2ed6e8
-                parser.load_usernames_into_map(prep_file)
2ed6e8
-            self.obfuscate_file(prep_file, parser.prep_map_file,
2ed6e8
-                                archive.archive_name)
2ed6e8
-
2ed6e8
     def obfuscate_file(self, filename, short_name=None, arc_name=None):
2ed6e8
         """Obfuscate and individual file, line by line.
2ed6e8
 
2ed6e8
diff --git a/sos/cleaner/parsers/username_parser.py b/sos/cleaner/parsers/username_parser.py
2ed6e8
index 5223c018..2bb6c7f3 100644
2ed6e8
--- a/sos/cleaner/parsers/username_parser.py
2ed6e8
+++ b/sos/cleaner/parsers/username_parser.py
2ed6e8
@@ -39,16 +39,15 @@ class SoSUsernameParser(SoSCleanerParser):
2ed6e8
         super(SoSUsernameParser, self).__init__(conf_file)
2ed6e8
         self.mapping.load_names_from_options(opt_names)
2ed6e8
 
2ed6e8
-    def load_usernames_into_map(self, fname):
2ed6e8
+    def load_usernames_into_map(self, content):
2ed6e8
         """Since we don't get the list of usernames from a straight regex for
2ed6e8
         this parser, we need to override the initial parser prepping here.
2ed6e8
         """
2ed6e8
-        with open(fname, 'r') as lastfile:
2ed6e8
-            for line in lastfile.read().splitlines()[1:]:
2ed6e8
-                user = line.split()[0]
2ed6e8
-                if user in self.skip_list:
2ed6e8
-                    continue
2ed6e8
-                self.mapping.get(user)
2ed6e8
+        for line in content.splitlines()[1:]:
2ed6e8
+            user = line.split()[0]
2ed6e8
+            if user in self.skip_list:
2ed6e8
+                continue
2ed6e8
+            self.mapping.get(user)
2ed6e8
 
2ed6e8
     def parse_line(self, line):
2ed6e8
         count = 0
2ed6e8
-- 
2ed6e8
2.26.3
2ed6e8
2ed6e8
2ed6e8
From b713f458bfa92427147de754ea36054bfde53d71 Mon Sep 17 00:00:00 2001
2ed6e8
From: Jake Hunsaker <jhunsake@redhat.com>
2ed6e8
Date: Wed, 14 Apr 2021 12:22:28 -0400
2ed6e8
Subject: [PATCH 2/2] [clean] Remove duplicate file skipping within
2ed6e8
 obfuscate_line()
2ed6e8
2ed6e8
A redundant file skipping check was being executed within
2ed6e8
`obfuscate_line()` that would cause subsequent archives being obfuscated
2ed6e8
to skip line obfuscation within a file, despite iterating through the
2ed6e8
entire file.
2ed6e8
2ed6e8
Remove this redundant check, thus allowing proper obfuscation.
2ed6e8
2ed6e8
Closes: #2490
2ed6e8
Related: RHBZ#1930181
2ed6e8
Resolves: #2492
2ed6e8
2ed6e8
Signed-off-by: Jake Hunsaker <jhunsake@redhat.com>
2ed6e8
---
2ed6e8
 sos/cleaner/__init__.py            | 11 +++--------
2ed6e8
 sos/cleaner/obfuscation_archive.py |  2 --
2ed6e8
 2 files changed, 3 insertions(+), 10 deletions(-)
2ed6e8
2ed6e8
diff --git a/sos/cleaner/__init__.py b/sos/cleaner/__init__.py
2ed6e8
index d10cdc55..bdd24f95 100644
2ed6e8
--- a/sos/cleaner/__init__.py
2ed6e8
+++ b/sos/cleaner/__init__.py
2ed6e8
@@ -508,7 +508,7 @@ third party.
2ed6e8
                     for line in content.splitlines():
2ed6e8
                         if isinstance(_parser, SoSHostnameParser):
2ed6e8
                             _parser.load_hostname_into_map(line)
2ed6e8
-                        self.obfuscate_line(line, _parser.prep_map_file)
2ed6e8
+                        self.obfuscate_line(line)
2ed6e8
                 except Exception as err:
2ed6e8
                     self.log_debug("Could not prep %s: %s" % (_arc_path, err))
2ed6e8
 
2ed6e8
@@ -606,7 +606,7 @@ third party.
2ed6e8
                 if not line.strip():
2ed6e8
                     continue
2ed6e8
                 try:
2ed6e8
-                    line, count = self.obfuscate_line(line, short_name)
2ed6e8
+                    line, count = self.obfuscate_line(line)
2ed6e8
                     subs += count
2ed6e8
                     tfile.write(line)
2ed6e8
                 except Exception as err:
2ed6e8
@@ -631,7 +631,7 @@ third party.
2ed6e8
                 pass
2ed6e8
         return string_data
2ed6e8
 
2ed6e8
-    def obfuscate_line(self, line, filename):
2ed6e8
+    def obfuscate_line(self, line):
2ed6e8
         """Run a line through each of the obfuscation parsers, keeping a
2ed6e8
         cumulative total of substitutions done on that particular line.
2ed6e8
 
2ed6e8
@@ -639,16 +639,11 @@ third party.
2ed6e8
 
2ed6e8
             :param line str:        The raw line as read from the file being
2ed6e8
                                     processed
2ed6e8
-            :param filename str:    Filename the line was read from
2ed6e8
 
2ed6e8
         Returns the fully obfuscated line and the number of substitutions made
2ed6e8
         """
2ed6e8
         count = 0
2ed6e8
         for parser in self.parsers:
2ed6e8
-            if filename and any([
2ed6e8
-                re.match(_s, filename) for _s in parser.skip_files
2ed6e8
-            ]):
2ed6e8
-                continue
2ed6e8
             try:
2ed6e8
                 line, _count = parser.parse_line(line)
2ed6e8
                 count += _count
2ed6e8
diff --git a/sos/cleaner/obfuscation_archive.py b/sos/cleaner/obfuscation_archive.py
2ed6e8
index 84ca30cd..c64ab13b 100644
2ed6e8
--- a/sos/cleaner/obfuscation_archive.py
2ed6e8
+++ b/sos/cleaner/obfuscation_archive.py
2ed6e8
@@ -219,8 +219,6 @@ class SoSObfuscationArchive():
2ed6e8
             :param filename str:        Filename relative to the extracted
2ed6e8
                                         archive root
2ed6e8
         """
2ed6e8
-        if filename in self.file_sub_list:
2ed6e8
-            return True
2ed6e8
 
2ed6e8
         if not os.path.isfile(self.get_file_path(filename)):
2ed6e8
             return True
2ed6e8
-- 
2ed6e8
2.26.3
2ed6e8