Blame SOURCES/sos-bz1967110-collect-cleaning-consistency.patch

47940b
From fc0218638f3e865c4315823e72aef2f46d012d07 Mon Sep 17 00:00:00 2001
47940b
From: Jake Hunsaker <jhunsake@redhat.com>
47940b
Date: Wed, 14 Apr 2021 11:55:03 -0400
47940b
Subject: [PATCH 1/2] [clean] Load maps from all archives before obfuscation
47940b
 loop
47940b
47940b
Previously, maps were being prepped via archives after extraction. This
47940b
reduced the amount of file IO being done, but made it so that necessary
47940b
obfuscations from later archives in a series would not be applied to
47940b
the archives obfuscated before those later archives were extracted.
47940b
47940b
Fix this by extracting the map prep files into memory for each archive
47940b
to prep the maps before we enter the obfuscation loop entirely.
47940b
47940b
Closes: #2490
47940b
Related: RHBZ#1930181
47940b
Resolves: #2492
47940b
47940b
Signed-off-by: Jake Hunsaker <jhunsake@redhat.com>
47940b
---
47940b
 sos/cleaner/__init__.py                | 69 +++++++++++++++-----------
47940b
 sos/cleaner/parsers/username_parser.py | 13 +++--
47940b
 2 files changed, 45 insertions(+), 37 deletions(-)
47940b
47940b
diff --git a/sos/cleaner/__init__.py b/sos/cleaner/__init__.py
47940b
index b9eb61ef..d10cdc55 100644
47940b
--- a/sos/cleaner/__init__.py
47940b
+++ b/sos/cleaner/__init__.py
47940b
@@ -292,6 +292,7 @@ third party.
47940b
 
47940b
         # we have at least one valid target to obfuscate
47940b
         self.completed_reports = []
47940b
+        self.preload_all_archives_into_maps()
47940b
         self.obfuscate_report_paths()
47940b
 
47940b
         if not self.completed_reports:
47940b
@@ -473,6 +474,44 @@ third party.
47940b
             self.ui_log.info("Exiting on user cancel")
47940b
             os._exit(130)
47940b
 
47940b
+    def preload_all_archives_into_maps(self):
47940b
+        """Before doing the actual obfuscation, if we have multiple archives
47940b
+        to obfuscate then we need to preload each of them into the mappings
47940b
+        to ensure that node1 is obfuscated in node2 as well as node2 being
47940b
+        obfuscated in node1's archive.
47940b
+        """
47940b
+        self.log_info("Pre-loading multiple archives into obfuscation maps")
47940b
+        for _arc in self.report_paths:
47940b
+            is_dir = os.path.isdir(_arc)
47940b
+            if is_dir:
47940b
+                _arc_name = _arc
47940b
+            else:
47940b
+                archive = tarfile.open(_arc)
47940b
+                _arc_name = _arc.split('/')[-1].split('.tar')[0]
47940b
+            # for each parser, load the map_prep_file into memory, and then
47940b
+            # send that for obfuscation. We don't actually obfuscate the file
47940b
+            # here, do that in the normal archive loop
47940b
+            for _parser in self.parsers:
47940b
+                if not _parser.prep_map_file:
47940b
+                    continue
47940b
+                _arc_path = os.path.join(_arc_name, _parser.prep_map_file)
47940b
+                try:
47940b
+                    if is_dir:
47940b
+                        _pfile = open(_arc_path, 'r')
47940b
+                        content = _pfile.read()
47940b
+                    else:
47940b
+                        _pfile = archive.extractfile(_arc_path)
47940b
+                        content = _pfile.read().decode('utf-8')
47940b
+                    _pfile.close()
47940b
+                    if isinstance(_parser, SoSUsernameParser):
47940b
+                        _parser.load_usernames_into_map(content)
47940b
+                    for line in content.splitlines():
47940b
+                        if isinstance(_parser, SoSHostnameParser):
47940b
+                            _parser.load_hostname_into_map(line)
47940b
+                        self.obfuscate_line(line, _parser.prep_map_file)
47940b
+                except Exception as err:
47940b
+                    self.log_debug("Could not prep %s: %s" % (_arc_path, err))
47940b
+
47940b
     def obfuscate_report(self, report):
47940b
         """Individually handle each archive or directory we've discovered by
47940b
         running through each file therein.
47940b
@@ -493,7 +532,6 @@ third party.
47940b
             start_time = datetime.now()
47940b
             arc_md.add_field('start_time', start_time)
47940b
             archive.extract()
47940b
-            self.prep_maps_from_archive(archive)
47940b
             archive.report_msg("Beginning obfuscation...")
47940b
 
47940b
             file_list = archive.get_file_list()
47940b
@@ -542,35 +580,6 @@ third party.
47940b
             self.ui_log.info("Exception while processing %s: %s"
47940b
                              % (report, err))
47940b
 
47940b
-    def prep_maps_from_archive(self, archive):
47940b
-        """Open specific files from an archive and try to load those values
47940b
-        into our mappings before iterating through the entire archive.
47940b
-
47940b
-        Positional arguments:
47940b
-
47940b
-            :param archive SoSObfuscationArchive:   An open archive object
47940b
-        """
47940b
-        for parser in self.parsers:
47940b
-            if not parser.prep_map_file:
47940b
-                continue
47940b
-            prep_file = archive.get_file_path(parser.prep_map_file)
47940b
-            if not prep_file:
47940b
-                self.log_debug("Could not prepare %s: %s does not exist"
47940b
-                               % (parser.name, parser.prep_map_file),
47940b
-                               caller=archive.archive_name)
47940b
-                continue
47940b
-            # this is a bit clunky, but we need to load this particular
47940b
-            # parser in a different way due to how hostnames are validated for
47940b
-            # obfuscation
47940b
-            if isinstance(parser, SoSHostnameParser):
47940b
-                with open(prep_file, 'r') as host_file:
47940b
-                    hostname = host_file.readline().strip()
47940b
-                    parser.load_hostname_into_map(hostname)
47940b
-            if isinstance(parser, SoSUsernameParser):
47940b
-                parser.load_usernames_into_map(prep_file)
47940b
-            self.obfuscate_file(prep_file, parser.prep_map_file,
47940b
-                                archive.archive_name)
47940b
-
47940b
     def obfuscate_file(self, filename, short_name=None, arc_name=None):
47940b
         """Obfuscate and individual file, line by line.
47940b
 
47940b
diff --git a/sos/cleaner/parsers/username_parser.py b/sos/cleaner/parsers/username_parser.py
47940b
index 5223c018..2bb6c7f3 100644
47940b
--- a/sos/cleaner/parsers/username_parser.py
47940b
+++ b/sos/cleaner/parsers/username_parser.py
47940b
@@ -39,16 +39,15 @@ class SoSUsernameParser(SoSCleanerParser):
47940b
         super(SoSUsernameParser, self).__init__(conf_file)
47940b
         self.mapping.load_names_from_options(opt_names)
47940b
 
47940b
-    def load_usernames_into_map(self, fname):
47940b
+    def load_usernames_into_map(self, content):
47940b
         """Since we don't get the list of usernames from a straight regex for
47940b
         this parser, we need to override the initial parser prepping here.
47940b
         """
47940b
-        with open(fname, 'r') as lastfile:
47940b
-            for line in lastfile.read().splitlines()[1:]:
47940b
-                user = line.split()[0]
47940b
-                if user in self.skip_list:
47940b
-                    continue
47940b
-                self.mapping.get(user)
47940b
+        for line in content.splitlines()[1:]:
47940b
+            user = line.split()[0]
47940b
+            if user in self.skip_list:
47940b
+                continue
47940b
+            self.mapping.get(user)
47940b
 
47940b
     def parse_line(self, line):
47940b
         count = 0
47940b
-- 
47940b
2.26.3
47940b
47940b
47940b
From b713f458bfa92427147de754ea36054bfde53d71 Mon Sep 17 00:00:00 2001
47940b
From: Jake Hunsaker <jhunsake@redhat.com>
47940b
Date: Wed, 14 Apr 2021 12:22:28 -0400
47940b
Subject: [PATCH 2/2] [clean] Remove duplicate file skipping within
47940b
 obfuscate_line()
47940b
47940b
A redundant file skipping check was being executed within
47940b
`obfuscate_line()` that would cause subsequent archives being obfuscated
47940b
to skip line obfuscation within a file, despite iterating through the
47940b
entire file.
47940b
47940b
Remove this redundant check, thus allowing proper obfuscation.
47940b
47940b
Closes: #2490
47940b
Related: RHBZ#1930181
47940b
Resolves: #2492
47940b
47940b
Signed-off-by: Jake Hunsaker <jhunsake@redhat.com>
47940b
---
47940b
 sos/cleaner/__init__.py            | 11 +++--------
47940b
 sos/cleaner/obfuscation_archive.py |  2 --
47940b
 2 files changed, 3 insertions(+), 10 deletions(-)
47940b
47940b
diff --git a/sos/cleaner/__init__.py b/sos/cleaner/__init__.py
47940b
index d10cdc55..bdd24f95 100644
47940b
--- a/sos/cleaner/__init__.py
47940b
+++ b/sos/cleaner/__init__.py
47940b
@@ -508,7 +508,7 @@ third party.
47940b
                     for line in content.splitlines():
47940b
                         if isinstance(_parser, SoSHostnameParser):
47940b
                             _parser.load_hostname_into_map(line)
47940b
-                        self.obfuscate_line(line, _parser.prep_map_file)
47940b
+                        self.obfuscate_line(line)
47940b
                 except Exception as err:
47940b
                     self.log_debug("Could not prep %s: %s" % (_arc_path, err))
47940b
 
47940b
@@ -606,7 +606,7 @@ third party.
47940b
                 if not line.strip():
47940b
                     continue
47940b
                 try:
47940b
-                    line, count = self.obfuscate_line(line, short_name)
47940b
+                    line, count = self.obfuscate_line(line)
47940b
                     subs += count
47940b
                     tfile.write(line)
47940b
                 except Exception as err:
47940b
@@ -631,7 +631,7 @@ third party.
47940b
                 pass
47940b
         return string_data
47940b
 
47940b
-    def obfuscate_line(self, line, filename):
47940b
+    def obfuscate_line(self, line):
47940b
         """Run a line through each of the obfuscation parsers, keeping a
47940b
         cumulative total of substitutions done on that particular line.
47940b
 
47940b
@@ -639,16 +639,11 @@ third party.
47940b
 
47940b
             :param line str:        The raw line as read from the file being
47940b
                                     processed
47940b
-            :param filename str:    Filename the line was read from
47940b
 
47940b
         Returns the fully obfuscated line and the number of substitutions made
47940b
         """
47940b
         count = 0
47940b
         for parser in self.parsers:
47940b
-            if filename and any([
47940b
-                re.match(_s, filename) for _s in parser.skip_files
47940b
-            ]):
47940b
-                continue
47940b
             try:
47940b
                 line, _count = parser.parse_line(line)
47940b
                 count += _count
47940b
diff --git a/sos/cleaner/obfuscation_archive.py b/sos/cleaner/obfuscation_archive.py
47940b
index 84ca30cd..c64ab13b 100644
47940b
--- a/sos/cleaner/obfuscation_archive.py
47940b
+++ b/sos/cleaner/obfuscation_archive.py
47940b
@@ -219,8 +219,6 @@ class SoSObfuscationArchive():
47940b
             :param filename str:        Filename relative to the extracted
47940b
                                         archive root
47940b
         """
47940b
-        if filename in self.file_sub_list:
47940b
-            return True
47940b
 
47940b
         if not os.path.isfile(self.get_file_path(filename)):
47940b
             return True
47940b
-- 
47940b
2.26.3
47940b