|
|
ba407d |
From fc0218638f3e865c4315823e72aef2f46d012d07 Mon Sep 17 00:00:00 2001
|
|
|
ba407d |
From: Jake Hunsaker <jhunsake@redhat.com>
|
|
|
ba407d |
Date: Wed, 14 Apr 2021 11:55:03 -0400
|
|
|
ba407d |
Subject: [PATCH 1/2] [clean] Load maps from all archives before obfuscation
|
|
|
ba407d |
loop
|
|
|
ba407d |
|
|
|
ba407d |
Previously, maps were being prepped via archives after extraction. This
|
|
|
ba407d |
reduced the amount of file IO being done, but made it so that necessary
|
|
|
ba407d |
obfuscations from later archives in a series would not be applied in
|
|
|
ba407d |
the archives obfuscated before those later archives were extracted.
|
|
|
ba407d |
|
|
|
ba407d |
Fix this by extracting the map prep files into memory for each archive
|
|
|
ba407d |
to prep the maps before we enter the obfuscation loop entirely.
|
|
|
ba407d |
|
|
|
ba407d |
Closes: #2490
|
|
|
ba407d |
Related: RHBZ#1930181
|
|
|
ba407d |
Resolves: #2492
|
|
|
ba407d |
|
|
|
ba407d |
Signed-off-by: Jake Hunsaker <jhunsake@redhat.com>
|
|
|
ba407d |
---
|
|
|
ba407d |
sos/cleaner/__init__.py | 69 +++++++++++++++-----------
|
|
|
ba407d |
sos/cleaner/parsers/username_parser.py | 13 +++--
|
|
|
ba407d |
2 files changed, 45 insertions(+), 37 deletions(-)
|
|
|
ba407d |
|
|
|
ba407d |
diff --git a/sos/cleaner/__init__.py b/sos/cleaner/__init__.py
|
|
|
ba407d |
index b9eb61ef..d10cdc55 100644
|
|
|
ba407d |
--- a/sos/cleaner/__init__.py
|
|
|
ba407d |
+++ b/sos/cleaner/__init__.py
|
|
|
ba407d |
@@ -292,6 +292,7 @@ third party.
|
|
|
ba407d |
|
|
|
ba407d |
# we have at least one valid target to obfuscate
|
|
|
ba407d |
self.completed_reports = []
|
|
|
ba407d |
+ self.preload_all_archives_into_maps()
|
|
|
ba407d |
self.obfuscate_report_paths()
|
|
|
ba407d |
|
|
|
ba407d |
if not self.completed_reports:
|
|
|
ba407d |
@@ -473,6 +474,44 @@ third party.
|
|
|
ba407d |
self.ui_log.info("Exiting on user cancel")
|
|
|
ba407d |
os._exit(130)
|
|
|
ba407d |
|
|
|
ba407d |
+ def preload_all_archives_into_maps(self):
|
|
|
ba407d |
+ """Before doing the actual obfuscation, if we have multiple archives
|
|
|
ba407d |
+ to obfuscate then we need to preload each of them into the mappings
|
|
|
ba407d |
+ to ensure that node1 is obfuscated in node2 as well as node2 being
|
|
|
ba407d |
+ obfuscated in node1's archive.
|
|
|
ba407d |
+ """
|
|
|
ba407d |
+ self.log_info("Pre-loading multiple archives into obfuscation maps")
|
|
|
ba407d |
+ for _arc in self.report_paths:
|
|
|
ba407d |
+ is_dir = os.path.isdir(_arc)
|
|
|
ba407d |
+ if is_dir:
|
|
|
ba407d |
+ _arc_name = _arc
|
|
|
ba407d |
+ else:
|
|
|
ba407d |
+ archive = tarfile.open(_arc)
|
|
|
ba407d |
+ _arc_name = _arc.split('/')[-1].split('.tar')[0]
|
|
|
ba407d |
+ # for each parser, load the map_prep_file into memory, and then
|
|
|
ba407d |
+ # send that for obfuscation. We don't actually obfuscate the file
|
|
|
ba407d |
+ # here, do that in the normal archive loop
|
|
|
ba407d |
+ for _parser in self.parsers:
|
|
|
ba407d |
+ if not _parser.prep_map_file:
|
|
|
ba407d |
+ continue
|
|
|
ba407d |
+ _arc_path = os.path.join(_arc_name, _parser.prep_map_file)
|
|
|
ba407d |
+ try:
|
|
|
ba407d |
+ if is_dir:
|
|
|
ba407d |
+ _pfile = open(_arc_path, 'r')
|
|
|
ba407d |
+ content = _pfile.read()
|
|
|
ba407d |
+ else:
|
|
|
ba407d |
+ _pfile = archive.extractfile(_arc_path)
|
|
|
ba407d |
+ content = _pfile.read().decode('utf-8')
|
|
|
ba407d |
+ _pfile.close()
|
|
|
ba407d |
+ if isinstance(_parser, SoSUsernameParser):
|
|
|
ba407d |
+ _parser.load_usernames_into_map(content)
|
|
|
ba407d |
+ for line in content.splitlines():
|
|
|
ba407d |
+ if isinstance(_parser, SoSHostnameParser):
|
|
|
ba407d |
+ _parser.load_hostname_into_map(line)
|
|
|
ba407d |
+ self.obfuscate_line(line, _parser.prep_map_file)
|
|
|
ba407d |
+ except Exception as err:
|
|
|
ba407d |
+ self.log_debug("Could not prep %s: %s" % (_arc_path, err))
|
|
|
ba407d |
+
|
|
|
ba407d |
def obfuscate_report(self, report):
|
|
|
ba407d |
"""Individually handle each archive or directory we've discovered by
|
|
|
ba407d |
running through each file therein.
|
|
|
ba407d |
@@ -493,7 +532,6 @@ third party.
|
|
|
ba407d |
start_time = datetime.now()
|
|
|
ba407d |
arc_md.add_field('start_time', start_time)
|
|
|
ba407d |
archive.extract()
|
|
|
ba407d |
- self.prep_maps_from_archive(archive)
|
|
|
ba407d |
archive.report_msg("Beginning obfuscation...")
|
|
|
ba407d |
|
|
|
ba407d |
file_list = archive.get_file_list()
|
|
|
ba407d |
@@ -542,35 +580,6 @@ third party.
|
|
|
ba407d |
self.ui_log.info("Exception while processing %s: %s"
|
|
|
ba407d |
% (report, err))
|
|
|
ba407d |
|
|
|
ba407d |
- def prep_maps_from_archive(self, archive):
|
|
|
ba407d |
- """Open specific files from an archive and try to load those values
|
|
|
ba407d |
- into our mappings before iterating through the entire archive.
|
|
|
ba407d |
-
|
|
|
ba407d |
- Positional arguments:
|
|
|
ba407d |
-
|
|
|
ba407d |
- :param archive SoSObfuscationArchive: An open archive object
|
|
|
ba407d |
- """
|
|
|
ba407d |
- for parser in self.parsers:
|
|
|
ba407d |
- if not parser.prep_map_file:
|
|
|
ba407d |
- continue
|
|
|
ba407d |
- prep_file = archive.get_file_path(parser.prep_map_file)
|
|
|
ba407d |
- if not prep_file:
|
|
|
ba407d |
- self.log_debug("Could not prepare %s: %s does not exist"
|
|
|
ba407d |
- % (parser.name, parser.prep_map_file),
|
|
|
ba407d |
- caller=archive.archive_name)
|
|
|
ba407d |
- continue
|
|
|
ba407d |
- # this is a bit clunky, but we need to load this particular
|
|
|
ba407d |
- # parser in a different way due to how hostnames are validated for
|
|
|
ba407d |
- # obfuscation
|
|
|
ba407d |
- if isinstance(parser, SoSHostnameParser):
|
|
|
ba407d |
- with open(prep_file, 'r') as host_file:
|
|
|
ba407d |
- hostname = host_file.readline().strip()
|
|
|
ba407d |
- parser.load_hostname_into_map(hostname)
|
|
|
ba407d |
- if isinstance(parser, SoSUsernameParser):
|
|
|
ba407d |
- parser.load_usernames_into_map(prep_file)
|
|
|
ba407d |
- self.obfuscate_file(prep_file, parser.prep_map_file,
|
|
|
ba407d |
- archive.archive_name)
|
|
|
ba407d |
-
|
|
|
ba407d |
def obfuscate_file(self, filename, short_name=None, arc_name=None):
|
|
|
ba407d |
"""Obfuscate and individual file, line by line.
|
|
|
ba407d |
|
|
|
ba407d |
diff --git a/sos/cleaner/parsers/username_parser.py b/sos/cleaner/parsers/username_parser.py
|
|
|
ba407d |
index 5223c018..2bb6c7f3 100644
|
|
|
ba407d |
--- a/sos/cleaner/parsers/username_parser.py
|
|
|
ba407d |
+++ b/sos/cleaner/parsers/username_parser.py
|
|
|
ba407d |
@@ -39,16 +39,15 @@ class SoSUsernameParser(SoSCleanerParser):
|
|
|
ba407d |
super(SoSUsernameParser, self).__init__(conf_file)
|
|
|
ba407d |
self.mapping.load_names_from_options(opt_names)
|
|
|
ba407d |
|
|
|
ba407d |
- def load_usernames_into_map(self, fname):
|
|
|
ba407d |
+ def load_usernames_into_map(self, content):
|
|
|
ba407d |
"""Since we don't get the list of usernames from a straight regex for
|
|
|
ba407d |
this parser, we need to override the initial parser prepping here.
|
|
|
ba407d |
"""
|
|
|
ba407d |
- with open(fname, 'r') as lastfile:
|
|
|
ba407d |
- for line in lastfile.read().splitlines()[1:]:
|
|
|
ba407d |
- user = line.split()[0]
|
|
|
ba407d |
- if user in self.skip_list:
|
|
|
ba407d |
- continue
|
|
|
ba407d |
- self.mapping.get(user)
|
|
|
ba407d |
+ for line in content.splitlines()[1:]:
|
|
|
ba407d |
+ user = line.split()[0]
|
|
|
ba407d |
+ if user in self.skip_list:
|
|
|
ba407d |
+ continue
|
|
|
ba407d |
+ self.mapping.get(user)
|
|
|
ba407d |
|
|
|
ba407d |
def parse_line(self, line):
|
|
|
ba407d |
count = 0
|
|
|
ba407d |
--
|
|
|
ba407d |
2.26.3
|
|
|
ba407d |
|
|
|
ba407d |
|
|
|
ba407d |
From b713f458bfa92427147de754ea36054bfde53d71 Mon Sep 17 00:00:00 2001
|
|
|
ba407d |
From: Jake Hunsaker <jhunsake@redhat.com>
|
|
|
ba407d |
Date: Wed, 14 Apr 2021 12:22:28 -0400
|
|
|
ba407d |
Subject: [PATCH 2/2] [clean] Remove duplicate file skipping within
|
|
|
ba407d |
obfuscate_line()
|
|
|
ba407d |
|
|
|
ba407d |
A redundant file skipping check was being executed within
|
|
|
ba407d |
`obfuscate_line()` that would cause subsequent archives being obfuscated
|
|
|
ba407d |
to skip line obfuscation within a file, despite iterating through the
|
|
|
ba407d |
entire file.
|
|
|
ba407d |
|
|
|
ba407d |
Remove this redundant check, thus allowing proper obfuscation.
|
|
|
ba407d |
|
|
|
ba407d |
Closes: #2490
|
|
|
ba407d |
Related: RHBZ#1930181
|
|
|
ba407d |
Resolves: #2492
|
|
|
ba407d |
|
|
|
ba407d |
Signed-off-by: Jake Hunsaker <jhunsake@redhat.com>
|
|
|
ba407d |
---
|
|
|
ba407d |
sos/cleaner/__init__.py | 11 +++--------
|
|
|
ba407d |
sos/cleaner/obfuscation_archive.py | 2 --
|
|
|
ba407d |
2 files changed, 3 insertions(+), 10 deletions(-)
|
|
|
ba407d |
|
|
|
ba407d |
diff --git a/sos/cleaner/__init__.py b/sos/cleaner/__init__.py
|
|
|
ba407d |
index d10cdc55..bdd24f95 100644
|
|
|
ba407d |
--- a/sos/cleaner/__init__.py
|
|
|
ba407d |
+++ b/sos/cleaner/__init__.py
|
|
|
ba407d |
@@ -508,7 +508,7 @@ third party.
|
|
|
ba407d |
for line in content.splitlines():
|
|
|
ba407d |
if isinstance(_parser, SoSHostnameParser):
|
|
|
ba407d |
_parser.load_hostname_into_map(line)
|
|
|
ba407d |
- self.obfuscate_line(line, _parser.prep_map_file)
|
|
|
ba407d |
+ self.obfuscate_line(line)
|
|
|
ba407d |
except Exception as err:
|
|
|
ba407d |
self.log_debug("Could not prep %s: %s" % (_arc_path, err))
|
|
|
ba407d |
|
|
|
ba407d |
@@ -606,7 +606,7 @@ third party.
|
|
|
ba407d |
if not line.strip():
|
|
|
ba407d |
continue
|
|
|
ba407d |
try:
|
|
|
ba407d |
- line, count = self.obfuscate_line(line, short_name)
|
|
|
ba407d |
+ line, count = self.obfuscate_line(line)
|
|
|
ba407d |
subs += count
|
|
|
ba407d |
tfile.write(line)
|
|
|
ba407d |
except Exception as err:
|
|
|
ba407d |
@@ -631,7 +631,7 @@ third party.
|
|
|
ba407d |
pass
|
|
|
ba407d |
return string_data
|
|
|
ba407d |
|
|
|
ba407d |
- def obfuscate_line(self, line, filename):
|
|
|
ba407d |
+ def obfuscate_line(self, line):
|
|
|
ba407d |
"""Run a line through each of the obfuscation parsers, keeping a
|
|
|
ba407d |
cumulative total of substitutions done on that particular line.
|
|
|
ba407d |
|
|
|
ba407d |
@@ -639,16 +639,11 @@ third party.
|
|
|
ba407d |
|
|
|
ba407d |
:param line str: The raw line as read from the file being
|
|
|
ba407d |
processed
|
|
|
ba407d |
- :param filename str: Filename the line was read from
|
|
|
ba407d |
|
|
|
ba407d |
Returns the fully obfuscated line and the number of substitutions made
|
|
|
ba407d |
"""
|
|
|
ba407d |
count = 0
|
|
|
ba407d |
for parser in self.parsers:
|
|
|
ba407d |
- if filename and any([
|
|
|
ba407d |
- re.match(_s, filename) for _s in parser.skip_files
|
|
|
ba407d |
- ]):
|
|
|
ba407d |
- continue
|
|
|
ba407d |
try:
|
|
|
ba407d |
line, _count = parser.parse_line(line)
|
|
|
ba407d |
count += _count
|
|
|
ba407d |
diff --git a/sos/cleaner/obfuscation_archive.py b/sos/cleaner/obfuscation_archive.py
|
|
|
ba407d |
index 84ca30cd..c64ab13b 100644
|
|
|
ba407d |
--- a/sos/cleaner/obfuscation_archive.py
|
|
|
ba407d |
+++ b/sos/cleaner/obfuscation_archive.py
|
|
|
ba407d |
@@ -219,8 +219,6 @@ class SoSObfuscationArchive():
|
|
|
ba407d |
:param filename str: Filename relative to the extracted
|
|
|
ba407d |
archive root
|
|
|
ba407d |
"""
|
|
|
ba407d |
- if filename in self.file_sub_list:
|
|
|
ba407d |
- return True
|
|
|
ba407d |
|
|
|
ba407d |
if not os.path.isfile(self.get_file_path(filename)):
|
|
|
ba407d |
return True
|
|
|
ba407d |
--
|
|
|
ba407d |
2.26.3
|
|
|
ba407d |
|