|
|
003633 |
From decd39b7799a0579ea085b0da0728b6eabd49b38 Mon Sep 17 00:00:00 2001
|
|
|
003633 |
From: Jake Hunsaker <jhunsake@redhat.com>
|
|
|
003633 |
Date: Wed, 1 Sep 2021 00:28:58 -0400
|
|
|
003633 |
Subject: [PATCH] [clean] Provide archive abstractions to obfuscate more than
|
|
|
003633 |
sos archives
|
|
|
003633 |
|
|
|
003633 |
This commit removes the restriction imposed on `sos clean` since its
|
|
|
003633 |
introduction in sos-4.0 to only work against known sos report archives
|
|
|
003633 |
or build directories. This is because there has been interest in using
|
|
|
003633 |
the obfuscation bits of sos in other data-collector projects.
|
|
|
003633 |
|
|
|
003633 |
The `SoSObfuscationArchive()` class has been revamped to now be an
|
|
|
003633 |
abstraction for different types of archives, and the cleaner logic has
|
|
|
003633 |
been updated to leverage this new abstraction rather than assuming we're
|
|
|
003633 |
working on an sos archive.
|
|
|
003633 |
|
|
|
003633 |
Abstractions are added for our own native use cases - those being `sos
|
|
|
003633 |
report` and `sos collect` for at-runtime obfuscation, as well as
|
|
|
003633 |
standalone archives previously generated. Further generic abstractions
|
|
|
003633 |
are available for plain directories and tarballs; however, these will not
|
|
|
003633 |
provide the same level of coverage as fully supported archive types, as
|
|
|
003633 |
is noted in the manpage for sos-clean.
|
|
|
003633 |
|
|
|
003633 |
Signed-off-by: Jake Hunsaker <jhunsake@redhat.com>
|
|
|
003633 |
---
|
|
|
003633 |
man/en/sos-clean.1 | 25 ++
|
|
|
003633 |
sos/cleaner/__init__.py | 308 +++++++++---------
|
|
|
003633 |
.../__init__.py} | 80 ++++-
|
|
|
003633 |
sos/cleaner/archives/generic.py | 52 +++
|
|
|
003633 |
sos/cleaner/archives/sos.py | 106 ++++++
|
|
|
003633 |
sos/cleaner/parsers/__init__.py | 6 -
|
|
|
003633 |
sos/cleaner/parsers/hostname_parser.py | 1 -
|
|
|
003633 |
sos/cleaner/parsers/ip_parser.py | 1 -
|
|
|
003633 |
sos/cleaner/parsers/keyword_parser.py | 1 -
|
|
|
003633 |
sos/cleaner/parsers/mac_parser.py | 1 -
|
|
|
003633 |
sos/cleaner/parsers/username_parser.py | 8 -
|
|
|
003633 |
tests/cleaner_tests/existing_archive.py | 7 +
|
|
|
003633 |
tests/cleaner_tests/full_report_run.py | 3 +
|
|
|
003633 |
tests/cleaner_tests/report_with_mask.py | 3 +
|
|
|
003633 |
14 files changed, 423 insertions(+), 179 deletions(-)
|
|
|
003633 |
rename sos/cleaner/{obfuscation_archive.py => archives/__init__.py} (81%)
|
|
|
003633 |
create mode 100644 sos/cleaner/archives/generic.py
|
|
|
003633 |
create mode 100644 sos/cleaner/archives/sos.py
|
|
|
003633 |
|
|
|
003633 |
diff --git a/man/en/sos-clean.1 b/man/en/sos-clean.1
|
|
|
003633 |
index b77bc63c..54026713 100644
|
|
|
003633 |
--- a/man/en/sos-clean.1
|
|
|
003633 |
+++ b/man/en/sos-clean.1
|
|
|
003633 |
@@ -10,6 +10,7 @@ sos clean - Obfuscate sensitive data from one or more sosreports
|
|
|
003633 |
[\-\-jobs]
|
|
|
003633 |
[\-\-no-update]
|
|
|
003633 |
[\-\-keep-binary-files]
|
|
|
003633 |
+ [\-\-archive-type]
|
|
|
003633 |
|
|
|
003633 |
.SH DESCRIPTION
|
|
|
003633 |
\fBsos clean\fR or \fBsos mask\fR is an sos subcommand used to obfuscate sensitive information from
|
|
|
003633 |
@@ -88,6 +89,30 @@ Users should review any archive that keeps binary files in place before sending
|
|
|
003633 |
a third party.
|
|
|
003633 |
|
|
|
003633 |
Default: False (remove encountered binary files)
|
|
|
003633 |
+.TP
|
|
|
003633 |
+.B \-\-archive-type TYPE
|
|
|
003633 |
+Specify the type of archive that TARGET was generated as.
|
|
|
003633 |
+When sos inspects a TARGET archive, it tries to identify what type of archive it is.
|
|
|
003633 |
+For example, it may be a report generated by \fBsos report\fR, or a collection of those
|
|
|
003633 |
+reports generated by \fBsos collect\fR, which require separate approaches.
|
|
|
003633 |
+
|
|
|
003633 |
+This option may be useful if a given TARGET archive is known to be of a specific type,
|
|
|
003633 |
+but due to unknown reasons or some malformed/missing information in the archive directly,
|
|
|
003633 |
+that type is not properly identified by sos.
|
|
|
003633 |
+
|
|
|
003633 |
+The following are accepted values for this option:
|
|
|
003633 |
+
|
|
|
003633 |
+ \fBauto\fR Automatically detect the archive type
|
|
|
003633 |
+ \fBreport\fR An archive generated by \fBsos report\fR
|
|
|
003633 |
+ \fBcollect\fR An archive generated by \fBsos collect\fR
|
|
|
003633 |
+
|
|
|
003633 |
+The following may also be used, however note that these do not attempt to pre-load
|
|
|
003633 |
+any information from the archives into the parsers. This means that, among other limitations,
|
|
|
003633 |
+items like host and domain names may not be obfuscated unless an obfuscated mapping already exists
|
|
|
003633 |
+on the system from a previous execution.
|
|
|
003633 |
+
|
|
|
003633 |
+ \fBdata-dir\fR A plain directory on the filesystem.
|
|
|
003633 |
+ \fBtarball\fR A generic tar archive not associated with any known tool
|
|
|
003633 |
|
|
|
003633 |
.SH SEE ALSO
|
|
|
003633 |
.BR sos (1)
|
|
|
003633 |
diff --git a/sos/cleaner/__init__.py b/sos/cleaner/__init__.py
|
|
|
003633 |
index 6aadfe79..6d2eb483 100644
|
|
|
003633 |
--- a/sos/cleaner/__init__.py
|
|
|
003633 |
+++ b/sos/cleaner/__init__.py
|
|
|
003633 |
@@ -12,9 +12,7 @@ import hashlib
|
|
|
003633 |
import json
|
|
|
003633 |
import logging
|
|
|
003633 |
import os
|
|
|
003633 |
-import re
|
|
|
003633 |
import shutil
|
|
|
003633 |
-import tarfile
|
|
|
003633 |
import tempfile
|
|
|
003633 |
|
|
|
003633 |
from concurrent.futures import ThreadPoolExecutor
|
|
|
003633 |
@@ -27,7 +25,10 @@ from sos.cleaner.parsers.mac_parser import SoSMacParser
|
|
|
003633 |
from sos.cleaner.parsers.hostname_parser import SoSHostnameParser
|
|
|
003633 |
from sos.cleaner.parsers.keyword_parser import SoSKeywordParser
|
|
|
003633 |
from sos.cleaner.parsers.username_parser import SoSUsernameParser
|
|
|
003633 |
-from sos.cleaner.obfuscation_archive import SoSObfuscationArchive
|
|
|
003633 |
+from sos.cleaner.archives.sos import (SoSReportArchive, SoSReportDirectory,
|
|
|
003633 |
+ SoSCollectorArchive,
|
|
|
003633 |
+ SoSCollectorDirectory)
|
|
|
003633 |
+from sos.cleaner.archives.generic import DataDirArchive, TarballArchive
|
|
|
003633 |
from sos.utilities import get_human_readable
|
|
|
003633 |
from textwrap import fill
|
|
|
003633 |
|
|
|
003633 |
@@ -41,6 +42,7 @@ class SoSCleaner(SoSComponent):
|
|
|
003633 |
desc = "Obfuscate sensitive networking information in a report"
|
|
|
003633 |
|
|
|
003633 |
arg_defaults = {
|
|
|
003633 |
+ 'archive_type': 'auto',
|
|
|
003633 |
'domains': [],
|
|
|
003633 |
'jobs': 4,
|
|
|
003633 |
'keywords': [],
|
|
|
003633 |
@@ -70,6 +72,7 @@ class SoSCleaner(SoSComponent):
|
|
|
003633 |
self.from_cmdline = False
|
|
|
003633 |
if not hasattr(self.opts, 'jobs'):
|
|
|
003633 |
self.opts.jobs = 4
|
|
|
003633 |
+ self.opts.archive_type = 'auto'
|
|
|
003633 |
self.soslog = logging.getLogger('sos')
|
|
|
003633 |
self.ui_log = logging.getLogger('sos_ui')
|
|
|
003633 |
# create the tmp subdir here to avoid a potential race condition
|
|
|
003633 |
@@ -92,6 +95,17 @@ class SoSCleaner(SoSComponent):
|
|
|
003633 |
SoSUsernameParser(self.cleaner_mapping, self.opts.usernames)
|
|
|
003633 |
]
|
|
|
003633 |
|
|
|
003633 |
+ self.archive_types = [
|
|
|
003633 |
+ SoSReportDirectory,
|
|
|
003633 |
+ SoSReportArchive,
|
|
|
003633 |
+ SoSCollectorDirectory,
|
|
|
003633 |
+ SoSCollectorArchive,
|
|
|
003633 |
+ # make sure these two are always last as they are fallbacks
|
|
|
003633 |
+ DataDirArchive,
|
|
|
003633 |
+ TarballArchive
|
|
|
003633 |
+ ]
|
|
|
003633 |
+ self.nested_archive = None
|
|
|
003633 |
+
|
|
|
003633 |
self.log_info("Cleaner initialized. From cmdline: %s"
|
|
|
003633 |
% self.from_cmdline)
|
|
|
003633 |
|
|
|
003633 |
@@ -178,6 +192,11 @@ third party.
|
|
|
003633 |
)
|
|
|
003633 |
clean_grp.add_argument('target', metavar='TARGET',
|
|
|
003633 |
help='The directory or archive to obfuscate')
|
|
|
003633 |
+ clean_grp.add_argument('--archive-type', default='auto',
|
|
|
003633 |
+ choices=['auto', 'report', 'collect',
|
|
|
003633 |
+ 'data-dir', 'tarball'],
|
|
|
003633 |
+ help=('Specify what kind of archive the target '
|
|
|
003633 |
+ 'was generated as'))
|
|
|
003633 |
clean_grp.add_argument('--domains', action='extend', default=[],
|
|
|
003633 |
help='List of domain names to obfuscate')
|
|
|
003633 |
clean_grp.add_argument('-j', '--jobs', default=4, type=int,
|
|
|
003633 |
@@ -218,59 +237,28 @@ third party.
|
|
|
003633 |
|
|
|
003633 |
In the event the target path is not an archive, abort.
|
|
|
003633 |
"""
|
|
|
003633 |
- if not tarfile.is_tarfile(self.opts.target):
|
|
|
003633 |
- self.ui_log.error(
|
|
|
003633 |
- "Invalid target: must be directory or tar archive"
|
|
|
003633 |
- )
|
|
|
003633 |
- self._exit(1)
|
|
|
003633 |
-
|
|
|
003633 |
- archive = tarfile.open(self.opts.target)
|
|
|
003633 |
- self.arc_name = self.opts.target.split('/')[-1].split('.')[:-2][0]
|
|
|
003633 |
-
|
|
|
003633 |
- try:
|
|
|
003633 |
- archive.getmember(os.path.join(self.arc_name, 'sos_logs'))
|
|
|
003633 |
- except Exception:
|
|
|
003633 |
- # this is not an sos archive
|
|
|
003633 |
- self.ui_log.error("Invalid target: not an sos archive")
|
|
|
003633 |
- self._exit(1)
|
|
|
003633 |
-
|
|
|
003633 |
- # see if there are archives within this archive
|
|
|
003633 |
- nested_archives = []
|
|
|
003633 |
- for _file in archive.getmembers():
|
|
|
003633 |
- if (re.match('sosreport-.*.tar', _file.name.split('/')[-1]) and not
|
|
|
003633 |
- (_file.name.endswith(('.md5', '.sha256')))):
|
|
|
003633 |
- nested_archives.append(_file.name.split('/')[-1])
|
|
|
003633 |
-
|
|
|
003633 |
- if nested_archives:
|
|
|
003633 |
- self.log_info("Found nested archive(s), extracting top level")
|
|
|
003633 |
- nested_path = self.extract_archive(archive)
|
|
|
003633 |
- for arc_file in os.listdir(nested_path):
|
|
|
003633 |
- if re.match('sosreport.*.tar.*', arc_file):
|
|
|
003633 |
- if arc_file.endswith(('.md5', '.sha256')):
|
|
|
003633 |
- continue
|
|
|
003633 |
- self.report_paths.append(os.path.join(nested_path,
|
|
|
003633 |
- arc_file))
|
|
|
003633 |
- # add the toplevel extracted archive
|
|
|
003633 |
- self.report_paths.append(nested_path)
|
|
|
003633 |
+ _arc = None
|
|
|
003633 |
+ if self.opts.archive_type != 'auto':
|
|
|
003633 |
+ check_type = self.opts.archive_type.replace('-', '_')
|
|
|
003633 |
+ for archive in self.archive_types:
|
|
|
003633 |
+ if archive.type_name == check_type:
|
|
|
003633 |
+ _arc = archive(self.opts.target, self.tmpdir)
|
|
|
003633 |
else:
|
|
|
003633 |
- self.report_paths.append(self.opts.target)
|
|
|
003633 |
-
|
|
|
003633 |
- archive.close()
|
|
|
003633 |
-
|
|
|
003633 |
- def extract_archive(self, archive):
|
|
|
003633 |
- """Extract an archive into our tmpdir so that we may inspect it or
|
|
|
003633 |
- iterate through its contents for obfuscation
|
|
|
003633 |
-
|
|
|
003633 |
- Positional arguments:
|
|
|
003633 |
-
|
|
|
003633 |
- :param archive: An open TarFile object for the archive
|
|
|
003633 |
-
|
|
|
003633 |
- """
|
|
|
003633 |
- if not isinstance(archive, tarfile.TarFile):
|
|
|
003633 |
- archive = tarfile.open(archive)
|
|
|
003633 |
- path = os.path.join(self.tmpdir, 'cleaner')
|
|
|
003633 |
- archive.extractall(path)
|
|
|
003633 |
- return os.path.join(path, archive.name.split('/')[-1].split('.tar')[0])
|
|
|
003633 |
+ for arc in self.archive_types:
|
|
|
003633 |
+ if arc.check_is_type(self.opts.target):
|
|
|
003633 |
+ _arc = arc(self.opts.target, self.tmpdir)
|
|
|
003633 |
+ break
|
|
|
003633 |
+ if not _arc:
|
|
|
003633 |
+ return
|
|
|
003633 |
+ self.report_paths.append(_arc)
|
|
|
003633 |
+ if _arc.is_nested:
|
|
|
003633 |
+ self.report_paths.extend(_arc.get_nested_archives())
|
|
|
003633 |
+ # We need to preserve the top level archive until all
|
|
|
003633 |
+ # nested archives are processed
|
|
|
003633 |
+ self.report_paths.remove(_arc)
|
|
|
003633 |
+ self.nested_archive = _arc
|
|
|
003633 |
+ if self.nested_archive:
|
|
|
003633 |
+ self.nested_archive.ui_name = self.nested_archive.description
|
|
|
003633 |
|
|
|
003633 |
def execute(self):
|
|
|
003633 |
"""SoSCleaner will begin by inspecting the TARGET option to determine
|
|
|
003633 |
@@ -283,6 +271,7 @@ third party.
|
|
|
003633 |
be unpacked, cleaned, and repacked and the final top-level archive will
|
|
|
003633 |
then be repacked as well.
|
|
|
003633 |
"""
|
|
|
003633 |
+ self.arc_name = self.opts.target.split('/')[-1].split('.tar')[0]
|
|
|
003633 |
if self.from_cmdline:
|
|
|
003633 |
self.print_disclaimer()
|
|
|
003633 |
self.report_paths = []
|
|
|
003633 |
@@ -290,23 +279,11 @@ third party.
|
|
|
003633 |
self.ui_log.error("Invalid target: no such file or directory %s"
|
|
|
003633 |
% self.opts.target)
|
|
|
003633 |
self._exit(1)
|
|
|
003633 |
- if os.path.isdir(self.opts.target):
|
|
|
003633 |
- self.arc_name = self.opts.target.split('/')[-1]
|
|
|
003633 |
- for _file in os.listdir(self.opts.target):
|
|
|
003633 |
- if _file == 'sos_logs':
|
|
|
003633 |
- self.report_paths.append(self.opts.target)
|
|
|
003633 |
- if (_file.startswith('sosreport') and
|
|
|
003633 |
- (_file.endswith(".tar.gz") or _file.endswith(".tar.xz"))):
|
|
|
003633 |
- self.report_paths.append(os.path.join(self.opts.target,
|
|
|
003633 |
- _file))
|
|
|
003633 |
- if not self.report_paths:
|
|
|
003633 |
- self.ui_log.error("Invalid target: not an sos directory")
|
|
|
003633 |
- self._exit(1)
|
|
|
003633 |
- else:
|
|
|
003633 |
- self.inspect_target_archive()
|
|
|
003633 |
+
|
|
|
003633 |
+ self.inspect_target_archive()
|
|
|
003633 |
|
|
|
003633 |
if not self.report_paths:
|
|
|
003633 |
- self.ui_log.error("No valid sos archives or directories found\n")
|
|
|
003633 |
+ self.ui_log.error("No valid archives or directories found\n")
|
|
|
003633 |
self._exit(1)
|
|
|
003633 |
|
|
|
003633 |
# we have at least one valid target to obfuscate
|
|
|
003633 |
@@ -334,33 +311,7 @@ third party.
|
|
|
003633 |
|
|
|
003633 |
final_path = None
|
|
|
003633 |
if len(self.completed_reports) > 1:
|
|
|
003633 |
- # we have an archive of archives, so repack the obfuscated tarball
|
|
|
003633 |
- arc_name = self.arc_name + '-obfuscated'
|
|
|
003633 |
- self.setup_archive(name=arc_name)
|
|
|
003633 |
- for arc in self.completed_reports:
|
|
|
003633 |
- if arc.is_tarfile:
|
|
|
003633 |
- arc_dest = self.obfuscate_string(
|
|
|
003633 |
- arc.final_archive_path.split('/')[-1]
|
|
|
003633 |
- )
|
|
|
003633 |
- self.archive.add_file(arc.final_archive_path,
|
|
|
003633 |
- dest=arc_dest)
|
|
|
003633 |
- checksum = self.get_new_checksum(arc.final_archive_path)
|
|
|
003633 |
- if checksum is not None:
|
|
|
003633 |
- dname = self.obfuscate_string(
|
|
|
003633 |
- "checksums/%s.%s" % (arc_dest, self.hash_name)
|
|
|
003633 |
- )
|
|
|
003633 |
- self.archive.add_string(checksum, dest=dname)
|
|
|
003633 |
- else:
|
|
|
003633 |
- for dirname, dirs, files in os.walk(arc.archive_path):
|
|
|
003633 |
- for filename in files:
|
|
|
003633 |
- if filename.startswith('sosreport'):
|
|
|
003633 |
- continue
|
|
|
003633 |
- fname = os.path.join(dirname, filename)
|
|
|
003633 |
- dnm = self.obfuscate_string(
|
|
|
003633 |
- fname.split(arc.archive_name)[-1].lstrip('/')
|
|
|
003633 |
- )
|
|
|
003633 |
- self.archive.add_file(fname, dest=dnm)
|
|
|
003633 |
- arc_path = self.archive.finalize(self.opts.compression_type)
|
|
|
003633 |
+ arc_path = self.rebuild_nested_archive()
|
|
|
003633 |
else:
|
|
|
003633 |
arc = self.completed_reports[0]
|
|
|
003633 |
arc_path = arc.final_archive_path
|
|
|
003633 |
@@ -371,8 +322,7 @@ third party.
|
|
|
003633 |
)
|
|
|
003633 |
with open(os.path.join(self.sys_tmp, chksum_name), 'w') as cf:
|
|
|
003633 |
cf.write(checksum)
|
|
|
003633 |
-
|
|
|
003633 |
- self.write_cleaner_log()
|
|
|
003633 |
+ self.write_cleaner_log()
|
|
|
003633 |
|
|
|
003633 |
final_path = self.obfuscate_string(
|
|
|
003633 |
os.path.join(self.sys_tmp, arc_path.split('/')[-1])
|
|
|
003633 |
@@ -393,6 +343,30 @@ third party.
|
|
|
003633 |
|
|
|
003633 |
self.cleanup()
|
|
|
003633 |
|
|
|
003633 |
+ def rebuild_nested_archive(self):
|
|
|
003633 |
+ """Handles repacking the nested tarball, now containing only obfuscated
|
|
|
003633 |
+ copies of the reports, log files, manifest, etc...
|
|
|
003633 |
+ """
|
|
|
003633 |
+ # we have an archive of archives, so repack the obfuscated tarball
|
|
|
003633 |
+ arc_name = self.arc_name + '-obfuscated'
|
|
|
003633 |
+ self.setup_archive(name=arc_name)
|
|
|
003633 |
+ for archive in self.completed_reports:
|
|
|
003633 |
+ arc_dest = archive.final_archive_path.split('/')[-1]
|
|
|
003633 |
+ checksum = self.get_new_checksum(archive.final_archive_path)
|
|
|
003633 |
+ if checksum is not None:
|
|
|
003633 |
+ dname = "checksums/%s.%s" % (arc_dest, self.hash_name)
|
|
|
003633 |
+ self.archive.add_string(checksum, dest=dname)
|
|
|
003633 |
+ for dirn, dirs, files in os.walk(self.nested_archive.extracted_path):
|
|
|
003633 |
+ for filename in files:
|
|
|
003633 |
+ fname = os.path.join(dirn, filename)
|
|
|
003633 |
+ dname = fname.split(self.nested_archive.extracted_path)[-1]
|
|
|
003633 |
+ dname = dname.lstrip('/')
|
|
|
003633 |
+ self.archive.add_file(fname, dest=dname)
|
|
|
003633 |
+ # remove it now so we don't balloon our fs space needs
|
|
|
003633 |
+ os.remove(fname)
|
|
|
003633 |
+ self.write_cleaner_log(archive=True)
|
|
|
003633 |
+ return self.archive.finalize(self.opts.compression_type)
|
|
|
003633 |
+
|
|
|
003633 |
def compile_mapping_dict(self):
|
|
|
003633 |
"""Build a dict that contains each parser's map as a key, with the
|
|
|
003633 |
contents as that key's value. This will then be written to disk in the
|
|
|
003633 |
@@ -441,7 +415,7 @@ third party.
|
|
|
003633 |
self.log_error("Could not update mapping config file: %s"
|
|
|
003633 |
% err)
|
|
|
003633 |
|
|
|
003633 |
- def write_cleaner_log(self):
|
|
|
003633 |
+ def write_cleaner_log(self, archive=False):
|
|
|
003633 |
"""When invoked via the command line, the logging from SoSCleaner will
|
|
|
003633 |
not be added to the archive(s) it processes, so we need to write it
|
|
|
003633 |
separately to disk
|
|
|
003633 |
@@ -454,6 +428,10 @@ third party.
|
|
|
003633 |
for line in self.sos_log_file.readlines():
|
|
|
003633 |
logfile.write(line)
|
|
|
003633 |
|
|
|
003633 |
+ if archive:
|
|
|
003633 |
+ self.obfuscate_file(log_name)
|
|
|
003633 |
+ self.archive.add_file(log_name, dest="sos_logs/cleaner.log")
|
|
|
003633 |
+
|
|
|
003633 |
def get_new_checksum(self, archive_path):
|
|
|
003633 |
"""Calculate a new checksum for the obfuscated archive, as the previous
|
|
|
003633 |
checksum will no longer be valid
|
|
|
003633 |
@@ -481,11 +459,11 @@ third party.
|
|
|
003633 |
be obfuscated concurrently.
|
|
|
003633 |
"""
|
|
|
003633 |
try:
|
|
|
003633 |
- if len(self.report_paths) > 1:
|
|
|
003633 |
- msg = ("Found %s total reports to obfuscate, processing up to "
|
|
|
003633 |
- "%s concurrently\n"
|
|
|
003633 |
- % (len(self.report_paths), self.opts.jobs))
|
|
|
003633 |
- self.ui_log.info(msg)
|
|
|
003633 |
+ msg = (
|
|
|
003633 |
+ "Found %s total reports to obfuscate, processing up to %s "
|
|
|
003633 |
+ "concurrently\n" % (len(self.report_paths), self.opts.jobs)
|
|
|
003633 |
+ )
|
|
|
003633 |
+ self.ui_log.info(msg)
|
|
|
003633 |
if self.opts.keep_binary_files:
|
|
|
003633 |
self.ui_log.warning(
|
|
|
003633 |
"WARNING: binary files that potentially contain sensitive "
|
|
|
003633 |
@@ -494,53 +472,67 @@ third party.
|
|
|
003633 |
pool = ThreadPoolExecutor(self.opts.jobs)
|
|
|
003633 |
pool.map(self.obfuscate_report, self.report_paths, chunksize=1)
|
|
|
003633 |
pool.shutdown(wait=True)
|
|
|
003633 |
+ # finally, obfuscate the nested archive if one exists
|
|
|
003633 |
+ if self.nested_archive:
|
|
|
003633 |
+ self._replace_obfuscated_archives()
|
|
|
003633 |
+ self.obfuscate_report(self.nested_archive)
|
|
|
003633 |
except KeyboardInterrupt:
|
|
|
003633 |
self.ui_log.info("Exiting on user cancel")
|
|
|
003633 |
os._exit(130)
|
|
|
003633 |
|
|
|
003633 |
+ def _replace_obfuscated_archives(self):
|
|
|
003633 |
+ """When we have a nested archive, we need to rebuild the original
|
|
|
003633 |
+ archive, which entails replacing the existing archives with their
|
|
|
003633 |
+ obfuscated counterparts
|
|
|
003633 |
+ """
|
|
|
003633 |
+ for archive in self.completed_reports:
|
|
|
003633 |
+ os.remove(archive.archive_path)
|
|
|
003633 |
+ dest = self.nested_archive.extracted_path
|
|
|
003633 |
+ tarball = archive.final_archive_path.split('/')[-1]
|
|
|
003633 |
+ dest_name = os.path.join(dest, tarball)
|
|
|
003633 |
+ shutil.move(archive.final_archive_path, dest)
|
|
|
003633 |
+ archive.final_archive_path = dest_name
|
|
|
003633 |
+
|
|
|
003633 |
def preload_all_archives_into_maps(self):
|
|
|
003633 |
"""Before doing the actual obfuscation, if we have multiple archives
|
|
|
003633 |
to obfuscate then we need to preload each of them into the mappings
|
|
|
003633 |
to ensure that node1 is obfuscated in node2 as well as node2 being
|
|
|
003633 |
obfuscated in node1's archive.
|
|
|
003633 |
"""
|
|
|
003633 |
- self.log_info("Pre-loading multiple archives into obfuscation maps")
|
|
|
003633 |
+ self.log_info("Pre-loading all archives into obfuscation maps")
|
|
|
003633 |
for _arc in self.report_paths:
|
|
|
003633 |
- is_dir = os.path.isdir(_arc)
|
|
|
003633 |
- if is_dir:
|
|
|
003633 |
- _arc_name = _arc
|
|
|
003633 |
- else:
|
|
|
003633 |
- archive = tarfile.open(_arc)
|
|
|
003633 |
- _arc_name = _arc.split('/')[-1].split('.tar')[0]
|
|
|
003633 |
- # for each parser, load the map_prep_file into memory, and then
|
|
|
003633 |
- # send that for obfuscation. We don't actually obfuscate the file
|
|
|
003633 |
- # here, do that in the normal archive loop
|
|
|
003633 |
for _parser in self.parsers:
|
|
|
003633 |
- if not _parser.prep_map_file:
|
|
|
003633 |
+ try:
|
|
|
003633 |
+ pfile = _arc.prep_files[_parser.name.lower().split()[0]]
|
|
|
003633 |
+ if not pfile:
|
|
|
003633 |
+ continue
|
|
|
003633 |
+ except (IndexError, KeyError):
|
|
|
003633 |
continue
|
|
|
003633 |
- if isinstance(_parser.prep_map_file, str):
|
|
|
003633 |
- _parser.prep_map_file = [_parser.prep_map_file]
|
|
|
003633 |
- for parse_file in _parser.prep_map_file:
|
|
|
003633 |
- _arc_path = os.path.join(_arc_name, parse_file)
|
|
|
003633 |
+ if isinstance(pfile, str):
|
|
|
003633 |
+ pfile = [pfile]
|
|
|
003633 |
+ for parse_file in pfile:
|
|
|
003633 |
+ self.log_debug("Attempting to load %s" % parse_file)
|
|
|
003633 |
try:
|
|
|
003633 |
- if is_dir:
|
|
|
003633 |
- _pfile = open(_arc_path, 'r')
|
|
|
003633 |
- content = _pfile.read()
|
|
|
003633 |
- else:
|
|
|
003633 |
- _pfile = archive.extractfile(_arc_path)
|
|
|
003633 |
- content = _pfile.read().decode('utf-8')
|
|
|
003633 |
- _pfile.close()
|
|
|
003633 |
+ content = _arc.get_file_content(parse_file)
|
|
|
003633 |
+ if not content:
|
|
|
003633 |
+ continue
|
|
|
003633 |
if isinstance(_parser, SoSUsernameParser):
|
|
|
003633 |
_parser.load_usernames_into_map(content)
|
|
|
003633 |
- for line in content.splitlines():
|
|
|
003633 |
- if isinstance(_parser, SoSHostnameParser):
|
|
|
003633 |
- _parser.load_hostname_into_map(line)
|
|
|
003633 |
- self.obfuscate_line(line)
|
|
|
003633 |
+ elif isinstance(_parser, SoSHostnameParser):
|
|
|
003633 |
+ _parser.load_hostname_into_map(
|
|
|
003633 |
+ content.splitlines()[0]
|
|
|
003633 |
+ )
|
|
|
003633 |
+ else:
|
|
|
003633 |
+ for line in content.splitlines():
|
|
|
003633 |
+ self.obfuscate_line(line)
|
|
|
003633 |
except Exception as err:
|
|
|
003633 |
- self.log_debug("Could not prep %s: %s"
|
|
|
003633 |
- % (_arc_path, err))
|
|
|
003633 |
+ self.log_info(
|
|
|
003633 |
+ "Could not prepare %s from %s (archive: %s): %s"
|
|
|
003633 |
+ % (_parser.name, parse_file, _arc.archive_name,
|
|
|
003633 |
+ err)
|
|
|
003633 |
+ )
|
|
|
003633 |
|
|
|
003633 |
- def obfuscate_report(self, report):
|
|
|
003633 |
+ def obfuscate_report(self, archive):
|
|
|
003633 |
"""Individually handle each archive or directory we've discovered by
|
|
|
003633 |
running through each file therein.
|
|
|
003633 |
|
|
|
003633 |
@@ -549,17 +541,12 @@ third party.
|
|
|
003633 |
:param report str: Filepath to the directory or archive
|
|
|
003633 |
"""
|
|
|
003633 |
try:
|
|
|
003633 |
- if not os.access(report, os.W_OK):
|
|
|
003633 |
- msg = "Insufficient permissions on %s" % report
|
|
|
003633 |
- self.log_info(msg)
|
|
|
003633 |
- self.ui_log.error(msg)
|
|
|
003633 |
- return
|
|
|
003633 |
-
|
|
|
003633 |
- archive = SoSObfuscationArchive(report, self.tmpdir)
|
|
|
003633 |
arc_md = self.cleaner_md.add_section(archive.archive_name)
|
|
|
003633 |
start_time = datetime.now()
|
|
|
003633 |
arc_md.add_field('start_time', start_time)
|
|
|
003633 |
- archive.extract()
|
|
|
003633 |
+ # don't double extract nested archives
|
|
|
003633 |
+ if not archive.is_extracted:
|
|
|
003633 |
+ archive.extract()
|
|
|
003633 |
archive.report_msg("Beginning obfuscation...")
|
|
|
003633 |
|
|
|
003633 |
file_list = archive.get_file_list()
|
|
|
003633 |
@@ -586,27 +573,28 @@ third party.
|
|
|
003633 |
caller=archive.archive_name)
|
|
|
003633 |
|
|
|
003633 |
# if the archive was already a tarball, repack it
|
|
|
003633 |
- method = archive.get_compression()
|
|
|
003633 |
- if method:
|
|
|
003633 |
- archive.report_msg("Re-compressing...")
|
|
|
003633 |
- try:
|
|
|
003633 |
- archive.rename_top_dir(
|
|
|
003633 |
- self.obfuscate_string(archive.archive_name)
|
|
|
003633 |
- )
|
|
|
003633 |
- archive.compress(method)
|
|
|
003633 |
- except Exception as err:
|
|
|
003633 |
- self.log_debug("Archive %s failed to compress: %s"
|
|
|
003633 |
- % (archive.archive_name, err))
|
|
|
003633 |
- archive.report_msg("Failed to re-compress archive: %s"
|
|
|
003633 |
- % err)
|
|
|
003633 |
- return
|
|
|
003633 |
+ if not archive.is_nested:
|
|
|
003633 |
+ method = archive.get_compression()
|
|
|
003633 |
+ if method:
|
|
|
003633 |
+ archive.report_msg("Re-compressing...")
|
|
|
003633 |
+ try:
|
|
|
003633 |
+ archive.rename_top_dir(
|
|
|
003633 |
+ self.obfuscate_string(archive.archive_name)
|
|
|
003633 |
+ )
|
|
|
003633 |
+ archive.compress(method)
|
|
|
003633 |
+ except Exception as err:
|
|
|
003633 |
+ self.log_debug("Archive %s failed to compress: %s"
|
|
|
003633 |
+ % (archive.archive_name, err))
|
|
|
003633 |
+ archive.report_msg("Failed to re-compress archive: %s"
|
|
|
003633 |
+ % err)
|
|
|
003633 |
+ return
|
|
|
003633 |
+ self.completed_reports.append(archive)
|
|
|
003633 |
|
|
|
003633 |
end_time = datetime.now()
|
|
|
003633 |
arc_md.add_field('end_time', end_time)
|
|
|
003633 |
arc_md.add_field('run_time', end_time - start_time)
|
|
|
003633 |
arc_md.add_field('files_obfuscated', len(archive.file_sub_list))
|
|
|
003633 |
arc_md.add_field('total_substitutions', archive.total_sub_count)
|
|
|
003633 |
- self.completed_reports.append(archive)
|
|
|
003633 |
rmsg = ''
|
|
|
003633 |
if archive.removed_file_count:
|
|
|
003633 |
rmsg = " [removed %s unprocessable files]"
|
|
|
003633 |
@@ -615,7 +603,7 @@ third party.
|
|
|
003633 |
|
|
|
003633 |
except Exception as err:
|
|
|
003633 |
self.ui_log.info("Exception while processing %s: %s"
|
|
|
003633 |
- % (report, err))
|
|
|
003633 |
+ % (archive.archive_name, err))
|
|
|
003633 |
|
|
|
003633 |
def obfuscate_file(self, filename, short_name=None, arc_name=None):
|
|
|
003633 |
"""Obfuscate and individual file, line by line.
|
|
|
003633 |
@@ -635,6 +623,8 @@ third party.
|
|
|
003633 |
# the requested file doesn't exist in the archive
|
|
|
003633 |
return
|
|
|
003633 |
subs = 0
|
|
|
003633 |
+ if not short_name:
|
|
|
003633 |
+ short_name = filename.split('/')[-1]
|
|
|
003633 |
if not os.path.islink(filename):
|
|
|
003633 |
# don't run the obfuscation on the link, but on the actual file
|
|
|
003633 |
# at some other point.
|
|
|
003633 |
@@ -745,3 +735,5 @@ third party.
|
|
|
003633 |
for parser in self.parsers:
|
|
|
003633 |
_sec = parse_sec.add_section(parser.name.replace(' ', '_').lower())
|
|
|
003633 |
_sec.add_field('entries', len(parser.mapping.dataset.keys()))
|
|
|
003633 |
+
|
|
|
003633 |
+# vim: set et ts=4 sw=4 :
|
|
|
003633 |
diff --git a/sos/cleaner/obfuscation_archive.py b/sos/cleaner/archives/__init__.py
|
|
|
003633 |
similarity index 81%
|
|
|
003633 |
rename from sos/cleaner/obfuscation_archive.py
|
|
|
003633 |
rename to sos/cleaner/archives/__init__.py
|
|
|
003633 |
index ea0b7012..795c5a78 100644
|
|
|
003633 |
--- a/sos/cleaner/obfuscation_archive.py
|
|
|
003633 |
+++ b/sos/cleaner/archives/__init__.py
|
|
|
003633 |
@@ -40,6 +40,10 @@ class SoSObfuscationArchive():
|
|
|
003633 |
file_sub_list = []
|
|
|
003633 |
total_sub_count = 0
|
|
|
003633 |
removed_file_count = 0
|
|
|
003633 |
+ type_name = 'undetermined'
|
|
|
003633 |
+ description = 'undetermined'
|
|
|
003633 |
+ is_nested = False
|
|
|
003633 |
+ prep_files = {}
|
|
|
003633 |
|
|
|
003633 |
def __init__(self, archive_path, tmpdir):
|
|
|
003633 |
self.archive_path = archive_path
|
|
|
003633 |
@@ -50,7 +54,43 @@ class SoSObfuscationArchive():
|
|
|
003633 |
self.soslog = logging.getLogger('sos')
|
|
|
003633 |
self.ui_log = logging.getLogger('sos_ui')
|
|
|
003633 |
self.skip_list = self._load_skip_list()
|
|
|
003633 |
- self.log_info("Loaded %s as an archive" % self.archive_path)
|
|
|
003633 |
+ self.is_extracted = False
|
|
|
003633 |
+ self._load_self()
|
|
|
003633 |
+ self.archive_root = ''
|
|
|
003633 |
+ self.log_info(
|
|
|
003633 |
+ "Loaded %s as type %s"
|
|
|
003633 |
+ % (self.archive_path, self.description)
|
|
|
003633 |
+ )
|
|
|
003633 |
+
|
|
|
003633 |
+ @classmethod
|
|
|
003633 |
+ def check_is_type(cls, arc_path):
|
|
|
003633 |
+ """Check if the archive is a well-known type we directly support"""
|
|
|
003633 |
+ return False
|
|
|
003633 |
+
|
|
|
003633 |
+ def _load_self(self):
|
|
|
003633 |
+ if self.is_tarfile:
|
|
|
003633 |
+ self.tarobj = tarfile.open(self.archive_path)
|
|
|
003633 |
+
|
|
|
003633 |
+ def get_nested_archives(self):
|
|
|
003633 |
+ """Return a list of ObfuscationArchives that represent additional
|
|
|
003633 |
+ archives found within the target archive. For example, an archive from
|
|
|
003633 |
+ `sos collect` will return a list of ``SoSReportArchive`` objects.
|
|
|
003633 |
+
|
|
|
003633 |
+ This should be overridden by individual types of ObfuscationArchive's
|
|
|
003633 |
+ """
|
|
|
003633 |
+ return []
|
|
|
003633 |
+
|
|
|
003633 |
+ def get_archive_root(self):
|
|
|
003633 |
+ """Set the root path for the archive that should be prepended to any
|
|
|
003633 |
+ filenames given to methods in this class.
|
|
|
003633 |
+ """
|
|
|
003633 |
+ if self.is_tarfile:
|
|
|
003633 |
+ toplevel = self.tarobj.firstmember
|
|
|
003633 |
+ if toplevel.isdir():
|
|
|
003633 |
+ return toplevel.name
|
|
|
003633 |
+ else:
|
|
|
003633 |
+ return os.sep
|
|
|
003633 |
+ return os.path.abspath(self.archive_path)
|
|
|
003633 |
|
|
|
003633 |
def report_msg(self, msg):
|
|
|
003633 |
"""Helper to easily format ui messages on a per-report basis"""
|
|
|
003633 |
@@ -96,10 +136,42 @@ class SoSObfuscationArchive():
|
|
|
003633 |
os.remove(full_fname)
|
|
|
003633 |
self.removed_file_count += 1
|
|
|
003633 |
|
|
|
003633 |
- def extract(self):
|
|
|
003633 |
+ def format_file_name(self, fname):
|
|
|
003633 |
+ """Based on the type of archive we're dealing with, do whatever that
|
|
|
003633 |
+ archive requires to a provided **relative** filepath to be able to
|
|
|
003633 |
+ access it within the archive
|
|
|
003633 |
+ """
|
|
|
003633 |
+ if not self.is_extracted:
|
|
|
003633 |
+ if not self.archive_root:
|
|
|
003633 |
+ self.archive_root = self.get_archive_root()
|
|
|
003633 |
+ return os.path.join(self.archive_root, fname)
|
|
|
003633 |
+ else:
|
|
|
003633 |
+ return os.path.join(self.extracted_path, fname)
|
|
|
003633 |
+
|
|
|
003633 |
+ def get_file_content(self, fname):
|
|
|
003633 |
+ """Return the content from the specified fname. Particularly useful for
|
|
|
003633 |
+ tarball-type archives so we can retrieve prep file contents prior to
|
|
|
003633 |
+ extracting the entire archive
|
|
|
003633 |
+ """
|
|
|
003633 |
+ if self.is_extracted is False and self.is_tarfile:
|
|
|
003633 |
+ filename = self.format_file_name(fname)
|
|
|
003633 |
+ try:
|
|
|
003633 |
+ return self.tarobj.extractfile(filename).read().decode('utf-8')
|
|
|
003633 |
+ except KeyError:
|
|
|
003633 |
+ self.log_debug(
|
|
|
003633 |
+ "Unable to retrieve %s: no such file in archive" % fname
|
|
|
003633 |
+ )
|
|
|
003633 |
+ return ''
|
|
|
003633 |
+ else:
|
|
|
003633 |
+ with open(self.format_file_name(fname), 'r') as to_read:
|
|
|
003633 |
+ return to_read.read()
|
|
|
003633 |
+
|
|
|
003633 |
+ def extract(self, quiet=False):
|
|
|
003633 |
if self.is_tarfile:
|
|
|
003633 |
- self.report_msg("Extracting...")
|
|
|
003633 |
+ if not quiet:
|
|
|
003633 |
+ self.report_msg("Extracting...")
|
|
|
003633 |
self.extracted_path = self.extract_self()
|
|
|
003633 |
+ self.is_extracted = True
|
|
|
003633 |
else:
|
|
|
003633 |
self.extracted_path = self.archive_path
|
|
|
003633 |
# if we're running as non-root (e.g. collector), then we can have a
|
|
|
003633 |
@@ -317,3 +389,5 @@ class SoSObfuscationArchive():
|
|
|
003633 |
return False
|
|
|
003633 |
except UnicodeDecodeError:
|
|
|
003633 |
return True
|
|
|
003633 |
+
|
|
|
003633 |
+# vim: set et ts=4 sw=4 :
|
|
|
003633 |
diff --git a/sos/cleaner/archives/generic.py b/sos/cleaner/archives/generic.py
|
|
|
003633 |
new file mode 100644
|
|
|
003633 |
index 00000000..2ce6f09b
|
|
|
003633 |
--- /dev/null
|
|
|
003633 |
+++ b/sos/cleaner/archives/generic.py
|
|
|
003633 |
@@ -0,0 +1,52 @@
|
|
|
003633 |
+# Copyright 2020 Red Hat, Inc. Jake Hunsaker <jhunsake@redhat.com>
|
|
|
003633 |
+
|
|
|
003633 |
+# This file is part of the sos project: https://github.com/sosreport/sos
|
|
|
003633 |
+#
|
|
|
003633 |
+# This copyrighted material is made available to anyone wishing to use,
|
|
|
003633 |
+# modify, copy, or redistribute it subject to the terms and conditions of
|
|
|
003633 |
+# version 2 of the GNU General Public License.
|
|
|
003633 |
+#
|
|
|
003633 |
+# See the LICENSE file in the source distribution for further information.
|
|
|
003633 |
+
|
|
|
003633 |
+
|
|
|
003633 |
+from sos.cleaner.archives import SoSObfuscationArchive
|
|
|
003633 |
+
|
|
|
003633 |
+import os
|
|
|
003633 |
+import tarfile
|
|
|
003633 |
+
|
|
|
003633 |
+
|
|
|
003633 |
+class DataDirArchive(SoSObfuscationArchive):
|
|
|
003633 |
+ """A plain directory on the filesystem that is not directly associated with
|
|
|
003633 |
+ any known or supported collection utility
|
|
|
003633 |
+ """
|
|
|
003633 |
+
|
|
|
003633 |
+ type_name = 'data_dir'
|
|
|
003633 |
+ description = 'unassociated directory'
|
|
|
003633 |
+
|
|
|
003633 |
+ @classmethod
|
|
|
003633 |
+ def check_is_type(cls, arc_path):
|
|
|
003633 |
+ return os.path.isdir(arc_path)
|
|
|
003633 |
+
|
|
|
003633 |
+ def set_archive_root(self):
|
|
|
003633 |
+ return os.path.abspath(self.archive_path)
|
|
|
003633 |
+
|
|
|
003633 |
+
|
|
|
003633 |
+class TarballArchive(SoSObfuscationArchive):
|
|
|
003633 |
+ """A generic tar archive that is not associated with any known or supported
|
|
|
003633 |
+ collection utility
|
|
|
003633 |
+ """
|
|
|
003633 |
+
|
|
|
003633 |
+ type_name = 'tarball'
|
|
|
003633 |
+ description = 'unassociated tarball'
|
|
|
003633 |
+
|
|
|
003633 |
+ @classmethod
|
|
|
003633 |
+ def check_is_type(cls, arc_path):
|
|
|
003633 |
+ try:
|
|
|
003633 |
+ return tarfile.is_tarfile(arc_path)
|
|
|
003633 |
+ except Exception:
|
|
|
003633 |
+ return False
|
|
|
003633 |
+
|
|
|
003633 |
+ def set_archive_root(self):
|
|
|
003633 |
+ if self.tarobj.firstmember.isdir():
|
|
|
003633 |
+ return self.tarobj.firstmember.name
|
|
|
003633 |
+ return ''
|
|
|
003633 |
diff --git a/sos/cleaner/archives/sos.py b/sos/cleaner/archives/sos.py
|
|
|
003633 |
new file mode 100644
|
|
|
003633 |
index 00000000..4401d710
|
|
|
003633 |
--- /dev/null
|
|
|
003633 |
+++ b/sos/cleaner/archives/sos.py
|
|
|
003633 |
@@ -0,0 +1,106 @@
|
|
|
003633 |
+# Copyright 2021 Red Hat, Inc. Jake Hunsaker <jhunsake@redhat.com>
|
|
|
003633 |
+
|
|
|
003633 |
+# This file is part of the sos project: https://github.com/sosreport/sos
|
|
|
003633 |
+#
|
|
|
003633 |
+# This copyrighted material is made available to anyone wishing to use,
|
|
|
003633 |
+# modify, copy, or redistribute it subject to the terms and conditions of
|
|
|
003633 |
+# version 2 of the GNU General Public License.
|
|
|
003633 |
+#
|
|
|
003633 |
+# See the LICENSE file in the source distribution for further information.
|
|
|
003633 |
+
|
|
|
003633 |
+
|
|
|
003633 |
+from sos.cleaner.archives import SoSObfuscationArchive
|
|
|
003633 |
+
|
|
|
003633 |
+import os
|
|
|
003633 |
+import tarfile
|
|
|
003633 |
+
|
|
|
003633 |
+
|
|
|
003633 |
+class SoSReportArchive(SoSObfuscationArchive):
|
|
|
003633 |
+ """This is the class representing an sos report, or in other words the
|
|
|
003633 |
+ type the archive the SoS project natively generates
|
|
|
003633 |
+ """
|
|
|
003633 |
+
|
|
|
003633 |
+ type_name = 'report'
|
|
|
003633 |
+ description = 'sos report archive'
|
|
|
003633 |
+ prep_files = {
|
|
|
003633 |
+ 'hostname': 'sos_commands/host/hostname',
|
|
|
003633 |
+ 'ip': 'sos_commands/networking/ip_-o_addr',
|
|
|
003633 |
+ 'mac': 'sos_commands/networking/ip_-d_address',
|
|
|
003633 |
+ 'username': [
|
|
|
003633 |
+ 'sos_commands/login/lastlog_-u_1000-60000',
|
|
|
003633 |
+ 'sos_commands/login/lastlog_-u_60001-65536',
|
|
|
003633 |
+ 'sos_commands/login/lastlog_-u_65537-4294967295',
|
|
|
003633 |
+ # AD users will be reported here, but favor the lastlog files since
|
|
|
003633 |
+ # those will include local users who have not logged in
|
|
|
003633 |
+ 'sos_commands/login/last'
|
|
|
003633 |
+ ]
|
|
|
003633 |
+ }
|
|
|
003633 |
+
|
|
|
003633 |
+ @classmethod
|
|
|
003633 |
+ def check_is_type(cls, arc_path):
|
|
|
003633 |
+ try:
|
|
|
003633 |
+ return tarfile.is_tarfile(arc_path) and 'sosreport-' in arc_path
|
|
|
003633 |
+ except Exception:
|
|
|
003633 |
+ return False
|
|
|
003633 |
+
|
|
|
003633 |
+
|
|
|
003633 |
+class SoSReportDirectory(SoSReportArchive):
|
|
|
003633 |
+ """This is the archive class representing a build directory, or in other
|
|
|
003633 |
+ words what `sos report --clean` will end up using for in-line obfuscation
|
|
|
003633 |
+ """
|
|
|
003633 |
+
|
|
|
003633 |
+ type_name = 'report_dir'
|
|
|
003633 |
+ description = 'sos report directory'
|
|
|
003633 |
+
|
|
|
003633 |
+ @classmethod
|
|
|
003633 |
+ def check_is_type(cls, arc_path):
|
|
|
003633 |
+ if os.path.isdir(arc_path):
|
|
|
003633 |
+ return 'sos_logs' in os.listdir(arc_path)
|
|
|
003633 |
+ return False
|
|
|
003633 |
+
|
|
|
003633 |
+
|
|
|
003633 |
+class SoSCollectorArchive(SoSObfuscationArchive):
|
|
|
003633 |
+ """Archive class representing the tarball created by ``sos collect``. It
|
|
|
003633 |
+ will not provide prep files on its own, however it will provide a list
|
|
|
003633 |
+ of SoSReportArchive's which will then be used to prep the parsers
|
|
|
003633 |
+ """
|
|
|
003633 |
+
|
|
|
003633 |
+ type_name = 'collect'
|
|
|
003633 |
+ description = 'sos collect tarball'
|
|
|
003633 |
+ is_nested = True
|
|
|
003633 |
+
|
|
|
003633 |
+ @classmethod
|
|
|
003633 |
+ def check_is_type(cls, arc_path):
|
|
|
003633 |
+ try:
|
|
|
003633 |
+ return (tarfile.is_tarfile(arc_path) and 'sos-collect' in arc_path)
|
|
|
003633 |
+ except Exception:
|
|
|
003633 |
+ return False
|
|
|
003633 |
+
|
|
|
003633 |
+ def get_nested_archives(self):
|
|
|
003633 |
+ self.extract(quiet=True)
|
|
|
003633 |
+ _path = self.extracted_path
|
|
|
003633 |
+ archives = []
|
|
|
003633 |
+ for fname in os.listdir(_path):
|
|
|
003633 |
+ arc_name = os.path.join(_path, fname)
|
|
|
003633 |
+ if 'sosreport-' in fname and tarfile.is_tarfile(arc_name):
|
|
|
003633 |
+ archives.append(SoSReportArchive(arc_name, self.tmpdir))
|
|
|
003633 |
+ return archives
|
|
|
003633 |
+
|
|
|
003633 |
+
|
|
|
003633 |
+class SoSCollectorDirectory(SoSCollectorArchive):
|
|
|
003633 |
+ """The archive class representing the temp directory used by ``sos
|
|
|
003633 |
+ collect`` when ``--clean`` is used during runtime.
|
|
|
003633 |
+ """
|
|
|
003633 |
+
|
|
|
003633 |
+ type_name = 'collect_dir'
|
|
|
003633 |
+ description = 'sos collect directory'
|
|
|
003633 |
+
|
|
|
003633 |
+ @classmethod
|
|
|
003633 |
+ def check_is_type(cls, arc_path):
|
|
|
003633 |
+ if os.path.isdir(arc_path):
|
|
|
003633 |
+ for fname in os.listdir(arc_path):
|
|
|
003633 |
+ if 'sos-collector-' in fname:
|
|
|
003633 |
+ return True
|
|
|
003633 |
+ return False
|
|
|
003633 |
+
|
|
|
003633 |
+# vim: set et ts=4 sw=4 :
|
|
|
003633 |
diff --git a/sos/cleaner/parsers/__init__.py b/sos/cleaner/parsers/__init__.py
|
|
|
003633 |
index af6e375e..e62fd938 100644
|
|
|
003633 |
--- a/sos/cleaner/parsers/__init__.py
|
|
|
003633 |
+++ b/sos/cleaner/parsers/__init__.py
|
|
|
003633 |
@@ -37,11 +37,6 @@ class SoSCleanerParser():
|
|
|
003633 |
:cvar map_file_key: The key in the ``map_file`` to read when loading
|
|
|
003633 |
previous obfuscation matches
|
|
|
003633 |
:vartype map_file_key: ``str``
|
|
|
003633 |
-
|
|
|
003633 |
-
|
|
|
003633 |
- :cvar prep_map_file: File to read from an archive to pre-seed the map with
|
|
|
003633 |
- matches. E.G. ip_addr for loading IP addresses
|
|
|
003633 |
- :vartype prep_map_fie: ``str``
|
|
|
003633 |
"""
|
|
|
003633 |
|
|
|
003633 |
name = 'Undefined Parser'
|
|
|
003633 |
@@ -49,7 +44,6 @@ class SoSCleanerParser():
|
|
|
003633 |
skip_line_patterns = []
|
|
|
003633 |
skip_files = []
|
|
|
003633 |
map_file_key = 'unset'
|
|
|
003633 |
- prep_map_file = []
|
|
|
003633 |
|
|
|
003633 |
def __init__(self, config={}):
|
|
|
003633 |
if self.map_file_key in config:
|
|
|
003633 |
diff --git a/sos/cleaner/parsers/hostname_parser.py b/sos/cleaner/parsers/hostname_parser.py
|
|
|
003633 |
index 71e13d3f..daa76a62 100644
|
|
|
003633 |
--- a/sos/cleaner/parsers/hostname_parser.py
|
|
|
003633 |
+++ b/sos/cleaner/parsers/hostname_parser.py
|
|
|
003633 |
@@ -16,7 +16,6 @@ class SoSHostnameParser(SoSCleanerParser):
|
|
|
003633 |
|
|
|
003633 |
name = 'Hostname Parser'
|
|
|
003633 |
map_file_key = 'hostname_map'
|
|
|
003633 |
- prep_map_file = 'sos_commands/host/hostname'
|
|
|
003633 |
regex_patterns = [
|
|
|
003633 |
r'(((\b|_)[a-zA-Z0-9-\.]{1,200}\.[a-zA-Z]{1,63}(\b|_)))'
|
|
|
003633 |
]
|
|
|
003633 |
diff --git a/sos/cleaner/parsers/ip_parser.py b/sos/cleaner/parsers/ip_parser.py
|
|
|
003633 |
index 525139e8..71d38be8 100644
|
|
|
003633 |
--- a/sos/cleaner/parsers/ip_parser.py
|
|
|
003633 |
+++ b/sos/cleaner/parsers/ip_parser.py
|
|
|
003633 |
@@ -41,7 +41,6 @@ class SoSIPParser(SoSCleanerParser):
|
|
|
003633 |
]
|
|
|
003633 |
|
|
|
003633 |
map_file_key = 'ip_map'
|
|
|
003633 |
- prep_map_file = 'sos_commands/networking/ip_-o_addr'
|
|
|
003633 |
|
|
|
003633 |
def __init__(self, config):
|
|
|
003633 |
self.mapping = SoSIPMap()
|
|
|
003633 |
diff --git a/sos/cleaner/parsers/keyword_parser.py b/sos/cleaner/parsers/keyword_parser.py
|
|
|
003633 |
index 68de3727..694c6073 100644
|
|
|
003633 |
--- a/sos/cleaner/parsers/keyword_parser.py
|
|
|
003633 |
+++ b/sos/cleaner/parsers/keyword_parser.py
|
|
|
003633 |
@@ -20,7 +20,6 @@ class SoSKeywordParser(SoSCleanerParser):
|
|
|
003633 |
|
|
|
003633 |
name = 'Keyword Parser'
|
|
|
003633 |
map_file_key = 'keyword_map'
|
|
|
003633 |
- prep_map_file = ''
|
|
|
003633 |
|
|
|
003633 |
def __init__(self, config, keywords=None, keyword_file=None):
|
|
|
003633 |
self.mapping = SoSKeywordMap()
|
|
|
003633 |
diff --git a/sos/cleaner/parsers/mac_parser.py b/sos/cleaner/parsers/mac_parser.py
|
|
|
003633 |
index 7ca80b8d..c74288cf 100644
|
|
|
003633 |
--- a/sos/cleaner/parsers/mac_parser.py
|
|
|
003633 |
+++ b/sos/cleaner/parsers/mac_parser.py
|
|
|
003633 |
@@ -30,7 +30,6 @@ class SoSMacParser(SoSCleanerParser):
|
|
|
003633 |
'534f:53'
|
|
|
003633 |
)
|
|
|
003633 |
map_file_key = 'mac_map'
|
|
|
003633 |
- prep_map_file = 'sos_commands/networking/ip_-d_address'
|
|
|
003633 |
|
|
|
003633 |
def __init__(self, config):
|
|
|
003633 |
self.mapping = SoSMacMap()
|
|
|
003633 |
diff --git a/sos/cleaner/parsers/username_parser.py b/sos/cleaner/parsers/username_parser.py
|
|
|
003633 |
index b142e371..35377a31 100644
|
|
|
003633 |
--- a/sos/cleaner/parsers/username_parser.py
|
|
|
003633 |
+++ b/sos/cleaner/parsers/username_parser.py
|
|
|
003633 |
@@ -25,14 +25,6 @@ class SoSUsernameParser(SoSCleanerParser):
|
|
|
003633 |
|
|
|
003633 |
name = 'Username Parser'
|
|
|
003633 |
map_file_key = 'username_map'
|
|
|
003633 |
- prep_map_file = [
|
|
|
003633 |
- 'sos_commands/login/lastlog_-u_1000-60000',
|
|
|
003633 |
- 'sos_commands/login/lastlog_-u_60001-65536',
|
|
|
003633 |
- 'sos_commands/login/lastlog_-u_65537-4294967295',
|
|
|
003633 |
- # AD users will be reported here, but favor the lastlog files since
|
|
|
003633 |
- # those will include local users who have not logged in
|
|
|
003633 |
- 'sos_commands/login/last'
|
|
|
003633 |
- ]
|
|
|
003633 |
regex_patterns = []
|
|
|
003633 |
skip_list = [
|
|
|
003633 |
'core',
|
|
|
003633 |
diff --git a/tests/cleaner_tests/existing_archive.py b/tests/cleaner_tests/existing_archive.py
|
|
|
003633 |
index 0eaf6c8d..e13d1cae 100644
|
|
|
003633 |
--- a/tests/cleaner_tests/existing_archive.py
|
|
|
003633 |
+++ b/tests/cleaner_tests/existing_archive.py
|
|
|
003633 |
@@ -28,6 +28,13 @@ class ExistingArchiveCleanTest(StageTwoReportTest):
|
|
|
003633 |
def test_obfuscation_log_created(self):
|
|
|
003633 |
self.assertFileExists(os.path.join(self.tmpdir, '%s-obfuscation.log' % ARCHIVE))
|
|
|
003633 |
|
|
|
003633 |
+ def test_archive_type_correct(self):
|
|
|
003633 |
+ with open(os.path.join(self.tmpdir, '%s-obfuscation.log' % ARCHIVE), 'r') as log:
|
|
|
003633 |
+ for line in log:
|
|
|
003633 |
+ if "Loaded %s" % ARCHIVE in line:
|
|
|
003633 |
+ assert 'as type sos report archive' in line, "Incorrect archive type detected: %s" % line
|
|
|
003633 |
+ break
|
|
|
003633 |
+
|
|
|
003633 |
def test_from_cmdline_logged(self):
|
|
|
003633 |
with open(os.path.join(self.tmpdir, '%s-obfuscation.log' % ARCHIVE), 'r') as log:
|
|
|
003633 |
for line in log:
|
|
|
003633 |
diff --git a/tests/cleaner_tests/full_report_run.py b/tests/cleaner_tests/full_report_run.py
|
|
|
003633 |
index 3b28e7a2..2de54946 100644
|
|
|
003633 |
--- a/tests/cleaner_tests/full_report_run.py
|
|
|
003633 |
+++ b/tests/cleaner_tests/full_report_run.py
|
|
|
003633 |
@@ -35,6 +35,9 @@ class FullCleanTest(StageTwoReportTest):
|
|
|
003633 |
def test_tarball_named_obfuscated(self):
|
|
|
003633 |
self.assertTrue('obfuscated' in self.archive)
|
|
|
003633 |
|
|
|
003633 |
+ def test_archive_type_correct(self):
|
|
|
003633 |
+ self.assertSosLogContains('Loaded .* as type sos report directory')
|
|
|
003633 |
+
|
|
|
003633 |
def test_hostname_not_in_any_file(self):
|
|
|
003633 |
host = self.sysinfo['pre']['networking']['hostname']
|
|
|
003633 |
# much faster to just use grep here
|
|
|
003633 |
diff --git a/tests/cleaner_tests/report_with_mask.py b/tests/cleaner_tests/report_with_mask.py
|
|
|
003633 |
index 4f94ba33..08e873d4 100644
|
|
|
003633 |
--- a/tests/cleaner_tests/report_with_mask.py
|
|
|
003633 |
+++ b/tests/cleaner_tests/report_with_mask.py
|
|
|
003633 |
@@ -31,6 +31,9 @@ class ReportWithMask(StageOneReportTest):
|
|
|
003633 |
def test_tarball_named_obfuscated(self):
|
|
|
003633 |
self.assertTrue('obfuscated' in self.archive)
|
|
|
003633 |
|
|
|
003633 |
+ def test_archive_type_correct(self):
|
|
|
003633 |
+ self.assertSosLogContains('Loaded .* as type sos report directory')
|
|
|
003633 |
+
|
|
|
003633 |
def test_localhost_was_obfuscated(self):
|
|
|
003633 |
self.assertFileHasContent('/etc/hostname', 'host0')
|
|
|
003633 |
|
|
|
003633 |
--
|
|
|
003633 |
2.31.1
|
|
|
003633 |
|
|
|
003633 |
From 9b119f860eaec089f7ef884ff39c42589a662994 Mon Sep 17 00:00:00 2001
|
|
|
003633 |
From: Jake Hunsaker <jhunsake@redhat.com>
|
|
|
003633 |
Date: Wed, 1 Sep 2021 00:34:04 -0400
|
|
|
003633 |
Subject: [PATCH] [hostname_map] Add a catch for single-character hostnames
|
|
|
003633 |
|
|
|
003633 |
If a log file was truncated at a specific boundary in a string of the
|
|
|
003633 |
FQDN of the host such that we only get a couple characters before the
|
|
|
003633 |
rest of the domain, we would previously boldly replace all instances of
|
|
|
003633 |
that character with the obfuscated short name; not very helpful.
|
|
|
003633 |
|
|
|
003633 |
Instead, don't sanitize the short name if this happens and instead
|
|
|
003633 |
obfuscate the whole FQDN as 'unknown.example.com'.
|
|
|
003633 |
|
|
|
003633 |
Signed-off-by: Jake Hunsaker <jhunsake@redhat.com>
|
|
|
003633 |
---
|
|
|
003633 |
sos/cleaner/mappings/hostname_map.py | 9 ++++++++-
|
|
|
003633 |
1 file changed, 8 insertions(+), 1 deletion(-)
|
|
|
003633 |
|
|
|
003633 |
diff --git a/sos/cleaner/mappings/hostname_map.py b/sos/cleaner/mappings/hostname_map.py
|
|
|
003633 |
index d4b2c88e..e70a5530 100644
|
|
|
003633 |
--- a/sos/cleaner/mappings/hostname_map.py
|
|
|
003633 |
+++ b/sos/cleaner/mappings/hostname_map.py
|
|
|
003633 |
@@ -184,7 +184,14 @@ class SoSHostnameMap(SoSMap):
|
|
|
003633 |
hostname = host[0]
|
|
|
003633 |
domain = host[1:]
|
|
|
003633 |
# obfuscate the short name
|
|
|
003633 |
- ob_hostname = self.sanitize_short_name(hostname)
|
|
|
003633 |
+ if len(hostname) > 2:
|
|
|
003633 |
+ ob_hostname = self.sanitize_short_name(hostname)
|
|
|
003633 |
+ else:
|
|
|
003633 |
+ # by best practice it appears the host part of the fqdn was cut
|
|
|
003633 |
+ # off due to some form of truncating, as such don't obfuscate
|
|
|
003633 |
+ # short strings that are likely to throw off obfuscation of
|
|
|
003633 |
+ # unrelated bits and paths
|
|
|
003633 |
+ ob_hostname = 'unknown'
|
|
|
003633 |
ob_domain = self.sanitize_domain(domain)
|
|
|
003633 |
self.dataset[item] = ob_domain
|
|
|
003633 |
return '.'.join([ob_hostname, ob_domain])
|
|
|
003633 |
--
|
|
|
003633 |
2.31.1
|
|
|
003633 |
|
|
|
003633 |
From f3f3e763d7c31b7b7cafdf8dd4dab87056fb7696 Mon Sep 17 00:00:00 2001
|
|
|
003633 |
From: Jake Hunsaker <jhunsake@redhat.com>
|
|
|
003633 |
Date: Wed, 1 Sep 2021 15:54:55 -0400
|
|
|
003633 |
Subject: [PATCH] [cleaner] Add support for Insights client archives
|
|
|
003633 |
|
|
|
003633 |
Adds a new type of `SoSObfuscationArchive` to add support for
|
|
|
003633 |
obfuscating archives generated by the Insights project.
|
|
|
003633 |
|
|
|
003633 |
Signed-off-by: Jake Hunsaker <jhunsake@redhat.com>
|
|
|
003633 |
---
|
|
|
003633 |
man/en/sos-clean.1 | 1 +
|
|
|
003633 |
sos/cleaner/__init__.py | 4 ++-
|
|
|
003633 |
sos/cleaner/archives/insights.py | 42 ++++++++++++++++++++++++++++++++
|
|
|
003633 |
3 files changed, 46 insertions(+), 1 deletion(-)
|
|
|
003633 |
create mode 100644 sos/cleaner/archives/insights.py
|
|
|
003633 |
|
|
|
003633 |
diff --git a/man/en/sos-clean.1 b/man/en/sos-clean.1
|
|
|
003633 |
index 54026713..358ec0cb 100644
|
|
|
003633 |
--- a/man/en/sos-clean.1
|
|
|
003633 |
+++ b/man/en/sos-clean.1
|
|
|
003633 |
@@ -105,6 +105,7 @@ The following are accepted values for this option:
|
|
|
003633 |
\fBauto\fR Automatically detect the archive type
|
|
|
003633 |
\fBreport\fR An archive generated by \fBsos report\fR
|
|
|
003633 |
\fBcollect\fR An archive generated by \fBsos collect\fR
|
|
|
003633 |
+ \fBinsights\fR An archive generated by the \fBinsights-client\fR package
|
|
|
003633 |
|
|
|
003633 |
The following may also be used, however note that these do not attempt to pre-load
|
|
|
003633 |
any information from the archives into the parsers. This means that, among other limitations,
|
|
|
003633 |
diff --git a/sos/cleaner/__init__.py b/sos/cleaner/__init__.py
|
|
|
003633 |
index 6d2eb483..3e08aa28 100644
|
|
|
003633 |
--- a/sos/cleaner/__init__.py
|
|
|
003633 |
+++ b/sos/cleaner/__init__.py
|
|
|
003633 |
@@ -29,6 +29,7 @@ from sos.cleaner.archives.sos import (SoSReportArchive, SoSReportDirectory,
|
|
|
003633 |
SoSCollectorArchive,
|
|
|
003633 |
SoSCollectorDirectory)
|
|
|
003633 |
from sos.cleaner.archives.generic import DataDirArchive, TarballArchive
|
|
|
003633 |
+from sos.cleaner.archives.insights import InsightsArchive
|
|
|
003633 |
from sos.utilities import get_human_readable
|
|
|
003633 |
from textwrap import fill
|
|
|
003633 |
|
|
|
003633 |
@@ -100,6 +101,7 @@ class SoSCleaner(SoSComponent):
|
|
|
003633 |
SoSReportArchive,
|
|
|
003633 |
SoSCollectorDirectory,
|
|
|
003633 |
SoSCollectorArchive,
|
|
|
003633 |
+ InsightsArchive,
|
|
|
003633 |
# make sure these two are always last as they are fallbacks
|
|
|
003633 |
DataDirArchive,
|
|
|
003633 |
TarballArchive
|
|
|
003633 |
@@ -194,7 +196,7 @@ third party.
|
|
|
003633 |
help='The directory or archive to obfuscate')
|
|
|
003633 |
clean_grp.add_argument('--archive-type', default='auto',
|
|
|
003633 |
choices=['auto', 'report', 'collect',
|
|
|
003633 |
- 'data-dir', 'tarball'],
|
|
|
003633 |
+ 'insights', 'data-dir', 'tarball'],
|
|
|
003633 |
help=('Specify what kind of archive the target '
|
|
|
003633 |
'was generated as'))
|
|
|
003633 |
clean_grp.add_argument('--domains', action='extend', default=[],
|
|
|
003633 |
diff --git a/sos/cleaner/archives/insights.py b/sos/cleaner/archives/insights.py
|
|
|
003633 |
new file mode 100644
|
|
|
003633 |
index 00000000..dab48b16
|
|
|
003633 |
--- /dev/null
|
|
|
003633 |
+++ b/sos/cleaner/archives/insights.py
|
|
|
003633 |
@@ -0,0 +1,42 @@
|
|
|
003633 |
+# Copyright 2021 Red Hat, Inc. Jake Hunsaker <jhunsake@redhat.com>
|
|
|
003633 |
+
|
|
|
003633 |
+# This file is part of the sos project: https://github.com/sosreport/sos
|
|
|
003633 |
+#
|
|
|
003633 |
+# This copyrighted material is made available to anyone wishing to use,
|
|
|
003633 |
+# modify, copy, or redistribute it subject to the terms and conditions of
|
|
|
003633 |
+# version 2 of the GNU General Public License.
|
|
|
003633 |
+#
|
|
|
003633 |
+# See the LICENSE file in the source distribution for further information.
|
|
|
003633 |
+
|
|
|
003633 |
+
|
|
|
003633 |
+from sos.cleaner.archives import SoSObfuscationArchive
|
|
|
003633 |
+
|
|
|
003633 |
+import tarfile
|
|
|
003633 |
+
|
|
|
003633 |
+
|
|
|
003633 |
+class InsightsArchive(SoSObfuscationArchive):
|
|
|
003633 |
+ """This class represents archives generated by the insights-client utility
|
|
|
003633 |
+ for RHEL systems.
|
|
|
003633 |
+ """
|
|
|
003633 |
+
|
|
|
003633 |
+ type_name = 'insights'
|
|
|
003633 |
+ description = 'insights-client archive'
|
|
|
003633 |
+
|
|
|
003633 |
+ prep_files = {
|
|
|
003633 |
+ 'hostname': 'data/insights_commands/hostname_-f',
|
|
|
003633 |
+ 'ip': 'data/insights_commands/ip_addr',
|
|
|
003633 |
+ 'mac': 'data/insights_commands/ip_addr'
|
|
|
003633 |
+ }
|
|
|
003633 |
+
|
|
|
003633 |
+ @classmethod
|
|
|
003633 |
+ def check_is_type(cls, arc_path):
|
|
|
003633 |
+ try:
|
|
|
003633 |
+ return tarfile.is_tarfile(arc_path) and 'insights-' in arc_path
|
|
|
003633 |
+ except Exception:
|
|
|
003633 |
+ return False
|
|
|
003633 |
+
|
|
|
003633 |
+ def get_archive_root(self):
|
|
|
003633 |
+ top = self.archive_path.split('/')[-1].split('.tar')[0]
|
|
|
003633 |
+ if self.tarobj.firstmember.name == '.':
|
|
|
003633 |
+ top = './' + top
|
|
|
003633 |
+ return top
|
|
|
003633 |
--
|
|
|
003633 |
2.31.1
|
|
|
003633 |
|
|
|
003633 |
From 9639dc3d240076b55f2a1d04b43ea42bebd09215 Mon Sep 17 00:00:00 2001
|
|
|
003633 |
From: Jake Hunsaker <jhunsake@redhat.com>
|
|
|
003633 |
Date: Tue, 16 Nov 2021 17:50:42 -0500
|
|
|
003633 |
Subject: [PATCH] [clean,hostname_parser] Source /etc/hosts for obfuscation
|
|
|
003633 |
|
|
|
003633 |
Up until now, our sourcing of hostnames/domains for obfuscation has been
|
|
|
003633 |
dependent upon the output of the `hostname` command. However, some
|
|
|
003633 |
scenarios have come up where sourcing `/etc/hosts` is advantageous for
|
|
|
003633 |
several reasons:
|
|
|
003633 |
|
|
|
003633 |
First, if `hostname` output is unavailable, this provides a fallback
|
|
|
003633 |
measure.
|
|
|
003633 |
|
|
|
003633 |
Second, `/etc/hosts` is a common place to have short names defined which
|
|
|
003633 |
would otherwise not be detected (or at the very least would result in a
|
|
|
003633 |
race condition based on where/if the short name was elsewhere able to be
|
|
|
003633 |
gleaned from an FQDN), thus leaving the potential for unobfuscated data
|
|
|
003633 |
in an archive.
|
|
|
003633 |
|
|
|
003633 |
Due to both the nature of hostname obfuscation and the malleable syntax
|
|
|
003633 |
of `/etc/hosts`, the parsing of this file needs special handling not
|
|
|
003633 |
covered by our more generic parsing and obfuscation methods.
|
|
|
003633 |
|
|
|
003633 |
Signed-off-by: Jake Hunsaker <jhunsake@redhat.com>
|
|
|
003633 |
---
|
|
|
003633 |
sos/cleaner/__init__.py | 11 ++++++++---
|
|
|
003633 |
sos/cleaner/archives/sos.py | 5 ++++-
|
|
|
003633 |
sos/cleaner/parsers/hostname_parser.py | 19 +++++++++++++++++++
|
|
|
003633 |
3 files changed, 31 insertions(+), 4 deletions(-)
|
|
|
003633 |
|
|
|
003633 |
diff --git a/sos/cleaner/__init__.py b/sos/cleaner/__init__.py
|
|
|
003633 |
index ed461a8f..3f530d44 100644
|
|
|
003633 |
--- a/sos/cleaner/__init__.py
|
|
|
003633 |
+++ b/sos/cleaner/__init__.py
|
|
|
003633 |
@@ -523,9 +523,14 @@ third party.
|
|
|
003633 |
if isinstance(_parser, SoSUsernameParser):
|
|
|
003633 |
_parser.load_usernames_into_map(content)
|
|
|
003633 |
elif isinstance(_parser, SoSHostnameParser):
|
|
|
003633 |
- _parser.load_hostname_into_map(
|
|
|
003633 |
- content.splitlines()[0]
|
|
|
003633 |
- )
|
|
|
003633 |
+ if 'hostname' in parse_file:
|
|
|
003633 |
+ _parser.load_hostname_into_map(
|
|
|
003633 |
+ content.splitlines()[0]
|
|
|
003633 |
+ )
|
|
|
003633 |
+ elif 'etc/hosts' in parse_file:
|
|
|
003633 |
+ _parser.load_hostname_from_etc_hosts(
|
|
|
003633 |
+ content
|
|
|
003633 |
+ )
|
|
|
003633 |
else:
|
|
|
003633 |
for line in content.splitlines():
|
|
|
003633 |
self.obfuscate_line(line)
|
|
|
003633 |
diff --git a/sos/cleaner/archives/sos.py b/sos/cleaner/archives/sos.py
|
|
|
003633 |
index 4401d710..f8720c88 100644
|
|
|
003633 |
--- a/sos/cleaner/archives/sos.py
|
|
|
003633 |
+++ b/sos/cleaner/archives/sos.py
|
|
|
003633 |
@@ -23,7 +23,10 @@ class SoSReportArchive(SoSObfuscationArchive):
|
|
|
003633 |
type_name = 'report'
|
|
|
003633 |
description = 'sos report archive'
|
|
|
003633 |
prep_files = {
|
|
|
003633 |
- 'hostname': 'sos_commands/host/hostname',
|
|
|
003633 |
+ 'hostname': [
|
|
|
003633 |
+ 'sos_commands/host/hostname',
|
|
|
003633 |
+ 'etc/hosts'
|
|
|
003633 |
+ ],
|
|
|
003633 |
'ip': 'sos_commands/networking/ip_-o_addr',
|
|
|
003633 |
'mac': 'sos_commands/networking/ip_-d_address',
|
|
|
003633 |
'username': [
|
|
|
003633 |
diff --git a/sos/cleaner/parsers/hostname_parser.py b/sos/cleaner/parsers/hostname_parser.py
|
|
|
003633 |
index daa76a62..0a733bee 100644
|
|
|
003633 |
--- a/sos/cleaner/parsers/hostname_parser.py
|
|
|
003633 |
+++ b/sos/cleaner/parsers/hostname_parser.py
|
|
|
003633 |
@@ -61,6 +61,25 @@ class SoSHostnameParser(SoSCleanerParser):
|
|
|
003633 |
self.mapping.add(high_domain)
|
|
|
003633 |
self.mapping.add(hostname_string)
|
|
|
003633 |
|
|
|
003633 |
+ def load_hostname_from_etc_hosts(self, content):
|
|
|
003633 |
+ """Parse an archive's copy of /etc/hosts, which requires handling that
|
|
|
003633 |
+ is separate from the output of the `hostname` command. Just like
|
|
|
003633 |
+ load_hostname_into_map(), this has to be done explicitly and we
|
|
|
003633 |
+ cannot rely upon the more generic methods to do this reliably.
|
|
|
003633 |
+ """
|
|
|
003633 |
+ lines = content.splitlines()
|
|
|
003633 |
+ for line in lines:
|
|
|
003633 |
+ if line.startswith('#') or 'localhost' in line:
|
|
|
003633 |
+ continue
|
|
|
003633 |
+ hostln = line.split()[1:]
|
|
|
003633 |
+ for host in hostln:
|
|
|
003633 |
+ if len(host.split('.')) == 1:
|
|
|
003633 |
+ # only generate a mapping for fqdns but still record the
|
|
|
003633 |
+ # short name here for later obfuscation with parse_line()
|
|
|
003633 |
+ self.short_names.append(host)
|
|
|
003633 |
+ else:
|
|
|
003633 |
+ self.mapping.add(host)
|
|
|
003633 |
+
|
|
|
003633 |
def parse_line(self, line):
|
|
|
003633 |
"""Override the default parse_line() method to also check for the
|
|
|
003633 |
shortname of the host derived from the hostname.
|
|
|
003633 |
--
|
|
|
003633 |
2.31.1
|
|
|
003633 |
|
|
|
003633 |
From c1680226b53452b18f27f2e76c3e0e03e521f935 Mon Sep 17 00:00:00 2001
|
|
|
003633 |
From: Jake Hunsaker <jhunsake@redhat.com>
|
|
|
003633 |
Date: Wed, 17 Nov 2021 13:11:33 -0500
|
|
|
003633 |
Subject: [PATCH] [clean, hostname] Fix unintentionally case sensitive
|
|
|
003633 |
shortname handling
|
|
|
003633 |
|
|
|
003633 |
It was discovered that our extra handling for shortnames was
|
|
|
003633 |
unintentionally case sensitive. Fix this to ensure that shortnames are
|
|
|
003633 |
obfuscated regardless of case in all collected text.
|
|
|
003633 |
|
|
|
003633 |
Signed-off-by: Jake Hunsaker <jhunsake@redhat.com>
|
|
|
003633 |
---
|
|
|
003633 |
sos/cleaner/mappings/hostname_map.py | 6 +++---
|
|
|
003633 |
sos/cleaner/parsers/hostname_parser.py | 8 +++++---
|
|
|
003633 |
tests/cleaner_tests/full_report_run.py | 21 ++++++++++++++++++++-
|
|
|
003633 |
3 files changed, 28 insertions(+), 7 deletions(-)
|
|
|
003633 |
|
|
|
003633 |
diff --git a/sos/cleaner/mappings/hostname_map.py b/sos/cleaner/mappings/hostname_map.py
|
|
|
003633 |
index e70a5530..0fe78fb1 100644
|
|
|
003633 |
--- a/sos/cleaner/mappings/hostname_map.py
|
|
|
003633 |
+++ b/sos/cleaner/mappings/hostname_map.py
|
|
|
003633 |
@@ -169,13 +169,13 @@ class SoSHostnameMap(SoSMap):
|
|
|
003633 |
|
|
|
003633 |
def sanitize_item(self, item):
|
|
|
003633 |
host = item.split('.')
|
|
|
003633 |
- if all([h.isupper() for h in host]):
|
|
|
003633 |
+ if len(host) > 1 and all([h.isupper() for h in host]):
|
|
|
003633 |
# by convention we have just a domain
|
|
|
003633 |
_host = [h.lower() for h in host]
|
|
|
003633 |
return self.sanitize_domain(_host).upper()
|
|
|
003633 |
if len(host) == 1:
|
|
|
003633 |
# we have a shortname for a host
|
|
|
003633 |
- return self.sanitize_short_name(host[0])
|
|
|
003633 |
+ return self.sanitize_short_name(host[0].lower())
|
|
|
003633 |
if len(host) == 2:
|
|
|
003633 |
# we have just a domain name, e.g. example.com
|
|
|
003633 |
return self.sanitize_domain(host)
|
|
|
003633 |
@@ -185,7 +185,7 @@ class SoSHostnameMap(SoSMap):
|
|
|
003633 |
domain = host[1:]
|
|
|
003633 |
# obfuscate the short name
|
|
|
003633 |
if len(hostname) > 2:
|
|
|
003633 |
- ob_hostname = self.sanitize_short_name(hostname)
|
|
|
003633 |
+ ob_hostname = self.sanitize_short_name(hostname.lower())
|
|
|
003633 |
else:
|
|
|
003633 |
# by best practice it appears the host part of the fqdn was cut
|
|
|
003633 |
# off due to some form of truncating, as such don't obfuscate
|
|
|
003633 |
diff --git a/sos/cleaner/parsers/hostname_parser.py b/sos/cleaner/parsers/hostname_parser.py
|
|
|
003633 |
index 0a733bee..7fd0e698 100644
|
|
|
003633 |
--- a/sos/cleaner/parsers/hostname_parser.py
|
|
|
003633 |
+++ b/sos/cleaner/parsers/hostname_parser.py
|
|
|
003633 |
@@ -8,6 +8,8 @@
|
|
|
003633 |
#
|
|
|
003633 |
# See the LICENSE file in the source distribution for further information.
|
|
|
003633 |
|
|
|
003633 |
+import re
|
|
|
003633 |
+
|
|
|
003633 |
from sos.cleaner.parsers import SoSCleanerParser
|
|
|
003633 |
from sos.cleaner.mappings.hostname_map import SoSHostnameMap
|
|
|
003633 |
|
|
|
003633 |
@@ -91,9 +93,9 @@ class SoSHostnameParser(SoSCleanerParser):
|
|
|
003633 |
"""
|
|
|
003633 |
if search in self.mapping.skip_keys:
|
|
|
003633 |
return ln, count
|
|
|
003633 |
- if search in ln:
|
|
|
003633 |
- count += ln.count(search)
|
|
|
003633 |
- ln = ln.replace(search, self.mapping.get(repl or search))
|
|
|
003633 |
+ _reg = re.compile(search, re.I)
|
|
|
003633 |
+ if _reg.search(ln):
|
|
|
003633 |
+ return _reg.subn(self.mapping.get(repl or search), ln)
|
|
|
003633 |
return ln, count
|
|
|
003633 |
|
|
|
003633 |
count = 0
|
|
|
003633 |
diff --git a/tests/cleaner_tests/full_report_run.py b/tests/cleaner_tests/full_report_run.py
|
|
|
003633 |
index 2de54946..0b23acaf 100644
|
|
|
003633 |
--- a/tests/cleaner_tests/full_report_run.py
|
|
|
003633 |
+++ b/tests/cleaner_tests/full_report_run.py
|
|
|
003633 |
@@ -26,6 +26,24 @@ class FullCleanTest(StageTwoReportTest):
|
|
|
003633 |
# replace with an empty placeholder, make sure that this test case is not
|
|
|
003633 |
# influenced by previous clean runs
|
|
|
003633 |
files = ['/etc/sos/cleaner/default_mapping']
|
|
|
003633 |
+ packages = {
|
|
|
003633 |
+ 'rhel': ['python3-systemd'],
|
|
|
003633 |
+ 'ubuntu': ['python3-systemd']
|
|
|
003633 |
+ }
|
|
|
003633 |
+
|
|
|
003633 |
+ def pre_sos_setup(self):
|
|
|
003633 |
+ # ensure that case-insensitive matching of FQDNs and shortnames work
|
|
|
003633 |
+ from systemd import journal
|
|
|
003633 |
+ from socket import gethostname
|
|
|
003633 |
+ host = gethostname()
|
|
|
003633 |
+ short = host.split('.')[0]
|
|
|
003633 |
+ sosfd = journal.stream('sos-testing')
|
|
|
003633 |
+ sosfd.write(
|
|
|
003633 |
+ "This is a test line from sos clean testing. The hostname %s "
|
|
|
003633 |
+ "should not appear, nor should %s in an obfuscated archive. The "
|
|
|
003633 |
+ "shortnames of %s and %s should also not appear."
|
|
|
003633 |
+ % (host.lower(), host.upper(), short.lower(), short.upper())
|
|
|
003633 |
+ )
|
|
|
003633 |
|
|
|
003633 |
def test_private_map_was_generated(self):
|
|
|
003633 |
self.assertOutputContains('A mapping of obfuscated elements is available at')
|
|
|
003633 |
@@ -40,8 +58,9 @@ class FullCleanTest(StageTwoReportTest):
|
|
|
003633 |
|
|
|
003633 |
def test_hostname_not_in_any_file(self):
|
|
|
003633 |
host = self.sysinfo['pre']['networking']['hostname']
|
|
|
003633 |
+ short = host.split('.')[0]
|
|
|
003633 |
# much faster to just use grep here
|
|
|
003633 |
- content = self.grep_for_content(host)
|
|
|
003633 |
+ content = self.grep_for_content(host) + self.grep_for_content(short)
|
|
|
003633 |
if not content:
|
|
|
003633 |
assert True
|
|
|
003633 |
else:
|
|
|
003633 |
--
|
|
|
003633 |
2.31.1
|
|
|
003633 |
|
|
|
003633 |
From aaeb8cb57ed55598ab744b96d4f127aedebcb292 Mon Sep 17 00:00:00 2001
|
|
|
003633 |
From: Jake Hunsaker <jhunsake@redhat.com>
|
|
|
003633 |
Date: Tue, 21 Sep 2021 15:23:20 -0400
|
|
|
003633 |
Subject: [PATCH] [build] Add archives to setup.py packages
|
|
|
003633 |
|
|
|
003633 |
Adds the newly abstracted `sos.cleaner.archives` package to `setup.py`
|
|
|
003633 |
so that manual builds will properly include it.
|
|
|
003633 |
|
|
|
003633 |
Signed-off-by: Jake Hunsaker <jhunsake@redhat.com>
|
|
|
003633 |
---
|
|
|
003633 |
setup.py | 2 +-
|
|
|
003633 |
1 file changed, 1 insertion(+), 1 deletion(-)
|
|
|
003633 |
|
|
|
003633 |
diff --git a/setup.py b/setup.py
|
|
|
003633 |
index 1e8d8e2dc5..7653b59de3 100644
|
|
|
003633 |
--- a/setup.py
|
|
|
003633 |
+++ b/setup.py
|
|
|
003633 |
@@ -102,7 +102,7 @@ def copy_file (self, filename, dirname):
|
|
|
003633 |
'sos.policies.package_managers', 'sos.policies.init_systems',
|
|
|
003633 |
'sos.report', 'sos.report.plugins', 'sos.collector',
|
|
|
003633 |
'sos.collector.clusters', 'sos.cleaner', 'sos.cleaner.mappings',
|
|
|
003633 |
- 'sos.cleaner.parsers'
|
|
|
003633 |
+ 'sos.cleaner.parsers', 'sos.cleaner.archives'
|
|
|
003633 |
],
|
|
|
003633 |
cmdclass=cmdclass,
|
|
|
003633 |
command_options=command_options,
|
|
|
003633 |
--
|
|
|
003633 |
2.31.1
|
|
|
003633 |
|
|
|
003633 |
From ba3528230256429a4394f155a9ca1fdb91cf3560 Mon Sep 17 00:00:00 2001
|
|
|
003633 |
From: Jake Hunsaker <jhunsake@redhat.com>
|
|
|
003633 |
Date: Tue, 30 Nov 2021 12:46:34 -0500
|
|
|
003633 |
Subject: [PATCH 1/2] [hostname] Simplify case matching for domains
|
|
|
003633 |
|
|
|
003633 |
Instead of special handling all uppercase domain conventions, use our
|
|
|
003633 |
normal flow for obfuscation and just match the casing at the end of the
|
|
|
003633 |
sanitization routine.
|
|
|
003633 |
|
|
|
003633 |
Signed-off-by: Jake Hunsaker <jhunsake@redhat.com>
|
|
|
003633 |
---
|
|
|
003633 |
sos/cleaner/mappings/hostname_map.py | 14 ++++++++------
|
|
|
003633 |
1 file changed, 8 insertions(+), 6 deletions(-)
|
|
|
003633 |
|
|
|
003633 |
diff --git a/sos/cleaner/mappings/hostname_map.py b/sos/cleaner/mappings/hostname_map.py
|
|
|
003633 |
index 0fe78fb1..5cd8e985 100644
|
|
|
003633 |
--- a/sos/cleaner/mappings/hostname_map.py
|
|
|
003633 |
+++ b/sos/cleaner/mappings/hostname_map.py
|
|
|
003633 |
@@ -169,16 +169,15 @@ class SoSHostnameMap(SoSMap):
|
|
|
003633 |
|
|
|
003633 |
def sanitize_item(self, item):
|
|
|
003633 |
host = item.split('.')
|
|
|
003633 |
- if len(host) > 1 and all([h.isupper() for h in host]):
|
|
|
003633 |
- # by convention we have just a domain
|
|
|
003633 |
- _host = [h.lower() for h in host]
|
|
|
003633 |
- return self.sanitize_domain(_host).upper()
|
|
|
003633 |
if len(host) == 1:
|
|
|
003633 |
# we have a shortname for a host
|
|
|
003633 |
return self.sanitize_short_name(host[0].lower())
|
|
|
003633 |
if len(host) == 2:
|
|
|
003633 |
# we have just a domain name, e.g. example.com
|
|
|
003633 |
- return self.sanitize_domain(host)
|
|
|
003633 |
+ dname = self.sanitize_domain(host)
|
|
|
003633 |
+ if all([h.isupper() for h in host]):
|
|
|
003633 |
+ dname = dname.upper()
|
|
|
003633 |
+ return dname
|
|
|
003633 |
if len(host) > 2:
|
|
|
003633 |
# we have an FQDN, e.g. foo.example.com
|
|
|
003633 |
hostname = host[0]
|
|
|
003633 |
@@ -194,7 +193,10 @@ class SoSHostnameMap(SoSMap):
|
|
|
003633 |
ob_hostname = 'unknown'
|
|
|
003633 |
ob_domain = self.sanitize_domain(domain)
|
|
|
003633 |
self.dataset[item] = ob_domain
|
|
|
003633 |
- return '.'.join([ob_hostname, ob_domain])
|
|
|
003633 |
+ _fqdn = '.'.join([ob_hostname, ob_domain])
|
|
|
003633 |
+ if all([h.isupper() for h in host]):
|
|
|
003633 |
+ _fqdn = _fqdn.upper()
|
|
|
003633 |
+ return _fqdn
|
|
|
003633 |
|
|
|
003633 |
def sanitize_short_name(self, hostname):
|
|
|
003633 |
"""Obfuscate the short name of the host with an incremented counter
|
|
|
003633 |
--
|
|
|
003633 |
2.31.1
|
|
|
003633 |
|
|
|
003633 |
|
|
|
003633 |
From 189586728de22dd55122c1f7e06b19590f9a788f Mon Sep 17 00:00:00 2001
|
|
|
003633 |
From: Jake Hunsaker <jhunsake@redhat.com>
|
|
|
003633 |
Date: Tue, 30 Nov 2021 12:47:58 -0500
|
|
|
003633 |
Subject: [PATCH 2/2] [username] Improve username sourcing and remove case
|
|
|
003633 |
sensitivity
|
|
|
003633 |
|
|
|
003633 |
First, don't skip the first line of `last` output, and instead add the
|
|
|
003633 |
header from lastlog to the skip list. Additionally, add
|
|
|
003633 |
`/etc/cron.allow` and `/etc/cron.deny` as sources for usernames that
|
|
|
003633 |
might not appear in other locations in certain environments.
|
|
|
003633 |
|
|
|
003633 |
Also, make matching and replacement case insensitive.
|
|
|
003633 |
|
|
|
003633 |
Signed-off-by: Jake Hunsaker <jhunsake@redhat.com>
|
|
|
003633 |
---
|
|
|
003633 |
sos/cleaner/archives/sos.py | 4 +++-
|
|
|
003633 |
sos/cleaner/mappings/username_map.py | 2 +-
|
|
|
003633 |
sos/cleaner/parsers/username_parser.py | 14 +++++++++-----
|
|
|
003633 |
3 files changed, 13 insertions(+), 7 deletions(-)
|
|
|
003633 |
|
|
|
003633 |
diff --git a/sos/cleaner/archives/sos.py b/sos/cleaner/archives/sos.py
|
|
|
003633 |
index f8720c88..12766496 100644
|
|
|
003633 |
--- a/sos/cleaner/archives/sos.py
|
|
|
003633 |
+++ b/sos/cleaner/archives/sos.py
|
|
|
003633 |
@@ -35,7 +35,9 @@ class SoSReportArchive(SoSObfuscationArchive):
|
|
|
003633 |
'sos_commands/login/lastlog_-u_65537-4294967295',
|
|
|
003633 |
# AD users will be reported here, but favor the lastlog files since
|
|
|
003633 |
# those will include local users who have not logged in
|
|
|
003633 |
- 'sos_commands/login/last'
|
|
|
003633 |
+ 'sos_commands/login/last',
|
|
|
003633 |
+ 'etc/cron.allow',
|
|
|
003633 |
+ 'etc/cron.deny'
|
|
|
003633 |
]
|
|
|
003633 |
}
|
|
|
003633 |
|
|
|
003633 |
diff --git a/sos/cleaner/mappings/username_map.py b/sos/cleaner/mappings/username_map.py
|
|
|
003633 |
index cdbf36fe..7ecccd7b 100644
|
|
|
003633 |
--- a/sos/cleaner/mappings/username_map.py
|
|
|
003633 |
+++ b/sos/cleaner/mappings/username_map.py
|
|
|
003633 |
@@ -33,5 +33,5 @@ class SoSUsernameMap(SoSMap):
|
|
|
003633 |
ob_name = "obfuscateduser%s" % self.name_count
|
|
|
003633 |
self.name_count += 1
|
|
|
003633 |
if ob_name in self.dataset.values():
|
|
|
003633 |
- return self.sanitize_item(username)
|
|
|
003633 |
+ return self.sanitize_item(username.lower())
|
|
|
003633 |
return ob_name
|
|
|
003633 |
diff --git a/sos/cleaner/parsers/username_parser.py b/sos/cleaner/parsers/username_parser.py
|
|
|
003633 |
index 35377a31..229c7de4 100644
|
|
|
003633 |
--- a/sos/cleaner/parsers/username_parser.py
|
|
|
003633 |
+++ b/sos/cleaner/parsers/username_parser.py
|
|
|
003633 |
@@ -8,6 +8,7 @@
|
|
|
003633 |
#
|
|
|
003633 |
# See the LICENSE file in the source distribution for further information.
|
|
|
003633 |
|
|
|
003633 |
+import re
|
|
|
003633 |
|
|
|
003633 |
from sos.cleaner.parsers import SoSCleanerParser
|
|
|
003633 |
from sos.cleaner.mappings.username_map import SoSUsernameMap
|
|
|
003633 |
@@ -34,6 +35,7 @@ class SoSUsernameParser(SoSCleanerParser):
|
|
|
003633 |
'reboot',
|
|
|
003633 |
'root',
|
|
|
003633 |
'ubuntu',
|
|
|
003633 |
+ 'username',
|
|
|
003633 |
'wtmp'
|
|
|
003633 |
]
|
|
|
003633 |
|
|
|
003633 |
@@ -47,12 +49,12 @@ class SoSUsernameParser(SoSCleanerParser):
|
|
|
003633 |
this parser, we need to override the initial parser prepping here.
|
|
|
003633 |
"""
|
|
|
003633 |
users = set()
|
|
|
003633 |
- for line in content.splitlines()[1:]:
|
|
|
003633 |
+ for line in content.splitlines():
|
|
|
003633 |
try:
|
|
|
003633 |
user = line.split()[0]
|
|
|
003633 |
except Exception:
|
|
|
003633 |
continue
|
|
|
003633 |
- if user in self.skip_list:
|
|
|
003633 |
+ if user.lower() in self.skip_list:
|
|
|
003633 |
continue
|
|
|
003633 |
users.add(user)
|
|
|
003633 |
for each in users:
|
|
|
003633 |
@@ -61,7 +63,9 @@ class SoSUsernameParser(SoSCleanerParser):
|
|
|
003633 |
def parse_line(self, line):
|
|
|
003633 |
count = 0
|
|
|
003633 |
for username in sorted(self.mapping.dataset.keys(), reverse=True):
|
|
|
003633 |
- if username in line:
|
|
|
003633 |
- count = line.count(username)
|
|
|
003633 |
- line = line.replace(username, self.mapping.get(username))
|
|
|
003633 |
+ _reg = re.compile(username, re.I)
|
|
|
003633 |
+ if _reg.search(line):
|
|
|
003633 |
+ line, count = _reg.subn(
|
|
|
003633 |
+ self.mapping.get(username.lower()), line
|
|
|
003633 |
+ )
|
|
|
003633 |
return line, count
|
|
|
003633 |
--
|
|
|
003633 |
2.31.1
|
|
|
003633 |
|
|
|
003633 |
From cafd0f3a52436a3966576e7db21e5dd17c06f0cc Mon Sep 17 00:00:00 2001
|
|
|
003633 |
From: Jake Hunsaker <jhunsake@redhat.com>
|
|
|
003633 |
Date: Sun, 12 Dec 2021 11:10:46 -0500
|
|
|
003633 |
Subject: [PATCH] [hostname] Fix edge case for new hosts in a known subdomain
|
|
|
003633 |
|
|
|
003633 |
Fixes an edge case that would cause us to at first not recognize that a
|
|
|
003633 |
given hostname string is a new host in a known subdomain, but then on
|
|
|
003633 |
the obfuscation attempt properly recognize it as such and result in an
|
|
|
003633 |
incomplete obfuscation.
|
|
|
003633 |
|
|
|
003633 |
This was mostly triggered by specific patterns for build hosts within
|
|
|
003633 |
`sos_commands/rpm/package-data`. With this refined check, these types of
|
|
|
003633 |
matches are properly obfuscated.
|
|
|
003633 |
|
|
|
003633 |
Signed-off-by: Jake Hunsaker <jhunsake@redhat.com>
|
|
|
003633 |
---
|
|
|
003633 |
sos/cleaner/mappings/hostname_map.py | 9 +++++----
|
|
|
003633 |
1 file changed, 5 insertions(+), 4 deletions(-)
|
|
|
003633 |
|
|
|
003633 |
diff --git a/sos/cleaner/mappings/hostname_map.py b/sos/cleaner/mappings/hostname_map.py
|
|
|
003633 |
index 5cd8e9857..33b0e6c80 100644
|
|
|
003633 |
--- a/sos/cleaner/mappings/hostname_map.py
|
|
|
003633 |
+++ b/sos/cleaner/mappings/hostname_map.py
|
|
|
003633 |
@@ -129,7 +129,7 @@ def get(self, item):
|
|
|
003633 |
item = item[0:-1]
|
|
|
003633 |
if not self.domain_name_in_loaded_domains(item.lower()):
|
|
|
003633 |
return item
|
|
|
003633 |
- if item.endswith(('.yaml', '.yml', '.crt', '.key', '.pem')):
|
|
|
003633 |
+ if item.endswith(('.yaml', '.yml', '.crt', '.key', '.pem', '.log')):
|
|
|
003633 |
ext = '.' + item.split('.')[-1]
|
|
|
003633 |
item = item.replace(ext, '')
|
|
|
003633 |
suffix += ext
|
|
|
003633 |
@@ -148,7 +148,8 @@ def get(self, item):
|
|
|
003633 |
if len(_test) == 1 or not _test[0]:
|
|
|
003633 |
# does not match existing obfuscation
|
|
|
003633 |
continue
|
|
|
003633 |
- elif _test[0].endswith('.') and not _host_substr:
|
|
|
003633 |
+ elif not _host_substr and (_test[0].endswith('.') or
|
|
|
003633 |
+ item.endswith(_existing)):
|
|
|
003633 |
# new hostname in known domain
|
|
|
003633 |
final = super(SoSHostnameMap, self).get(item)
|
|
|
003633 |
break
|
|
|
003633 |
@@ -219,8 +220,8 @@ def sanitize_domain(self, domain):
|
|
|
003633 |
# don't obfuscate vendor domains
|
|
|
003633 |
if re.match(_skip, '.'.join(domain)):
|
|
|
003633 |
return '.'.join(domain)
|
|
|
003633 |
- top_domain = domain[-1]
|
|
|
003633 |
- dname = '.'.join(domain[0:-1])
|
|
|
003633 |
+ top_domain = domain[-1].lower()
|
|
|
003633 |
+ dname = '.'.join(domain[0:-1]).lower()
|
|
|
003633 |
ob_domain = self._new_obfuscated_domain(dname)
|
|
|
003633 |
ob_domain = '.'.join([ob_domain, top_domain])
|
|
|
003633 |
self.dataset['.'.join(domain)] = ob_domain
|
|
|
003633 |
From f5e1298162a9393ea2d9f5c4df40dfece50f5f88 Mon Sep 17 00:00:00 2001
|
|
|
003633 |
From: Jake Hunsaker <jhunsake@redhat.com>
|
|
|
003633 |
Date: Thu, 6 Jan 2022 13:15:15 -0500
|
|
|
003633 |
Subject: [PATCH 1/3] [hostname] Fix loading and detection of long base domains
|
|
|
003633 |
|
|
|
003633 |
Our domain matching has up to now assumed that users would be providing
|
|
|
003633 |
'base' domains such as 'example.com' whereby something like
|
|
|
003633 |
'foo.bar.example.com' is a subdomain (or host) within that base domain.
|
|
|
003633 |
|
|
|
003633 |
However, the use case exists to provide 'foo.bar.example.com' as the
|
|
|
003633 |
base domain, without wanting to obfuscate 'example.com' directly.
|
|
|
003633 |
|
|
|
003633 |
This commit fixes our handling of both loading these longer domains and
|
|
|
003633 |
doing the 'domain is part of a domain we want to obfuscate' check.
|
|
|
003633 |
|
|
|
003633 |
Signed-off-by: Jake Hunsaker <jhunsake@redhat.com>
|
|
|
003633 |
---
|
|
|
003633 |
sos/cleaner/mappings/hostname_map.py | 9 ++++++++-
|
|
|
003633 |
1 file changed, 8 insertions(+), 1 deletion(-)
|
|
|
003633 |
|
|
|
003633 |
diff --git a/sos/cleaner/mappings/hostname_map.py b/sos/cleaner/mappings/hostname_map.py
|
|
|
003633 |
index 33b0e6c8..7a7cf6b8 100644
|
|
|
003633 |
--- a/sos/cleaner/mappings/hostname_map.py
|
|
|
003633 |
+++ b/sos/cleaner/mappings/hostname_map.py
|
|
|
003633 |
@@ -50,10 +50,14 @@ class SoSHostnameMap(SoSMap):
|
|
|
003633 |
in this parser, we need to re-inject entries from the map_file into
|
|
|
003633 |
these dicts and not just the underlying 'dataset' dict
|
|
|
003633 |
"""
|
|
|
003633 |
- for domain in self.dataset:
|
|
|
003633 |
+ for domain, ob_pair in self.dataset.items():
|
|
|
003633 |
if len(domain.split('.')) == 1:
|
|
|
003633 |
self.hosts[domain.split('.')[0]] = self.dataset[domain]
|
|
|
003633 |
else:
|
|
|
003633 |
+ if ob_pair.startswith('obfuscateddomain'):
|
|
|
003633 |
+ # directly exact domain matches
|
|
|
003633 |
+ self._domains[domain] = ob_pair.split('.')[0]
|
|
|
003633 |
+ continue
|
|
|
003633 |
# strip the host name and trailing top-level domain so that
|
|
|
003633 |
# we in inject the domain properly for later string matching
|
|
|
003633 |
|
|
|
003633 |
@@ -102,9 +106,12 @@ class SoSHostnameMap(SoSMap):
|
|
|
003633 |
and should be obfuscated
|
|
|
003633 |
"""
|
|
|
003633 |
host = domain.split('.')
|
|
|
003633 |
+ no_tld = '.'.join(domain.split('.')[0:-1])
|
|
|
003633 |
if len(host) == 1:
|
|
|
003633 |
# don't block on host's shortname
|
|
|
003633 |
return host[0] in self.hosts.keys()
|
|
|
003633 |
+ elif any([no_tld.endswith(_d) for _d in self._domains]):
|
|
|
003633 |
+ return True
|
|
|
003633 |
else:
|
|
|
003633 |
domain = host[0:-1]
|
|
|
003633 |
for known_domain in self._domains:
|
|
|
003633 |
--
|
|
|
003633 |
2.31.1
|
|
|
003633 |
|
|
|
003633 |
|
|
|
003633 |
From e241cf33a14ecd4e848a5fd857c5d3d7d07fbd71 Mon Sep 17 00:00:00 2001
|
|
|
003633 |
From: Jake Hunsaker <jhunsake@redhat.com>
|
|
|
003633 |
Date: Thu, 6 Jan 2022 13:18:44 -0500
|
|
|
003633 |
Subject: [PATCH 2/3] [cleaner] Improve parser-specific file skipping
|
|
|
003633 |
|
|
|
003633 |
This commit improves our handling of skipping files on a per-parser
|
|
|
003633 |
basis, by first filtering the list of parsers that `obfuscate_line()`
|
|
|
003633 |
will iterate over by the parser's `skip_file` class attr, rather than
|
|
|
003633 |
relying on higher-level checks.
|
|
|
003633 |
|
|
|
003633 |
Signed-off-by: Jake Hunsaker <jhunsake@redhat.com>
|
|
|
003633 |
---
|
|
|
003633 |
sos/cleaner/__init__.py | 17 ++++++++++++++---
|
|
|
003633 |
1 file changed, 14 insertions(+), 3 deletions(-)
|
|
|
003633 |
|
|
|
003633 |
diff --git a/sos/cleaner/__init__.py b/sos/cleaner/__init__.py
|
|
|
003633 |
index 3f530d44..5686e213 100644
|
|
|
003633 |
--- a/sos/cleaner/__init__.py
|
|
|
003633 |
+++ b/sos/cleaner/__init__.py
|
|
|
003633 |
@@ -12,6 +12,7 @@ import hashlib
|
|
|
003633 |
import json
|
|
|
003633 |
import logging
|
|
|
003633 |
import os
|
|
|
003633 |
+import re
|
|
|
003633 |
import shutil
|
|
|
003633 |
import tempfile
|
|
|
003633 |
|
|
|
003633 |
@@ -640,10 +641,16 @@ third party.
|
|
|
003633 |
self.log_debug("Obfuscating %s" % short_name or filename,
|
|
|
003633 |
caller=arc_name)
|
|
|
003633 |
tfile = tempfile.NamedTemporaryFile(mode='w', dir=self.tmpdir)
|
|
|
003633 |
+ _parsers = [
|
|
|
003633 |
+ _p for _p in self.parsers if not
|
|
|
003633 |
+ any([
|
|
|
003633 |
+ re.match(p, short_name) for p in _p.skip_files
|
|
|
003633 |
+ ])
|
|
|
003633 |
+ ]
|
|
|
003633 |
with open(filename, 'r') as fname:
|
|
|
003633 |
for line in fname:
|
|
|
003633 |
try:
|
|
|
003633 |
- line, count = self.obfuscate_line(line)
|
|
|
003633 |
+ line, count = self.obfuscate_line(line, _parsers)
|
|
|
003633 |
subs += count
|
|
|
003633 |
tfile.write(line)
|
|
|
003633 |
except Exception as err:
|
|
|
003633 |
@@ -713,7 +720,7 @@ third party.
|
|
|
003633 |
pass
|
|
|
003633 |
return string_data
|
|
|
003633 |
|
|
|
003633 |
- def obfuscate_line(self, line):
|
|
|
003633 |
+ def obfuscate_line(self, line, parsers=None):
|
|
|
003633 |
"""Run a line through each of the obfuscation parsers, keeping a
|
|
|
003633 |
cumulative total of substitutions done on that particular line.
|
|
|
003633 |
|
|
|
003633 |
@@ -721,6 +728,8 @@ third party.
|
|
|
003633 |
|
|
|
003633 |
:param line str: The raw line as read from the file being
|
|
|
003633 |
processed
|
|
|
003633 |
+ :param parsers: A list of parser objects to obfuscate
|
|
|
003633 |
+ with. If None, use all.
|
|
|
003633 |
|
|
|
003633 |
Returns the fully obfuscated line and the number of substitutions made
|
|
|
003633 |
"""
|
|
|
003633 |
@@ -729,7 +738,9 @@ third party.
|
|
|
003633 |
count = 0
|
|
|
003633 |
if not line.strip():
|
|
|
003633 |
return line, count
|
|
|
003633 |
- for parser in self.parsers:
|
|
|
003633 |
+ if parsers is None:
|
|
|
003633 |
+ parsers = self.parsers
|
|
|
003633 |
+ for parser in parsers:
|
|
|
003633 |
try:
|
|
|
003633 |
line, _count = parser.parse_line(line)
|
|
|
003633 |
count += _count
|
|
|
003633 |
--
|
|
|
003633 |
2.31.1
|
|
|
003633 |
|
|
|
003633 |
|
|
|
003633 |
From 96c9a833e77639a853b7d3d6f1df68bbbbe5e9cb Mon Sep 17 00:00:00 2001
|
|
|
003633 |
From: Jake Hunsaker <jhunsake@redhat.com>
|
|
|
003633 |
Date: Thu, 6 Jan 2022 13:20:32 -0500
|
|
|
003633 |
Subject: [PATCH 3/3] [cleaner] Add skips for known files and usernames
|
|
|
003633 |
|
|
|
003633 |
Adds skips for `/proc/kallsyms` which should never be obfuscated, as
|
|
|
003633 |
well as any packaging-related log file for the IP parser. Further, do
|
|
|
003633 |
not obfuscate the `stack` user, as that is a well-known user for many
|
|
|
003633 |
configurations that, if obfuscated, could result in undesired string
|
|
|
003633 |
substitutions in normal logging.
|
|
|
003633 |
|
|
|
003633 |
Signed-off-by: Jake Hunsaker <jhunsake@redhat.com>
|
|
|
003633 |
---
|
|
|
003633 |
sos/cleaner/archives/__init__.py | 2 ++
|
|
|
003633 |
sos/cleaner/parsers/ip_parser.py | 3 ++-
|
|
|
003633 |
sos/cleaner/parsers/username_parser.py | 1 +
|
|
|
003633 |
3 files changed, 5 insertions(+), 1 deletion(-)
|
|
|
003633 |
|
|
|
003633 |
diff --git a/sos/cleaner/archives/__init__.py b/sos/cleaner/archives/__init__.py
|
|
|
003633 |
index 795c5a78..cbf1f809 100644
|
|
|
003633 |
--- a/sos/cleaner/archives/__init__.py
|
|
|
003633 |
+++ b/sos/cleaner/archives/__init__.py
|
|
|
003633 |
@@ -43,6 +43,7 @@ class SoSObfuscationArchive():
|
|
|
003633 |
type_name = 'undetermined'
|
|
|
003633 |
description = 'undetermined'
|
|
|
003633 |
is_nested = False
|
|
|
003633 |
+ skip_files = []
|
|
|
003633 |
prep_files = {}
|
|
|
003633 |
|
|
|
003633 |
def __init__(self, archive_path, tmpdir):
|
|
|
003633 |
@@ -111,6 +112,7 @@ class SoSObfuscationArchive():
|
|
|
003633 |
Returns: list of files and file regexes
|
|
|
003633 |
"""
|
|
|
003633 |
return [
|
|
|
003633 |
+ 'proc/kallsyms',
|
|
|
003633 |
'sosreport-',
|
|
|
003633 |
'sys/firmware',
|
|
|
003633 |
'sys/fs',
|
|
|
003633 |
diff --git a/sos/cleaner/parsers/ip_parser.py b/sos/cleaner/parsers/ip_parser.py
|
|
|
003633 |
index 71d38be8..b007368c 100644
|
|
|
003633 |
--- a/sos/cleaner/parsers/ip_parser.py
|
|
|
003633 |
+++ b/sos/cleaner/parsers/ip_parser.py
|
|
|
003633 |
@@ -37,7 +37,8 @@ class SoSIPParser(SoSCleanerParser):
|
|
|
003633 |
'sos_commands/snappy/snap_list_--all',
|
|
|
003633 |
'sos_commands/snappy/snap_--version',
|
|
|
003633 |
'sos_commands/vulkan/vulkaninfo',
|
|
|
003633 |
- 'var/log/.*dnf.*'
|
|
|
003633 |
+ 'var/log/.*dnf.*',
|
|
|
003633 |
+ 'var/log/.*packag.*' # get 'packages' and 'packaging' logs
|
|
|
003633 |
]
|
|
|
003633 |
|
|
|
003633 |
map_file_key = 'ip_map'
|
|
|
003633 |
diff --git a/sos/cleaner/parsers/username_parser.py b/sos/cleaner/parsers/username_parser.py
|
|
|
003633 |
index 229c7de4..3208a655 100644
|
|
|
003633 |
--- a/sos/cleaner/parsers/username_parser.py
|
|
|
003633 |
+++ b/sos/cleaner/parsers/username_parser.py
|
|
|
003633 |
@@ -32,6 +32,7 @@ class SoSUsernameParser(SoSCleanerParser):
|
|
|
003633 |
'nobody',
|
|
|
003633 |
'nfsnobody',
|
|
|
003633 |
'shutdown',
|
|
|
003633 |
+ 'stack',
|
|
|
003633 |
'reboot',
|
|
|
003633 |
'root',
|
|
|
003633 |
'ubuntu',
|
|
|
003633 |
--
|
|
|
003633 |
2.31.1
|
|
|
003633 |
|
|
|
003633 |
From 7ebb2ce0bcd13c1b3aada648aceb20b5aff636d9 Mon Sep 17 00:00:00 2001
|
|
|
003633 |
From: Jake Hunsaker <jhunsake@redhat.com>
|
|
|
003633 |
Date: Tue, 15 Feb 2022 14:18:02 -0500
|
|
|
003633 |
Subject: [PATCH] [host] Skip entire /etc/sos/cleaner directory
|
|
|
003633 |
|
|
|
003633 |
While `default_mapping` is typically the only file expected under
|
|
|
003633 |
`/etc/sos/cleaner/`, it is possible for other mapping files (such as
|
|
|
003633 |
backups) to appear there.
|
|
|
003633 |
|
|
|
003633 |
Make the `add_forbidden_path()` spec here target the entire cleaner
|
|
|
003633 |
directory to avoid ever capturing these map files.
|
|
|
003633 |
|
|
|
003633 |
Signed-off-by: Jake Hunsaker <jhunsake@redhat.com>
|
|
|
003633 |
---
|
|
|
003633 |
sos/report/plugins/host.py | 2 +-
|
|
|
003633 |
1 file changed, 1 insertion(+), 1 deletion(-)
|
|
|
003633 |
|
|
|
003633 |
diff --git a/sos/report/plugins/host.py b/sos/report/plugins/host.py
|
|
|
003633 |
index 5e21da7b8e..95a3b9cd95 100644
|
|
|
003633 |
--- a/sos/report/plugins/host.py
|
|
|
003633 |
+++ b/sos/report/plugins/host.py
|
|
|
003633 |
@@ -20,7 +20,7 @@ class Host(Plugin, IndependentPlugin):
|
|
|
003633 |
|
|
|
003633 |
def setup(self):
|
|
|
003633 |
|
|
|
003633 |
- self.add_forbidden_path('/etc/sos/cleaner/default_mapping')
|
|
|
003633 |
+ self.add_forbidden_path('/etc/sos/cleaner')
|
|
|
003633 |
|
|
|
003633 |
self.add_cmd_output('hostname', root_symlink='hostname')
|
|
|
003633 |
self.add_cmd_output('uptime', root_symlink='uptime')
|