diff --git a/SOURCES/sos-bz1886711-enhance-tc-hw-offload.patch b/SOURCES/sos-bz1886711-enhance-tc-hw-offload.patch new file mode 100644 index 0000000..ed1088b --- /dev/null +++ b/SOURCES/sos-bz1886711-enhance-tc-hw-offload.patch @@ -0,0 +1,32 @@ +From bbb7f8bf522960a8ca7625f539e9e5d109abb704 Mon Sep 17 00:00:00 2001 +From: Pavel Moravec +Date: Wed, 19 May 2021 08:31:45 +0200 +Subject: [PATCH] [networking] collect also tc filter show ingress + +Both "tc -s filter show dev %eth [|ingress]" commands required as +they provide different output. + +Resolves: #2550 + +Signed-off-by: Pavel Moravec +--- + sos/report/plugins/networking.py | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/sos/report/plugins/networking.py b/sos/report/plugins/networking.py +index acfa027f..35646268 100644 +--- a/sos/report/plugins/networking.py ++++ b/sos/report/plugins/networking.py +@@ -156,7 +156,8 @@ class Networking(Plugin): + "ethtool --phy-statistics " + eth, + "ethtool --show-priv-flags " + eth, + "ethtool --show-eee " + eth, +- "tc -s filter show dev " + eth ++ "tc -s filter show dev " + eth, ++ "tc -s filter show dev " + eth + " ingress", + ], tags=eth) + + # skip EEPROM collection by default, as it might hang or +-- +2.26.3 + diff --git a/SOURCES/sos-bz1925419-all-gluster-files.patch b/SOURCES/sos-bz1925419-all-gluster-files.patch new file mode 100644 index 0000000..ab24429 --- /dev/null +++ b/SOURCES/sos-bz1925419-all-gluster-files.patch @@ -0,0 +1,39 @@ +From 4fb834ec862228afb276ccbd45aa86c66044ea66 Mon Sep 17 00:00:00 2001 +From: Pavel Moravec +Date: Mon, 15 Mar 2021 09:09:51 +0100 +Subject: [PATCH] [gluster] collect public keys from the right dir + +Collection of glusterfind dir is achieved by /var/lib/gluster +so it doesn't be collected explicitly. + +/var/lib/glusterd/glusterfind/.keys/ subdir is required to be +explicitly collected, as add_copy_spec uses glob.glob() that skips +hidden files. + +Resolves: #2451 + +Signed-off-by: Pavel Moravec +Signed-off-by: Jake Hunsaker +--- + sos/report/plugins/gluster.py | 5 ++--- + 1 file changed, 2 insertions(+), 3 deletions(-) + +diff --git a/sos/report/plugins/gluster.py b/sos/report/plugins/gluster.py +index e1a89df2..952cab63 100644 +--- a/sos/report/plugins/gluster.py ++++ b/sos/report/plugins/gluster.py +@@ -76,9 +76,8 @@ class Gluster(Plugin, RedHatPlugin): + "/var/lib/glusterd/", + # collect nfs-ganesha related configuration + "/run/gluster/shared_storage/nfs-ganesha/", +- # collect status files and public ssh keys +- "/var/lib/glusterd/.keys/", +- "/var/lib/glusterd/glusterfind/" ++ # collect public ssh keys (a_s_c skips implicit hidden files) ++ "/var/lib/glusterd/glusterfind/.keys/", + ] + glob.glob('/run/gluster/*tier-dht/*')) + + if not self.get_option("all_logs"): +-- +2.26.3 + diff --git a/SOURCES/sos-bz1964499-obfuscate-fqdn-from-dnf-log.patch b/SOURCES/sos-bz1964499-obfuscate-fqdn-from-dnf-log.patch new file mode 100644 index 0000000..07e005b --- /dev/null +++ b/SOURCES/sos-bz1964499-obfuscate-fqdn-from-dnf-log.patch @@ -0,0 +1,78 @@ +From b27140a9126ea82efb517d60bf1b8455aaf4f5a6 Mon Sep 17 00:00:00 2001 +From: Jake Hunsaker +Date: Fri, 26 Mar 2021 11:12:33 -0400 +Subject: [PATCH] [cleaner] Only skip packaging-based files for the IP parser + +Files primarily containing package information, e.g. `installed-rpms` or +`installed-debs`, were previously being skipped by all parsers. In +reality, we only need to skip these for the IP parser due to the fact +that version numbers often generate a match for IP address regexes. + +This will also fix a problem where if a system was the build host for +certain packages, the hostname would remain in these files as the +hostname parser was previously not checking these files. + +Closes: #2400 +Resolves: #2464 + +Signed-off-by: Jake Hunsaker +--- + sos/cleaner/obfuscation_archive.py | 10 ---------- + sos/cleaner/parsers/ip_parser.py | 16 ++++++++++++++++ + 2 files changed, 16 insertions(+), 10 deletions(-) + +diff --git a/sos/cleaner/obfuscation_archive.py b/sos/cleaner/obfuscation_archive.py +index 981cc05f..84ca30cd 100644 +--- a/sos/cleaner/obfuscation_archive.py ++++ b/sos/cleaner/obfuscation_archive.py +@@ -59,20 +59,10 @@ class SoSObfuscationArchive(): + Returns: list of files and file regexes + """ + return [ +- 'installed-debs', +- 'installed-rpms', +- 'sos_commands/dpkg', +- 'sos_commands/python/pip_list', +- 'sos_commands/rpm', +- 'sos_commands/yum/.*list.*', +- 'sos_commands/snappy/snap_list_--all', +- 'sos_commands/snappy/snap_--version', +- 'sos_commands/vulkan/vulkaninfo', + 'sys/firmware', + 'sys/fs', + 'sys/kernel/debug', + 'sys/module', +- 'var/log/.*dnf.*', + r'.*\.tar$', # TODO: support archive unpacking + # Be explicit with these tar matches to avoid matching commands + r'.*\.tar\.xz', +diff --git a/sos/cleaner/parsers/ip_parser.py b/sos/cleaner/parsers/ip_parser.py +index 3ea7f865..08d1cd05 100644 +--- a/sos/cleaner/parsers/ip_parser.py ++++ b/sos/cleaner/parsers/ip_parser.py +@@ -24,6 +24,22 @@ class SoSIPParser(SoSCleanerParser): + # don't match package versions recorded in journals + r'.*dnf\[.*\]:' + ] ++ ++ skip_files = [ ++ # skip these as version numbers will frequently look like IP addresses ++ # when using regex matching ++ 'installed-debs', ++ 'installed-rpms', ++ 'sos_commands/dpkg', ++ 'sos_commands/python/pip_list', ++ 'sos_commands/rpm', ++ 'sos_commands/yum/.*list.*', ++ 'sos_commands/snappy/snap_list_--all', ++ 'sos_commands/snappy/snap_--version', ++ 'sos_commands/vulkan/vulkaninfo', ++ 'var/log/.*dnf.*' ++ ] ++ + map_file_key = 'ip_map' + prep_map_file = 'sos_commands/networking/ip_-o_addr' + +-- +2.26.3 + diff --git a/SOURCES/sos-bz1965001-fix-avc-copystating-proc-sys.patch b/SOURCES/sos-bz1965001-fix-avc-copystating-proc-sys.patch new file mode 100644 index 0000000..1b62a1a --- /dev/null +++ b/SOURCES/sos-bz1965001-fix-avc-copystating-proc-sys.patch @@ -0,0 +1,135 @@ +From 206d65618f20995b168dcc63090d1e6871450e90 Mon Sep 17 00:00:00 2001 +From: Pavel Moravec +Date: Wed, 26 May 2021 15:45:26 +0200 +Subject: [PATCH] [archive] skip copying SELinux context for /proc and /sys + everytime + +A supplement of #1399 fix, now also for adding strings or special +device files. + +Also adding a (vendor) test case for it. + +Resolves: #2560 + +Signed-off-by: Pavel Moravec +--- + sos/archive.py | 35 +++++++++++---------- + tests/vendor_tests/redhat/rhbz1965001.py | 39 ++++++++++++++++++++++++ + 2 files changed, 56 insertions(+), 18 deletions(-) + create mode 100644 tests/vendor_tests/redhat/rhbz1965001.py + +diff --git a/sos/archive.py b/sos/archive.py +index 4dd31d75..b02b2475 100644 +--- a/sos/archive.py ++++ b/sos/archive.py +@@ -326,6 +326,20 @@ class FileCacheArchive(Archive): + return None + return dest + ++ def _copy_attributes(self, src, dest): ++ # copy file attributes, skip SELinux xattrs for /sys and /proc ++ try: ++ stat = os.stat(src) ++ if src.startswith("/sys/") or src.startswith("/proc/"): ++ shutil.copymode(src, dest) ++ os.utime(dest, ns=(stat.st_atime_ns, stat.st_mtime_ns)) ++ else: ++ shutil.copystat(src, dest) ++ os.chown(dest, stat.st_uid, stat.st_gid) ++ except Exception as e: ++ self.log_debug("caught '%s' setting attributes of '%s'" ++ % (e, dest)) ++ + def add_file(self, src, dest=None): + with self._path_lock: + if not dest: +@@ -348,18 +362,7 @@ class FileCacheArchive(Archive): + else: + self.log_info("File %s not collected: '%s'" % (src, e)) + +- # copy file attributes, skip SELinux xattrs for /sys and /proc +- try: +- stat = os.stat(src) +- if src.startswith("/sys/") or src.startswith("/proc/"): +- shutil.copymode(src, dest) +- os.utime(dest, ns=(stat.st_atime_ns, stat.st_mtime_ns)) +- else: +- shutil.copystat(src, dest) +- os.chown(dest, stat.st_uid, stat.st_gid) +- except Exception as e: +- self.log_debug("caught '%s' setting attributes of '%s'" +- % (e, dest)) ++ self._copy_attributes(src, dest) + file_name = "'%s'" % src + else: + # Open file case: first rewind the file to obtain +@@ -388,11 +391,7 @@ class FileCacheArchive(Archive): + content = content.decode('utf8', 'ignore') + f.write(content) + if os.path.exists(src): +- try: +- shutil.copystat(src, dest) +- except OSError as e: +- self.log_error("Unable to add '%s' to archive: %s" % +- (dest, e)) ++ self._copy_attributes(src, dest) + self.log_debug("added string at '%s' to FileCacheArchive '%s'" + % (src, self._archive_root)) + +@@ -501,7 +500,7 @@ class FileCacheArchive(Archive): + self.log_info("add_node: %s - mknod '%s'" % (msg, dest)) + return + raise e +- shutil.copystat(path, dest) ++ self._copy_attributes(path, dest) + + def name_max(self): + if 'PC_NAME_MAX' in os.pathconf_names: +diff --git a/tests/vendor_tests/redhat/rhbz1965001.py b/tests/vendor_tests/redhat/rhbz1965001.py +new file mode 100644 +index 00000000..aa16ba81 +--- /dev/null ++++ b/tests/vendor_tests/redhat/rhbz1965001.py +@@ -0,0 +1,39 @@ ++# This file is part of the sos project: https://github.com/sosreport/sos ++# ++# This copyrighted material is made available to anyone wishing to use, ++# modify, copy, or redistribute it subject to the terms and conditions of ++# version 2 of the GNU General Public License. ++# ++# See the LICENSE file in the source distribution for further information. ++ ++ ++import tempfile ++import shutil ++from sos_tests import StageOneReportTest ++ ++ ++class rhbz1965001(StageOneReportTest): ++ """ ++ Copying /proc/sys/vm/{compact_memory,drop_caches} must ignore SELinux ++ context, otherwise an attempt to set the context to files under some ++ directories like /tmp raises an AVC denial, and an ERROR ++ "Unable to add '...' to archive: [Errno 13] Permission denied: '...' ++ is raise. ++ ++ https://bugzilla.redhat.com/show_bug.cgi?id=1965001 ++ ++ :avocado: enable ++ :avocado: tags=stageone ++ """ ++ ++ sos_cmd = '-o system' ++ # it is crucial to run the test case with --tmp-dir=/tmp/... as that is ++ # (an example of) directory exhibiting the relabel permission deny. ++ # /var/tmp directory allows those relabels. ++ # ++ # the directory shouldn't exist at this moment, otherwise ++ # "check to prevent multiple setUp() runs" in sos_tests.py would fail ++ _tmpdir = '/tmp/rhbz1965001_avocado_test' ++ ++ def test_no_permission_denied(self): ++ self.assertSosLogNotContains("Permission denied") +-- +2.26.3 + diff --git a/SOURCES/sos-bz1967613-sssd-common.patch b/SOURCES/sos-bz1967613-sssd-common.patch new file mode 100644 index 0000000..9937972 --- /dev/null +++ b/SOURCES/sos-bz1967613-sssd-common.patch @@ -0,0 +1,36 @@ +From 630dfbee936050698d33b59abd1e243c44e50af8 Mon Sep 17 00:00:00 2001 +From: Jan Jansky +Date: Thu, 3 Jun 2021 15:04:57 +0200 +Subject: [PATCH] [sssd] sssd plugin when sssd-common + +We have reports that sssd logs are not +collected, when we investigated +we found associate wants to collect +sssd related logs also when only +sssd-common package is installed. + +We got this confirmed by sbr-idm. + +Resolves: #2571 + +Signed-off-by: Jan Jansky +--- + sos/report/plugins/sssd.py | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/sos/report/plugins/sssd.py b/sos/report/plugins/sssd.py +index 17933935..6f98e90c 100644 +--- a/sos/report/plugins/sssd.py ++++ b/sos/report/plugins/sssd.py +@@ -19,7 +19,7 @@ class Sssd(Plugin): + + plugin_name = "sssd" + profiles = ('services', 'security', 'identity') +- packages = ('sssd',) ++ packages = ('sssd', 'sssd-common') + + def setup(self): + self.add_copy_spec([ +-- +2.26.3 + diff --git a/SOURCES/sos-bz1973675-ocp-cluster-cleaner.patch b/SOURCES/sos-bz1973675-ocp-cluster-cleaner.patch new file mode 100644 index 0000000..205152f --- /dev/null +++ b/SOURCES/sos-bz1973675-ocp-cluster-cleaner.patch @@ -0,0 +1,2156 @@ +From 29afda6e4ff90385d34bc61315542e7cb4baaf8d Mon Sep 17 00:00:00 2001 +From: Jake Hunsaker +Date: Fri, 9 Apr 2021 11:32:14 -0400 +Subject: [PATCH] [cleaner] Do not break iteration of parse_string_for_keys on + first match + +Previously, `parse_string_for_keys()`, called by `obfuscate_string()` +for non-regex based obfuscations, would return on the first match in the +string found for each parser. + +Instead, continue iterating over all items in each parser's dataset +before returning the (now fully) obfuscated string. + +Resolves: #2480 + +Signed-off-by: Jake Hunsaker +--- + sos/cleaner/parsers/__init__.py | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/sos/cleaner/parsers/__init__.py b/sos/cleaner/parsers/__init__.py +index dd0451df..c77300aa 100644 +--- a/sos/cleaner/parsers/__init__.py ++++ b/sos/cleaner/parsers/__init__.py +@@ -104,7 +104,7 @@ class SoSCleanerParser(): + """ + for key, val in self.mapping.dataset.items(): + if key in string_data: +- return string_data.replace(key, val) ++ string_data = string_data.replace(key, val) + return string_data + + def get_map_contents(self): +-- +2.26.3 + +From 52e6b2ae17e128f17a84ee83b7718c2901bcd5bd Mon Sep 17 00:00:00 2001 +From: Jake Hunsaker +Date: Wed, 12 May 2021 12:39:48 -0400 +Subject: [PATCH] [collect] Add options to provide registry auth for pulling + images + +Adds options that allow a user to specify registry authentication, +either via username/password or an authfile, to allow pulling an image +that exists on a non-public registry. + +If a username/password is provided, that will be used. If not, we will +attempt to use an authfile - either provided by the user or by a cluster +profile. + +Also adds an option to forcibly pull a new(er) version of the specified +image, to alleviate conditions where a too-old version of the image +already exists on the host. + +Closes: #2534 + +Signed-off-by: Jake Hunsaker +--- + man/en/sos-collect.1 | 30 +++++++++++++++++++++++ + sos/collector/__init__.py | 17 +++++++++++++ + sos/collector/sosnode.py | 40 +++++++++++++++++++++++++++---- + sos/policies/distros/__init__.py | 16 ++++++++++++- + sos/policies/distros/redhat.py | 25 ++++++++++++------- + sos/policies/runtimes/__init__.py | 25 +++++++++++++++++++ + 6 files changed, 140 insertions(+), 13 deletions(-) + +diff --git a/man/en/sos-collect.1 b/man/en/sos-collect.1 +index 286bfe71..cdbc3257 100644 +--- a/man/en/sos-collect.1 ++++ b/man/en/sos-collect.1 +@@ -26,6 +26,11 @@ sos collect \- Collect sosreports from multiple (cluster) nodes + [\-\-no\-pkg\-check] + [\-\-no\-local] + [\-\-master MASTER] ++ [\-\-image IMAGE] ++ [\-\-force-pull-image] ++ [\-\-registry-user USER] ++ [\-\-registry-password PASSWORD] ++ [\-\-registry-authfile FILE] + [\-o ONLY_PLUGINS] + [\-p SSH_PORT] + [\-\-password] +@@ -245,6 +250,31 @@ Specify a master node for the cluster. + If provided, then sos collect will check the master node, not localhost, for determining + the type of cluster in use. + .TP ++\fB\-\-image IMAGE\fR ++Specify an image to use for the temporary container created for collections on ++containerized host, if you do not want to use the default image specifed by the ++host's policy. Note that this should include the registry. ++.TP ++\fB\-\-force-pull-image\fR ++Use this option to force the container runtime to pull the specified image (even ++if it is the policy default image) even if the image already exists on the host. ++This may be useful to update an older container image on containerized hosts. ++.TP ++\fB\-\-registry-user USER\fR ++Specify the username to authenticate to the registry with in order to pull the container ++image ++.TP ++\fB\-\-registry-password PASSWORD\fR ++Specify the password to authenticate to the registry with in order to pull the container ++image. If no password is required, leave this blank. ++.TP ++\fB\-\-registry-authfile FILE\fR ++Specify the filename to use for providing authentication credentials to the registry ++to pull the container image. ++ ++Note that this file must exist on the node(s) performing the pull operations, not the ++node from which \fBsos collect\fR was run. ++.TP + \fB\-o\fR ONLY_PLUGINS, \fB\-\-only\-plugins\fR ONLY_PLUGINS + Sosreport option. Run ONLY the plugins listed. + +diff --git a/sos/collector/__init__.py b/sos/collector/__init__.py +index 1c742cf5..0624caad 100644 +--- a/sos/collector/__init__.py ++++ b/sos/collector/__init__.py +@@ -63,6 +63,7 @@ class SoSCollector(SoSComponent): + 'encrypt_pass': '', + 'group': None, + 'image': '', ++ 'force_pull_image': False, + 'jobs': 4, + 'keywords': [], + 'keyword_file': None, +@@ -84,6 +85,9 @@ class SoSCollector(SoSComponent): + 'plugin_timeout': None, + 'cmd_timeout': None, + 'preset': '', ++ 'registry_user': None, ++ 'registry_password': None, ++ 'registry_authfile': None, + 'save_group': '', + 'since': '', + 'skip_commands': [], +@@ -319,6 +323,19 @@ class SoSCollector(SoSComponent): + collect_grp.add_argument('--image', + help=('Specify the container image to use for' + ' containerized hosts.')) ++ collect_grp.add_argument('--force-pull-image', '--pull', default=False, ++ action='store_true', ++ help='Force pull the container image even if ' ++ 'it already exists on the host') ++ collect_grp.add_argument('--registry-user', default=None, ++ help='Username to authenticate to the ' ++ 'registry with for pulling an image') ++ collect_grp.add_argument('--registry-password', default=None, ++ help='Password to authenticate to the ' ++ 'registry with for pulling an image') ++ collect_grp.add_argument('--registry-authfile', default=None, ++ help='Use this authfile to provide registry ' ++ 'authentication when pulling an image') + collect_grp.add_argument('-i', '--ssh-key', help='Specify an ssh key') + collect_grp.add_argument('-j', '--jobs', default=4, type=int, + help='Number of concurrent nodes to collect') +diff --git a/sos/collector/sosnode.py b/sos/collector/sosnode.py +index 48693342..d1c11824 100644 +--- a/sos/collector/sosnode.py ++++ b/sos/collector/sosnode.py +@@ -134,9 +134,27 @@ class SosNode(): + """If the host is containerized, create the container we'll be using + """ + if self.host.containerized: +- res = self.run_command(self.host.create_sos_container(), +- need_root=True) +- if res['status'] in [0, 125]: # 125 means container exists ++ cmd = self.host.create_sos_container( ++ image=self.opts.image, ++ auth=self.get_container_auth(), ++ force_pull=self.opts.force_pull_image ++ ) ++ res = self.run_command(cmd, need_root=True) ++ if res['status'] in [0, 125]: ++ if res['status'] == 125: ++ if 'unable to retrieve auth token' in res['stdout']: ++ self.log_error( ++ "Could not pull image. Provide either a username " ++ "and password or authfile" ++ ) ++ raise Exception ++ elif 'unknown: Not found' in res['stdout']: ++ self.log_error('Specified image not found on registry') ++ raise Exception ++ # 'name exists' with code 125 means the container was ++ # created successfully, so ignore it. ++ # initial creations leads to an exited container, restarting it ++ # here will keep it alive for us to exec through + ret = self.run_command(self.host.restart_sos_container(), + need_root=True) + if ret['status'] == 0: +@@ -152,6 +170,20 @@ class SosNode(): + % res['stdout']) + raise Exception + ++ def get_container_auth(self): ++ """Determine what the auth string should be to pull the image used to ++ deploy our temporary container ++ """ ++ if self.opts.registry_user: ++ return self.host.runtimes['default'].fmt_registry_credentials( ++ self.opts.registry_user, ++ self.opts.registry_password ++ ) ++ else: ++ return self.host.runtimes['default'].fmt_registry_authfile( ++ self.opts.registry_authfile or self.host.container_authfile ++ ) ++ + def file_exists(self, fname): + """Checks for the presence of fname on the remote node""" + if not self.local: +@@ -343,7 +375,7 @@ class SosNode(): + % self.commons['policy'].distro) + return self.commons['policy'] + host = load(cache={}, sysroot=self.opts.sysroot, init=InitSystem(), +- probe_runtime=False, remote_exec=self.ssh_cmd, ++ probe_runtime=True, remote_exec=self.ssh_cmd, + remote_check=self.read_file('/etc/os-release')) + if host: + self.log_info("loaded policy %s for host" % host.distro) +diff --git a/sos/policies/distros/__init__.py b/sos/policies/distros/__init__.py +index 9fe31513..f5b9fd5b 100644 +--- a/sos/policies/distros/__init__.py ++++ b/sos/policies/distros/__init__.py +@@ -62,6 +62,7 @@ class LinuxPolicy(Policy): + sos_bin_path = '/usr/bin' + sos_container_name = 'sos-collector-tmp' + container_version_command = None ++ container_authfile = None + + def __init__(self, sysroot=None, init=None, probe_runtime=True): + super(LinuxPolicy, self).__init__(sysroot=sysroot, +@@ -626,13 +627,26 @@ class LinuxPolicy(Policy): + """ + return '' + +- def create_sos_container(self): ++ def create_sos_container(self, image=None, auth=None, force_pull=False): + """Returns the command that will create the container that will be + used for running commands inside a container on hosts that require it. + + This will use the container runtime defined for the host type to + launch a container. From there, we use the defined runtime to exec into + the container's namespace. ++ ++ :param image: The name of the image if not using the policy default ++ :type image: ``str`` or ``None`` ++ ++ :param auth: The auth string required by the runtime to pull an ++ image from the registry ++ :type auth: ``str`` or ``None`` ++ ++ :param force_pull: Should the runtime forcibly pull the image ++ :type force_pull: ``bool`` ++ ++ :returns: The command to execute to launch the temp container ++ :rtype: ``str`` + """ + return '' + +diff --git a/sos/policies/distros/redhat.py b/sos/policies/distros/redhat.py +index 241d3f13..20afbcc4 100644 +--- a/sos/policies/distros/redhat.py ++++ b/sos/policies/distros/redhat.py +@@ -452,15 +452,19 @@ support representative. + + return self.find_preset(ATOMIC) + +- def create_sos_container(self): ++ def create_sos_container(self, image=None, auth=None, force_pull=False): + _cmd = ("{runtime} run -di --name {name} --privileged --ipc=host" + " --net=host --pid=host -e HOST=/host -e NAME={name} -e " +- "IMAGE={image} -v /run:/run -v /var/log:/var/log -v " ++ "IMAGE={image} {pull} -v /run:/run -v /var/log:/var/log -v " + "/etc/machine-id:/etc/machine-id -v " +- "/etc/localtime:/etc/localtime -v /:/host {image}") ++ "/etc/localtime:/etc/localtime -v /:/host {auth} {image}") ++ _image = image or self.container_image ++ _pull = '--pull=always' if force_pull else '' + return _cmd.format(runtime=self.container_runtime, + name=self.sos_container_name, +- image=self.container_image) ++ image=_image, ++ pull=_pull, ++ auth=auth or '') + + def set_cleanup_cmd(self): + return 'docker rm --force sos-collector-tmp' +@@ -482,6 +486,7 @@ support representative. + container_image = 'registry.redhat.io/rhel8/support-tools' + sos_path_strip = '/host' + container_version_command = 'rpm -q sos' ++ container_authfile = '/var/lib/kubelet/config.json' + + def __init__(self, sysroot=None, init=None, probe_runtime=True, + remote_exec=None): +@@ -511,15 +516,19 @@ support representative. + # RH OCP environments. + return self.find_preset(RHOCP) + +- def create_sos_container(self): ++ def create_sos_container(self, image=None, auth=None, force_pull=False): + _cmd = ("{runtime} run -di --name {name} --privileged --ipc=host" + " --net=host --pid=host -e HOST=/host -e NAME={name} -e " +- "IMAGE={image} -v /run:/run -v /var/log:/var/log -v " ++ "IMAGE={image} {pull} -v /run:/run -v /var/log:/var/log -v " + "/etc/machine-id:/etc/machine-id -v " +- "/etc/localtime:/etc/localtime -v /:/host {image}") ++ "/etc/localtime:/etc/localtime -v /:/host {auth} {image}") ++ _image = image or self.container_image ++ _pull = '--pull=always' if force_pull else '' + return _cmd.format(runtime=self.container_runtime, + name=self.sos_container_name, +- image=self.container_image) ++ image=_image, ++ pull=_pull, ++ auth=auth or '') + + def set_cleanup_cmd(self): + return 'podman rm --force %s' % self.sos_container_name +diff --git a/sos/policies/runtimes/__init__.py b/sos/policies/runtimes/__init__.py +index 1a61b644..f28d6a1d 100644 +--- a/sos/policies/runtimes/__init__.py ++++ b/sos/policies/runtimes/__init__.py +@@ -157,6 +157,31 @@ class ContainerRuntime(): + quoted_cmd = cmd + return "%s %s %s" % (self.run_cmd, container, quoted_cmd) + ++ def fmt_registry_credentials(self, username, password): ++ """Format a string to pass to the 'run' command of the runtime to ++ enable authorization for pulling the image during `sos collect`, if ++ needed using username and optional password creds ++ ++ :param username: The name of the registry user ++ :type username: ``str`` ++ ++ :param password: The password of the registry user ++ :type password: ``str`` or ``None`` ++ ++ :returns: The string to use to enable a run command to pull the image ++ :rtype: ``str`` ++ """ ++ return "--creds=%s%s" % (username, ':' + password if password else '') ++ ++ def fmt_registry_authfile(self, authfile): ++ """Format a string to pass to the 'run' command of the runtime to ++ enable authorization for pulling the image during `sos collect`, if ++ needed using an authfile. ++ """ ++ if authfile: ++ return "--authfile %s" % authfile ++ return '' ++ + def get_logs_command(self, container): + """Get the command string used to dump container logs from the + runtime +-- +2.26.3 + +From 3cbbd7df6f0700609eeef3210d7388298b9e0c21 Mon Sep 17 00:00:00 2001 +From: Jake Hunsaker +Date: Wed, 12 May 2021 13:26:45 -0400 +Subject: [PATCH] [sosnode] Allow clusters to set options only for master nodes + +Adds a method the `Cluster` that allows a profile to set sos options +specifically for master nodes. + +Signed-off-by: Jake Hunsaker +--- + sos/collector/clusters/__init__.py | 21 +++++++++++++++++++++ + sos/collector/sosnode.py | 6 ++++++ + 2 files changed, 27 insertions(+) + +diff --git a/sos/collector/clusters/__init__.py b/sos/collector/clusters/__init__.py +index 5c002bae..bfa3aad3 100644 +--- a/sos/collector/clusters/__init__.py ++++ b/sos/collector/clusters/__init__.py +@@ -137,6 +137,27 @@ class Cluster(): + """ + self.cluster_ssh_key = key + ++ def set_master_options(self, node): ++ """If there is a need to set specific options in the sos command being ++ run on the cluster's master nodes, override this method in the cluster ++ profile and do that here. ++ ++ :param node: The master node ++ :type node: ``SoSNode`` ++ """ ++ pass ++ ++ def check_node_is_master(self, node): ++ """In the event there are multiple masters, or if the collect command ++ is being run from a system that is technically capable of enumerating ++ nodes but the cluster profiles needs to specify master-specific options ++ for other nodes, override this method in the cluster profile ++ ++ :param node: The node for the cluster to check ++ :type node: ``SoSNode`` ++ """ ++ return node.address == self.master.address ++ + def exec_master_cmd(self, cmd, need_root=False): + """Used to retrieve command output from a (master) node in a cluster + +diff --git a/sos/collector/sosnode.py b/sos/collector/sosnode.py +index d1c11824..62666635 100644 +--- a/sos/collector/sosnode.py ++++ b/sos/collector/sosnode.py +@@ -647,6 +647,10 @@ class SosNode(): + self.cluster.sos_plugin_options[opt]) + self.opts.plugin_options.append(option) + ++ # set master-only options ++ if self.cluster.check_node_is_master(self): ++ self.cluster.set_master_options(self) ++ + def finalize_sos_cmd(self): + """Use host facts and compare to the cluster type to modify the sos + command if needed""" +@@ -707,6 +711,8 @@ class SosNode(): + os.path.join(self.host.sos_bin_path, self.sos_bin) + ) + ++ self.update_cmd_from_cluster() ++ + if self.opts.only_plugins: + plugs = [o for o in self.opts.only_plugins + if self._plugin_exists(o)] +-- +2.26.3 + +From cae9dd79a59107aa92db5f90aed356e093985bd9 Mon Sep 17 00:00:00 2001 +From: Jake Hunsaker +Date: Wed, 12 May 2021 16:06:29 -0400 +Subject: [PATCH] [sosnode] Don't fail on sos-less bastion nodes used for node + lists + +If the master node is determined to not have sos installed, that is not +necessarily a fatal error for scenarios where the 'master' node is only +being used to enumerate node lists and is not actually part of the +cluster. This can happen when a user is using a bastion node to +enumerate and connect to the cluster environment, or if the local host +is being used to enumerate nodes via cluster client tooling. + +Signed-off-by: Jake Hunsaker +--- + sos/collector/sosnode.py | 17 ++++++++++++----- + 1 file changed, 12 insertions(+), 5 deletions(-) + +diff --git a/sos/collector/sosnode.py b/sos/collector/sosnode.py +index 62666635..7e56483d 100644 +--- a/sos/collector/sosnode.py ++++ b/sos/collector/sosnode.py +@@ -287,13 +287,20 @@ class SosNode(): + # use the containerized policy's command + pkgs = self.run_command(self.host.container_version_command, + use_container=True, need_root=True) +- ver = pkgs['stdout'].strip().split('-')[1] +- if ver: +- self.sos_info['version'] = ver +- if 'version' in self.sos_info: ++ if pkgs['status'] == 0: ++ ver = pkgs['stdout'].strip().split('-')[1] ++ if ver: ++ self.sos_info['version'] = ver ++ else: ++ self.sos_info['version'] = None ++ if self.sos_info['version']: + self.log_info('sos version is %s' % self.sos_info['version']) + else: +- self.log_error('sos is not installed on this node') ++ if not self.address == self.opts.master: ++ # in the case where the 'master' enumerates nodes but is not ++ # intended for collection (bastions), don't worry about sos not ++ # being present ++ self.log_error('sos is not installed on this node') + self.connected = False + return False + cmd = 'sosreport -l' +-- +2.26.3 + +From cc5abe563d855dea9ac25f56de2e493228b48bf7 Mon Sep 17 00:00:00 2001 +From: Jake Hunsaker +Date: Wed, 12 May 2021 18:26:09 -0400 +Subject: [PATCH] [sosnode] Mark sos commands as explicitly needing root for + containers + +Fixes an issue where the sos inspection commands were not properly +marked as needing to be run as root (either directly or via sudo) for +containerized hosts, which would lead to incorrect sos command +formatting. + +Mark those commands, and the final container removal command, as +explicitly needing root permissions. + +Signed-off-by: Jake Hunsaker +--- + sos/collector/sosnode.py | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +diff --git a/sos/collector/sosnode.py b/sos/collector/sosnode.py +index 7e56483d..1fc03076 100644 +--- a/sos/collector/sosnode.py ++++ b/sos/collector/sosnode.py +@@ -304,7 +304,7 @@ class SosNode(): + self.connected = False + return False + cmd = 'sosreport -l' +- sosinfo = self.run_command(cmd, use_container=True) ++ sosinfo = self.run_command(cmd, use_container=True, need_root=True) + if sosinfo['status'] == 0: + self._load_sos_plugins(sosinfo['stdout']) + if self.check_sos_version('3.6'): +@@ -312,7 +312,7 @@ class SosNode(): + + def _load_sos_presets(self): + cmd = 'sosreport --list-presets' +- res = self.run_command(cmd, use_container=True) ++ res = self.run_command(cmd, use_container=True, need_root=True) + if res['status'] == 0: + for line in res['stdout'].splitlines(): + if line.strip().startswith('name:'): +@@ -996,7 +996,7 @@ class SosNode(): + self.remove_file(self.sos_path + '.md5') + cleanup = self.host.set_cleanup_cmd() + if cleanup: +- self.run_command(cleanup) ++ self.run_command(cleanup, need_root=True) + + def collect_extra_cmd(self, filenames): + """Collect the file created by a cluster outside of sos""" +-- +2.26.3 + +From 55e77ad4c7e90ba14b10c5fdf18b65aa5d6b9cf8 Mon Sep 17 00:00:00 2001 +From: Jake Hunsaker +Date: Wed, 12 May 2021 18:55:31 -0400 +Subject: [PATCH] [ocp] Add cluster profile for OCP4 + +Removes the previous OCP cluster profile and replaces it with an updated +one for OCP4 which is entirely separated from the kubernetes profile. + +Resolves: #2544 + +Signed-off-by: Jake Hunsaker +--- + sos/collector/clusters/kubernetes.py | 8 -- + sos/collector/clusters/ocp.py | 109 +++++++++++++++++++++++++++ + 2 files changed, 109 insertions(+), 8 deletions(-) + create mode 100644 sos/collector/clusters/ocp.py + +diff --git a/sos/collector/clusters/kubernetes.py b/sos/collector/clusters/kubernetes.py +index 6a867e31..08fd9554 100644 +--- a/sos/collector/clusters/kubernetes.py ++++ b/sos/collector/clusters/kubernetes.py +@@ -44,11 +44,3 @@ class kubernetes(Cluster): + return nodes + else: + raise Exception('Node enumeration did not return usable output') +- +- +-class openshift(kubernetes): +- +- cluster_name = 'OpenShift Container Platform' +- packages = ('atomic-openshift',) +- sos_preset = 'ocp' +- cmd = 'oc' +diff --git a/sos/collector/clusters/ocp.py b/sos/collector/clusters/ocp.py +new file mode 100644 +index 00000000..283fcfd1 +--- /dev/null ++++ b/sos/collector/clusters/ocp.py +@@ -0,0 +1,109 @@ ++# Copyright Red Hat 2021, Jake Hunsaker ++ ++# This file is part of the sos project: https://github.com/sosreport/sos ++# ++# This copyrighted material is made available to anyone wishing to use, ++# modify, copy, or redistribute it subject to the terms and conditions of ++# version 2 of the GNU General Public License. ++# ++# See the LICENSE file in the source distribution for further information. ++ ++from pipes import quote ++from sos.collector.clusters import Cluster ++ ++ ++class ocp(Cluster): ++ """OpenShift Container Platform v4""" ++ ++ cluster_name = 'OpenShift Container Platform v4' ++ packages = ('openshift-hyperkube', 'openshift-clients') ++ ++ option_list = [ ++ ('label', '', 'Colon delimited list of labels to select nodes with'), ++ ('role', '', 'Colon delimited list of roles to select nodes with'), ++ ('kubeconfig', '', 'Path to the kubeconfig file') ++ ] ++ ++ def fmt_oc_cmd(self, cmd): ++ """Format the oc command to optionall include the kubeconfig file if ++ one is specified ++ """ ++ if self.get_option('kubeconfig'): ++ return "oc --config %s %s" % (self.get_option('kubeconfig'), cmd) ++ return "oc %s" % cmd ++ ++ def check_enabled(self): ++ if super(ocp, self).check_enabled(): ++ return True ++ _who = self.fmt_oc_cmd('whoami') ++ return self.exec_master_cmd(_who)['status'] == 0 ++ ++ def _build_dict(self, nodelist): ++ """From the output of get_nodes(), construct an easier-to-reference ++ dict of nodes that will be used in determining labels, master status, ++ etc... ++ ++ :param nodelist: The split output of `oc get nodes` ++ :type nodelist: ``list`` ++ ++ :returns: A dict of nodes with `get nodes` columns as keys ++ :rtype: ``dict`` ++ """ ++ nodes = {} ++ if 'NAME' in nodelist[0]: ++ # get the index of the fields ++ statline = nodelist.pop(0).split() ++ idx = {} ++ for state in ['status', 'roles', 'version', 'os-image']: ++ try: ++ idx[state] = statline.index(state.upper()) ++ except Exception: ++ pass ++ for node in nodelist: ++ _node = node.split() ++ nodes[_node[0]] = {} ++ for column in idx: ++ nodes[_node[0]][column] = _node[idx[column]] ++ return nodes ++ ++ def get_nodes(self): ++ nodes = [] ++ self.node_dict = {} ++ cmd = 'get nodes -o wide' ++ if self.get_option('label'): ++ labels = ','.join(self.get_option('label').split(':')) ++ cmd += " -l %s" % quote(labels) ++ res = self.exec_master_cmd(self.fmt_oc_cmd(cmd)) ++ if res['status'] == 0: ++ roles = [r for r in self.get_option('role').split(':')] ++ self.node_dict = self._build_dict(res['stdout'].splitlines()) ++ for node in self.node_dict: ++ if roles: ++ for role in roles: ++ if role in node: ++ nodes.append(node) ++ else: ++ nodes.append(node) ++ else: ++ msg = "'oc' command failed" ++ if 'Missing or incomplete' in res['stdout']: ++ msg = ("'oc' failed due to missing kubeconfig on master node." ++ " Specify one via '-c ocp.kubeconfig='") ++ raise Exception(msg) ++ return nodes ++ ++ def set_node_label(self, node): ++ if node.address not in self.node_dict: ++ return '' ++ for label in ['master', 'worker']: ++ if label in self.node_dict[node.address]['roles']: ++ return label ++ return '' ++ ++ def check_node_is_master(self, sosnode): ++ if sosnode.address not in self.node_dict: ++ return False ++ return 'master' in self.node_dict[sosnode.address]['roles'] ++ ++ def set_master_options(self, node): ++ node.opts.enable_plugins.append('openshift') +-- +2.26.3 + +From a3c1caad21160545eda87ea1fde93e972a6fbf88 Mon Sep 17 00:00:00 2001 +From: Jake Hunsaker +Date: Wed, 26 May 2021 11:55:24 -0400 +Subject: [PATCH] [cleaner] Don't strip empty lines from substituted files + +Fixes an issue where empty lines would be stripped from files that have +other obfuscations in them. Those empty lines may be important for file +structure and/or readability, so we should instead simply not pass empty +lines to the parsers rather than skipping them wholesale in the flow of +writing obfuscations to a temp file before replacing the source file +with a potentially changed temp file. + +Resolves: #2562 + +Signed-off-by: Jake Hunsaker +--- + sos/cleaner/__init__.py | 6 ++++-- + 1 file changed, 4 insertions(+), 2 deletions(-) + +diff --git a/sos/cleaner/__init__.py b/sos/cleaner/__init__.py +index bdd24f95..55465b85 100644 +--- a/sos/cleaner/__init__.py ++++ b/sos/cleaner/__init__.py +@@ -603,8 +603,6 @@ third party. + tfile = tempfile.NamedTemporaryFile(mode='w', dir=self.tmpdir) + with open(filename, 'r') as fname: + for line in fname: +- if not line.strip(): +- continue + try: + line, count = self.obfuscate_line(line) + subs += count +@@ -642,7 +640,11 @@ third party. + + Returns the fully obfuscated line and the number of substitutions made + """ ++ # don't iterate over blank lines, but still write them to the tempfile ++ # to maintain the same structure when we write a scrubbed file back + count = 0 ++ if not line.strip(): ++ return line, count + for parser in self.parsers: + try: + line, _count = parser.parse_line(line) +-- +2.26.3 + +From 892bbd8114703f5a4d23aa77ba5829b7ba59446f Mon Sep 17 00:00:00 2001 +From: Jake Hunsaker +Date: Wed, 5 May 2021 17:02:04 -0400 +Subject: [PATCH] [cleaner] Remove binary files by default + +Binary files generally speaking cannot be obfuscated, and as such we +should remove them from archives being obfuscated by default so that +sensitive data is not mistakenly included in an obfuscated archive. + +This commits adds a new `--keep-binary-files` option that if used will +keep any encountered binary files in the final archive. The default +option of `false` will ensure that encountered binary files are removed. + +The number of removed binary files per archive is reported when +obfuscation is completed for that archive. + +Closes: #2478 +Resolves: #2524 + +Signed-off-by: Jake Hunsaker +--- + man/en/sos-clean.1 | 12 ++++ + sos/cleaner/__init__.py | 21 +++++- + sos/cleaner/obfuscation_archive.py | 67 ++++++++++++++++++-- + sos/collector/__init__.py | 5 ++ + sos/report/__init__.py | 6 ++ + 8 files changed, 167 insertions(+), 7 deletions(-) + +diff --git a/man/en/sos-clean.1 b/man/en/sos-clean.1 +index 4856b43b..b77bc63c 100644 +--- a/man/en/sos-clean.1 ++++ b/man/en/sos-clean.1 +@@ -9,6 +9,7 @@ sos clean - Obfuscate sensitive data from one or more sosreports + [\-\-map-file] + [\-\-jobs] + [\-\-no-update] ++ [\-\-keep-binary-files] + + .SH DESCRIPTION + \fBsos clean\fR or \fBsos mask\fR is an sos subcommand used to obfuscate sensitive information from +@@ -77,6 +78,17 @@ Default: 4 + .TP + .B \-\-no-update + Do not write the mapping file contents to /etc/sos/cleaner/default_mapping ++.TP ++.B \-\-keep-binary-files ++Keep unprocessable binary files in the archive, rather than removing them. ++ ++Note that binary files cannot be obfuscated, and thus keeping them in the archive ++may result in otherwise sensitive information being included in the final archive. ++Users should review any archive that keeps binary files in place before sending to ++a third party. ++ ++Default: False (remove encountered binary files) ++ + .SH SEE ALSO + .BR sos (1) + .BR sos-report (1) +diff --git a/sos/cleaner/__init__.py b/sos/cleaner/__init__.py +index 55465b85..f88ff8a0 100644 +--- a/sos/cleaner/__init__.py ++++ b/sos/cleaner/__init__.py +@@ -47,6 +47,7 @@ class SoSCleaner(SoSComponent): + 'keyword_file': None, + 'map_file': '/etc/sos/cleaner/default_mapping', + 'no_update': False, ++ 'keep_binary_files': False, + 'target': '', + 'usernames': [] + } +@@ -183,6 +184,11 @@ third party. + action='store_true', + help='Do not update the --map file with new ' + 'mappings from this run') ++ clean_grp.add_argument('--keep-binary-files', default=False, ++ action='store_true', ++ dest='keep_binary_files', ++ help='Keep unprocessable binary files in the ' ++ 'archive instead of removing them') + clean_grp.add_argument('--usernames', dest='usernames', default=[], + action='extend', + help='List of usernames to obfuscate') +@@ -467,6 +473,11 @@ third party. + "%s concurrently\n" + % (len(self.report_paths), self.opts.jobs)) + self.ui_log.info(msg) ++ if self.opts.keep_binary_files: ++ self.ui_log.warning( ++ "WARNING: binary files that potentially contain sensitive " ++ "information will NOT be removed from the final archive\n" ++ ) + pool = ThreadPoolExecutor(self.opts.jobs) + pool.map(self.obfuscate_report, self.report_paths, chunksize=1) + pool.shutdown(wait=True) +@@ -539,6 +550,10 @@ third party. + short_name = fname.split(archive.archive_name + '/')[1] + if archive.should_skip_file(short_name): + continue ++ if (not self.opts.keep_binary_files and ++ archive.should_remove_file(short_name)): ++ archive.remove_file(short_name) ++ continue + try: + count = self.obfuscate_file(fname, short_name, + archive.archive_name) +@@ -574,7 +589,11 @@ third party. + arc_md.add_field('files_obfuscated', len(archive.file_sub_list)) + arc_md.add_field('total_substitutions', archive.total_sub_count) + self.completed_reports.append(archive) +- archive.report_msg("Obfuscation completed") ++ rmsg = '' ++ if archive.removed_file_count: ++ rmsg = " [removed %s unprocessable files]" ++ rmsg = rmsg % archive.removed_file_count ++ archive.report_msg("Obfuscation completed%s" % rmsg) + + except Exception as err: + self.ui_log.info("Exception while processing %s: %s" +diff --git a/sos/cleaner/obfuscation_archive.py b/sos/cleaner/obfuscation_archive.py +index c64ab13b..76841b51 100644 +--- a/sos/cleaner/obfuscation_archive.py ++++ b/sos/cleaner/obfuscation_archive.py +@@ -28,6 +28,7 @@ class SoSObfuscationArchive(): + + file_sub_list = [] + total_sub_count = 0 ++ removed_file_count = 0 + + def __init__(self, archive_path, tmpdir): + self.archive_path = archive_path +@@ -62,11 +63,7 @@ class SoSObfuscationArchive(): + 'sys/firmware', + 'sys/fs', + 'sys/kernel/debug', +- 'sys/module', +- r'.*\.tar$', # TODO: support archive unpacking +- # Be explicit with these tar matches to avoid matching commands +- r'.*\.tar\.xz', +- '.*.gz' ++ 'sys/module' + ] + + @property +@@ -76,6 +73,17 @@ class SoSObfuscationArchive(): + except Exception: + return False + ++ def remove_file(self, fname): ++ """Remove a file from the archive. This is used when cleaner encounters ++ a binary file, which we cannot reliably obfuscate. ++ """ ++ full_fname = self.get_file_path(fname) ++ # don't call a blank remove() here ++ if full_fname: ++ self.log_info("Removing binary file '%s' from archive" % fname) ++ os.remove(full_fname) ++ self.removed_file_count += 1 ++ + def extract(self): + if self.is_tarfile: + self.report_msg("Extracting...") +@@ -227,3 +235,52 @@ class SoSObfuscationArchive(): + if filename.startswith(_skip) or re.match(_skip, filename): + return True + return False ++ ++ def should_remove_file(self, fname): ++ """Determine if the file should be removed or not, due to an inability ++ to reliably obfuscate that file based on the filename. ++ ++ :param fname: Filename relative to the extracted archive root ++ :type fname: ``str`` ++ ++ :returns: ``True`` if the file cannot be reliably obfuscated ++ :rtype: ``bool`` ++ """ ++ obvious_removes = [ ++ r'.*\.gz', # TODO: support flat gz/xz extraction ++ r'.*\.xz', ++ r'.*\.bzip2', ++ r'.*\.tar\..*', # TODO: support archive unpacking ++ r'.*\.txz$', ++ r'.*\.tgz$', ++ r'.*\.bin', ++ r'.*\.journal', ++ r'.*\~$' ++ ] ++ ++ # if the filename matches, it is obvious we can remove them without ++ # doing the read test ++ for _arc_reg in obvious_removes: ++ if re.match(_arc_reg, fname): ++ return True ++ ++ return self.file_is_binary(fname) ++ ++ def file_is_binary(self, fname): ++ """Determine if the file is a binary file or not. ++ ++ ++ :param fname: Filename relative to the extracted archive root ++ :type fname: ``str`` ++ ++ :returns: ``True`` if file is binary, else ``False`` ++ :rtype: ``bool`` ++ """ ++ with open(self.get_file_path(fname), 'tr') as tfile: ++ try: ++ # when opened as above (tr), reading binary content will raise ++ # an exception ++ tfile.read(1) ++ return False ++ except UnicodeDecodeError: ++ return True +diff --git a/sos/collector/__init__.py b/sos/collector/__init__.py +index 9884836c..469db60d 100644 +--- a/sos/collector/__init__.py ++++ b/sos/collector/__init__.py +@@ -67,6 +67,7 @@ class SoSCollector(SoSComponent): + 'jobs': 4, + 'keywords': [], + 'keyword_file': None, ++ 'keep_binary_files': False, + 'label': '', + 'list_options': False, + 'log_size': 0, +@@ -410,6 +411,10 @@ class SoSCollector(SoSComponent): + dest='clean', + default=False, action='store_true', + help='Obfuscate sensistive information') ++ cleaner_grp.add_argument('--keep-binary-files', default=False, ++ action='store_true', dest='keep_binary_files', ++ help='Keep unprocessable binary files in the ' ++ 'archive instead of removing them') + cleaner_grp.add_argument('--domains', dest='domains', default=[], + action='extend', + help='Additional domain names to obfuscate') +diff --git a/sos/report/__init__.py b/sos/report/__init__.py +index d4345409..2cedc76e 100644 +--- a/sos/report/__init__.py ++++ b/sos/report/__init__.py +@@ -82,6 +82,7 @@ class SoSReport(SoSComponent): + 'case_id': '', + 'chroot': 'auto', + 'clean': False, ++ 'keep_binary_files': False, + 'desc': '', + 'domains': [], + 'dry_run': False, +@@ -344,6 +345,11 @@ class SoSReport(SoSComponent): + default='/etc/sos/cleaner/default_mapping', + help=('Provide a previously generated mapping' + ' file for obfuscation')) ++ cleaner_grp.add_argument('--keep-binary-files', default=False, ++ action='store_true', ++ dest='keep_binary_files', ++ help='Keep unprocessable binary files in the ' ++ 'archive instead of removing them') + cleaner_grp.add_argument('--usernames', dest='usernames', default=[], + action='extend', + help='List of usernames to obfuscate') + +From aed0102a1d6ef9a030c9e5349f092b51b9d1f22d Mon Sep 17 00:00:00 2001 +From: Jake Hunsaker +Date: Fri, 11 Jun 2021 23:20:59 -0400 +Subject: [PATCH 01/10] [SoSNode] Allow individually setting node options + +Like we now do for primary nodes, add the ability to individually set +node options via a new `set_node_options()` method for when blanket +setting options across all nodes via the options class attrs is not +sufficient. + +Signed-off-by: Jake Hunsaker +--- + sos/collector/clusters/__init__.py | 10 ++++++++++ + sos/collector/sosnode.py | 6 ++++-- + 2 files changed, 14 insertions(+), 2 deletions(-) + +diff --git a/sos/collector/clusters/__init__.py b/sos/collector/clusters/__init__.py +index 90e62d79..c4da1ab8 100644 +--- a/sos/collector/clusters/__init__.py ++++ b/sos/collector/clusters/__init__.py +@@ -137,6 +137,16 @@ class Cluster(): + """ + self.cluster_ssh_key = key + ++ def set_node_options(self, node): ++ """If there is a need to set specific options on ONLY the non-primary ++ nodes in a collection, override this method in the cluster profile ++ and do that here. ++ ++ :param node: The non-primary node ++ :type node: ``SoSNode`` ++ """ ++ pass ++ + def set_master_options(self, node): + """If there is a need to set specific options in the sos command being + run on the cluster's master nodes, override this method in the cluster +diff --git a/sos/collector/sosnode.py b/sos/collector/sosnode.py +index 1fc03076..7e784aa1 100644 +--- a/sos/collector/sosnode.py ++++ b/sos/collector/sosnode.py +@@ -657,6 +657,8 @@ class SosNode(): + # set master-only options + if self.cluster.check_node_is_master(self): + self.cluster.set_master_options(self) ++ else: ++ self.cluster.set_node_options(self) + + def finalize_sos_cmd(self): + """Use host facts and compare to the cluster type to modify the sos +@@ -713,13 +715,13 @@ class SosNode(): + sos_opts.append('--cmd-timeout=%s' + % quote(str(self.opts.cmd_timeout))) + ++ self.update_cmd_from_cluster() ++ + sos_cmd = sos_cmd.replace( + 'sosreport', + os.path.join(self.host.sos_bin_path, self.sos_bin) + ) + +- self.update_cmd_from_cluster() +- + if self.opts.only_plugins: + plugs = [o for o in self.opts.only_plugins + if self._plugin_exists(o)] +-- +2.26.3 + + +From 96f166699d12704cc7cf73cb8b13278675f68730 Mon Sep 17 00:00:00 2001 +From: Jake Hunsaker +Date: Sat, 12 Jun 2021 00:02:36 -0400 +Subject: [PATCH 02/10] [sosnode] Support passing env vars to `run_command()` + +Updates `run_command()` to support passing new environment variables to +the command being run, for that command alone. This parameter takes a +dict, and if set we will first copy the existing set of env vars on the +node and then update that set of variables using the passed dict. + +Additionally, `execute_sos_command()` will now try to pass a new +`sos_env_vars` dict (default empty) so that clusters may set environment +variables specifically for the sos command being run, without having to +modify the actual sos command being executed. + +Signed-off-by: Jake Hunsaker +--- + sos/collector/sosnode.py | 27 ++++++++++++++++++++++++--- + 1 file changed, 24 insertions(+), 3 deletions(-) + +diff --git a/sos/collector/sosnode.py b/sos/collector/sosnode.py +index 7e784aa1..40472a4e 100644 +--- a/sos/collector/sosnode.py ++++ b/sos/collector/sosnode.py +@@ -45,6 +45,8 @@ class SosNode(): + self.host = None + self.cluster = None + self.hostname = None ++ self.sos_env_vars = {} ++ self._env_vars = {} + self._password = password or self.opts.password + if not self.opts.nopasswd_sudo and not self.opts.sudo_pw: + self.opts.sudo_pw = self._password +@@ -109,6 +111,21 @@ class SosNode(): + def _fmt_msg(self, msg): + return '{:<{}} : {}'.format(self._hostname, self.hostlen + 1, msg) + ++ @property ++ def env_vars(self): ++ if not self._env_vars: ++ if self.local: ++ self._env_vars = os.environ.copy() ++ else: ++ ret = self.run_command("env --null") ++ if ret['status'] == 0: ++ for ln in ret['output'].split('\x00'): ++ if not ln: ++ continue ++ _val = ln.split('=') ++ self._env_vars[_val[0]] = _val[1] ++ return self._env_vars ++ + def set_node_manifest(self, manifest): + """Set the manifest section that this node will write to + """ +@@ -404,7 +421,7 @@ class SosNode(): + return self.host.package_manager.pkg_by_name(pkg) is not None + + def run_command(self, cmd, timeout=180, get_pty=False, need_root=False, +- force_local=False, use_container=False): ++ force_local=False, use_container=False, env=None): + """Runs a given cmd, either via the SSH session or locally + + Arguments: +@@ -446,7 +463,10 @@ class SosNode(): + else: + if get_pty: + cmd = "/bin/bash -c %s" % quote(cmd) +- res = pexpect.spawn(cmd, encoding='utf-8') ++ if env: ++ _cmd_env = self.env_vars ++ _cmd_env.update(env) ++ res = pexpect.spawn(cmd, encoding='utf-8', env=_cmd_env) + if need_root: + if self.need_sudo: + res.sendline(self.opts.sudo_pw) +@@ -830,7 +850,8 @@ class SosNode(): + res = self.run_command(self.sos_cmd, + timeout=self.opts.timeout, + get_pty=True, need_root=True, +- use_container=True) ++ use_container=True, ++ env=self.sos_env_vars) + if res['status'] == 0: + for line in res['stdout'].splitlines(): + if fnmatch.fnmatch(line, '*sosreport-*tar*'): +-- +2.26.3 + + +From a9e1632113406a646bdd7525982b699cf790aedb Mon Sep 17 00:00:00 2001 +From: Jake Hunsaker +Date: Tue, 15 Jun 2021 12:43:27 -0400 +Subject: [PATCH 03/10] [collect|sosnode] Avoiding clobbering sos options + between nodes + +This commit overhauls the function of `finalize_sos_cmd()` in several +ways. + +First, assign the sos report plugin related options directly to private +copies of those values for each node, so that the shared cluster profile +does not clober options between nodes. + +Second, provide a default Lock mechanism for clusters that need to +perform some node-comparison logic when assigning options based on node +role. + +Finally, finalize the sos command for each node _prior_ to the call to +`SoSNode.sosreport()` so that we can be sure that clusters are able to +appropriately compare and assign sos options across nodes before some +nodes have already started and/or finished their own sos report +collections. + +Signed-off-by: Jake Hunsaker +--- + sos/collector/__init__.py | 14 +++++ + sos/collector/clusters/__init__.py | 2 + + sos/collector/sosnode.py | 89 +++++++++++++++++------------- + 3 files changed, 67 insertions(+), 38 deletions(-) + +diff --git a/sos/collector/__init__.py b/sos/collector/__init__.py +index 469db60d..7b8cfcf7 100644 +--- a/sos/collector/__init__.py ++++ b/sos/collector/__init__.py +@@ -1186,6 +1186,10 @@ this utility or remote systems that it connects to. + "concurrently\n" + % (self.report_num, self.opts.jobs)) + ++ npool = ThreadPoolExecutor(self.opts.jobs) ++ npool.map(self._finalize_sos_cmd, self.client_list, chunksize=1) ++ npool.shutdown(wait=True) ++ + pool = ThreadPoolExecutor(self.opts.jobs) + pool.map(self._collect, self.client_list, chunksize=1) + pool.shutdown(wait=True) +@@ -1217,6 +1221,16 @@ this utility or remote systems that it connects to. + except Exception as err: + self.ui_log.error("Upload attempt failed: %s" % err) + ++ def _finalize_sos_cmd(self, client): ++ """Calls finalize_sos_cmd() on each node so that we have the final ++ command before we thread out the actual execution of sos ++ """ ++ try: ++ client.finalize_sos_cmd() ++ except Exception as err: ++ self.log_error("Could not finalize sos command for %s: %s" ++ % (client.address, err)) ++ + def _collect(self, client): + """Runs sosreport on each node""" + try: +diff --git a/sos/collector/clusters/__init__.py b/sos/collector/clusters/__init__.py +index c4da1ab8..bb728bc0 100644 +--- a/sos/collector/clusters/__init__.py ++++ b/sos/collector/clusters/__init__.py +@@ -11,6 +11,7 @@ + import logging + + from sos.options import ClusterOption ++from threading import Lock + + + class Cluster(): +@@ -66,6 +67,7 @@ class Cluster(): + if cls.__name__ != 'Cluster': + self.cluster_type.append(cls.__name__) + self.node_list = None ++ self.lock = Lock() + self.soslog = logging.getLogger('sos') + self.ui_log = logging.getLogger('sos_ui') + self.options = [] +diff --git a/sos/collector/sosnode.py b/sos/collector/sosnode.py +index 40472a4e..1c25cc34 100644 +--- a/sos/collector/sosnode.py ++++ b/sos/collector/sosnode.py +@@ -38,6 +38,7 @@ class SosNode(): + self.address = address.strip() + self.commons = commons + self.opts = commons['cmdlineopts'] ++ self._assign_config_opts() + self.tmpdir = commons['tmpdir'] + self.hostlen = commons['hostlen'] + self.need_sudo = commons['need_sudo'] +@@ -465,8 +466,8 @@ class SosNode(): + cmd = "/bin/bash -c %s" % quote(cmd) + if env: + _cmd_env = self.env_vars +- _cmd_env.update(env) +- res = pexpect.spawn(cmd, encoding='utf-8', env=_cmd_env) ++ env = _cmd_env.update(env) ++ res = pexpect.spawn(cmd, encoding='utf-8', env=env) + if need_root: + if self.need_sudo: + res.sendline(self.opts.sudo_pw) +@@ -484,9 +485,6 @@ class SosNode(): + + def sosreport(self): + """Run a sosreport on the node, then collect it""" +- self.sos_cmd = self.finalize_sos_cmd() +- self.log_info('Final sos command set to %s' % self.sos_cmd) +- self.manifest.add_field('final_sos_command', self.sos_cmd) + try: + path = self.execute_sos_command() + if path: +@@ -656,29 +654,42 @@ class SosNode(): + This will NOT override user supplied options. + """ + if self.cluster.sos_preset: +- if not self.opts.preset: +- self.opts.preset = self.cluster.sos_preset ++ if not self.preset: ++ self.preset = self.cluster.sos_preset + else: + self.log_info('Cluster specified preset %s but user has also ' + 'defined a preset. Using user specification.' + % self.cluster.sos_preset) + if self.cluster.sos_plugins: + for plug in self.cluster.sos_plugins: +- if plug not in self.opts.enable_plugins: +- self.opts.enable_plugins.append(plug) ++ if plug not in self.enable_plugins: ++ self.enable_plugins.append(plug) + + if self.cluster.sos_plugin_options: + for opt in self.cluster.sos_plugin_options: +- if not any(opt in o for o in self.opts.plugin_options): ++ if not any(opt in o for o in self.plugin_options): + option = '%s=%s' % (opt, + self.cluster.sos_plugin_options[opt]) +- self.opts.plugin_options.append(option) ++ self.plugin_options.append(option) + + # set master-only options + if self.cluster.check_node_is_master(self): +- self.cluster.set_master_options(self) ++ with self.cluster.lock: ++ self.cluster.set_master_options(self) + else: +- self.cluster.set_node_options(self) ++ with self.cluster.lock: ++ self.cluster.set_node_options(self) ++ ++ def _assign_config_opts(self): ++ """From the global opts configuration, assign those values locally ++ to this node so that they may be acted on individually. ++ """ ++ # assign these to new, private copies ++ self.only_plugins = list(self.opts.only_plugins) ++ self.skip_plugins = list(self.opts.skip_plugins) ++ self.enable_plugins = list(self.opts.enable_plugins) ++ self.plugin_options = list(self.opts.plugin_options) ++ self.preset = list(self.opts.preset) + + def finalize_sos_cmd(self): + """Use host facts and compare to the cluster type to modify the sos +@@ -742,59 +753,61 @@ class SosNode(): + os.path.join(self.host.sos_bin_path, self.sos_bin) + ) + +- if self.opts.only_plugins: +- plugs = [o for o in self.opts.only_plugins +- if self._plugin_exists(o)] +- if len(plugs) != len(self.opts.only_plugins): +- not_only = list(set(self.opts.only_plugins) - set(plugs)) ++ if self.only_plugins: ++ plugs = [o for o in self.only_plugins if self._plugin_exists(o)] ++ if len(plugs) != len(self.only_plugins): ++ not_only = list(set(self.only_plugins) - set(plugs)) + self.log_debug('Requested plugins %s were requested to be ' + 'enabled but do not exist' % not_only) +- only = self._fmt_sos_opt_list(self.opts.only_plugins) ++ only = self._fmt_sos_opt_list(self.only_plugins) + if only: + sos_opts.append('--only-plugins=%s' % quote(only)) +- return "%s %s" % (sos_cmd, ' '.join(sos_opts)) ++ self.sos_cmd = "%s %s" % (sos_cmd, ' '.join(sos_opts)) ++ self.log_info('Final sos command set to %s' % self.sos_cmd) ++ self.manifest.add_field('final_sos_command', self.sos_cmd) ++ return + +- if self.opts.skip_plugins: ++ if self.skip_plugins: + # only run skip-plugins for plugins that are enabled +- skip = [o for o in self.opts.skip_plugins +- if self._check_enabled(o)] +- if len(skip) != len(self.opts.skip_plugins): +- not_skip = list(set(self.opts.skip_plugins) - set(skip)) ++ skip = [o for o in self.skip_plugins if self._check_enabled(o)] ++ if len(skip) != len(self.skip_plugins): ++ not_skip = list(set(self.skip_plugins) - set(skip)) + self.log_debug('Requested to skip plugins %s, but plugins are ' + 'already not enabled' % not_skip) + skipln = self._fmt_sos_opt_list(skip) + if skipln: + sos_opts.append('--skip-plugins=%s' % quote(skipln)) + +- if self.opts.enable_plugins: ++ if self.enable_plugins: + # only run enable for plugins that are disabled +- opts = [o for o in self.opts.enable_plugins +- if o not in self.opts.skip_plugins ++ opts = [o for o in self.enable_plugins ++ if o not in self.skip_plugins + and self._check_disabled(o) and self._plugin_exists(o)] +- if len(opts) != len(self.opts.enable_plugins): +- not_on = list(set(self.opts.enable_plugins) - set(opts)) ++ if len(opts) != len(self.enable_plugins): ++ not_on = list(set(self.enable_plugins) - set(opts)) + self.log_debug('Requested to enable plugins %s, but plugins ' + 'are already enabled or do not exist' % not_on) + enable = self._fmt_sos_opt_list(opts) + if enable: + sos_opts.append('--enable-plugins=%s' % quote(enable)) + +- if self.opts.plugin_options: +- opts = [o for o in self.opts.plugin_options ++ if self.plugin_options: ++ opts = [o for o in self.plugin_options + if self._plugin_exists(o.split('.')[0]) + and self._plugin_option_exists(o.split('=')[0])] + if opts: + sos_opts.append('-k %s' % quote(','.join(o for o in opts))) + +- if self.opts.preset: +- if self._preset_exists(self.opts.preset): +- sos_opts.append('--preset=%s' % quote(self.opts.preset)) ++ if self.preset: ++ if self._preset_exists(self.preset): ++ sos_opts.append('--preset=%s' % quote(self.preset)) + else: + self.log_debug('Requested to enable preset %s but preset does ' +- 'not exist on node' % self.opts.preset) ++ 'not exist on node' % self.preset) + +- _sos_cmd = "%s %s" % (sos_cmd, ' '.join(sos_opts)) +- return _sos_cmd ++ self.sos_cmd = "%s %s" % (sos_cmd, ' '.join(sos_opts)) ++ self.log_info('Final sos command set to %s' % self.sos_cmd) ++ self.manifest.add_field('final_sos_command', self.sos_cmd) + + def determine_sos_label(self): + """Determine what, if any, label should be added to the sosreport""" +-- +2.26.3 + + +From 7e6c078e51143f7064190b316a251ddd8d431495 Mon Sep 17 00:00:00 2001 +From: Jake Hunsaker +Date: Tue, 15 Jun 2021 18:38:34 -0400 +Subject: [PATCH 04/10] [cleaner] Improve handling of symlink obfuscation + +Improves handling of symlink obfuscation by only performing the +obfuscaiton on the ultimate target of any symlinks encountered. Now, +when a symlink is encountered, clean will obfuscate the link name and +re-write it in the archive, pointing to the (potentially obfuscated) +target name. + +Signed-off-by: Jake Hunsaker +--- + sos/cleaner/__init__.py | 65 +++++++++++++++++++++++++++++------------ + 1 file changed, 46 insertions(+), 19 deletions(-) + +diff --git a/sos/cleaner/__init__.py b/sos/cleaner/__init__.py +index abfb684b..b38c8dfc 100644 +--- a/sos/cleaner/__init__.py ++++ b/sos/cleaner/__init__.py +@@ -612,28 +612,55 @@ third party. + if not filename: + # the requested file doesn't exist in the archive + return +- self.log_debug("Obfuscating %s" % short_name or filename, +- caller=arc_name) + subs = 0 +- tfile = tempfile.NamedTemporaryFile(mode='w', dir=self.tmpdir) +- with open(filename, 'r') as fname: +- for line in fname: +- try: +- line, count = self.obfuscate_line(line) +- subs += count +- tfile.write(line) +- except Exception as err: +- self.log_debug("Unable to obfuscate %s: %s" +- % (short_name, err), caller=arc_name) +- tfile.seek(0) +- if subs: +- shutil.copy(tfile.name, filename) +- tfile.close() +- _ob_filename = self.obfuscate_string(short_name) +- if _ob_filename != short_name: ++ if not os.path.islink(filename): ++ # don't run the obfuscation on the link, but on the actual file ++ # at some other point. ++ self.log_debug("Obfuscating %s" % short_name or filename, ++ caller=arc_name) ++ tfile = tempfile.NamedTemporaryFile(mode='w', dir=self.tmpdir) ++ with open(filename, 'r') as fname: ++ for line in fname: ++ try: ++ line, count = self.obfuscate_line(line) ++ subs += count ++ tfile.write(line) ++ except Exception as err: ++ self.log_debug("Unable to obfuscate %s: %s" ++ % (short_name, err), caller=arc_name) ++ tfile.seek(0) ++ if subs: ++ shutil.copy(tfile.name, filename) ++ tfile.close() ++ ++ _ob_short_name = self.obfuscate_string(short_name.split('/')[-1]) ++ _ob_filename = short_name.replace(short_name.split('/')[-1], ++ _ob_short_name) ++ _sym_changed = False ++ if os.path.islink(filename): ++ _link = os.readlink(filename) ++ _ob_link = self.obfuscate_string(_link) ++ if _ob_link != _link: ++ _sym_changed = True ++ ++ if (_ob_filename != short_name) or _sym_changed: + arc_path = filename.split(short_name)[0] + _ob_path = os.path.join(arc_path, _ob_filename) +- os.rename(filename, _ob_path) ++ # ensure that any plugin subdirs that contain obfuscated strings ++ # get created with obfuscated counterparts ++ if not os.path.islink(filename): ++ os.rename(filename, _ob_path) ++ else: ++ # generate the obfuscated name of the link target ++ _target_ob = self.obfuscate_string(os.readlink(filename)) ++ # remove the unobfuscated original symlink first, in case the ++ # symlink name hasn't changed but the target has ++ os.remove(filename) ++ # create the newly obfuscated symlink, pointing to the ++ # obfuscated target name, which may not exist just yet, but ++ # when the actual file is obfuscated, will be created ++ os.symlink(_target_ob, _ob_path) ++ + return subs + + def obfuscate_string(self, string_data): +-- +2.26.3 + + +From b5d166ac9ff79bc3740c5e66f16d60762f9a0ac0 Mon Sep 17 00:00:00 2001 +From: Jake Hunsaker +Date: Tue, 15 Jun 2021 22:56:19 -0400 +Subject: [PATCH 05/10] [cleaner] Iterate over matches with most precise match + first + +When matching strings in parsers to do obfuscation, we should be using +the most precise matches found first, rather than matching in the order +a match is hit. This ensures that we correctly obfuscate an entire +string, rather than potentially only partial substring(s) that exist +within the entire match. + +Signed-off-by: Jake Hunsaker +--- + sos/cleaner/parsers/__init__.py | 10 +++++++--- + sos/cleaner/parsers/keyword_parser.py | 2 +- + sos/cleaner/parsers/username_parser.py | 2 +- + 3 files changed, 9 insertions(+), 5 deletions(-) + +diff --git a/sos/cleaner/parsers/__init__.py b/sos/cleaner/parsers/__init__.py +index c77300aa..cfa20b95 100644 +--- a/sos/cleaner/parsers/__init__.py ++++ b/sos/cleaner/parsers/__init__.py +@@ -82,10 +82,12 @@ class SoSCleanerParser(): + for pattern in self.regex_patterns: + matches = [m[0] for m in re.findall(pattern, line, re.I)] + if matches: ++ matches.sort(reverse=True, key=lambda x: len(x)) + count += len(matches) + for match in matches: +- new_match = self.mapping.get(match.strip()) +- line = line.replace(match.strip(), new_match) ++ match = match.strip() ++ new_match = self.mapping.get(match) ++ line = line.replace(match, new_match) + return line, count + + def parse_string_for_keys(self, string_data): +@@ -102,7 +104,9 @@ class SoSCleanerParser(): + :returns: The obfuscated line + :rtype: ``str`` + """ +- for key, val in self.mapping.dataset.items(): ++ for pair in sorted(self.mapping.dataset.items(), reverse=True, ++ key=lambda x: len(x[0])): ++ key, val = pair + if key in string_data: + string_data = string_data.replace(key, val) + return string_data +diff --git a/sos/cleaner/parsers/keyword_parser.py b/sos/cleaner/parsers/keyword_parser.py +index 3dc2b7f0..9134f82d 100644 +--- a/sos/cleaner/parsers/keyword_parser.py ++++ b/sos/cleaner/parsers/keyword_parser.py +@@ -42,7 +42,7 @@ class SoSKeywordParser(SoSCleanerParser): + + def parse_line(self, line): + count = 0 +- for keyword in self.user_keywords: ++ for keyword in sorted(self.user_keywords, reverse=True): + if keyword in line: + line = line.replace(keyword, self.mapping.get(keyword)) + count += 1 +diff --git a/sos/cleaner/parsers/username_parser.py b/sos/cleaner/parsers/username_parser.py +index 2bb6c7f3..0c3bbac4 100644 +--- a/sos/cleaner/parsers/username_parser.py ++++ b/sos/cleaner/parsers/username_parser.py +@@ -51,7 +51,7 @@ class SoSUsernameParser(SoSCleanerParser): + + def parse_line(self, line): + count = 0 +- for username in self.mapping.dataset.keys(): ++ for username in sorted(self.mapping.dataset.keys(), reverse=True): + if username in line: + count = line.count(username) + line = line.replace(username, self.mapping.get(username)) +-- +2.26.3 + + +From 7ed138fcd2ee6ece3e7fbd9e48293b212e0b4e41 Mon Sep 17 00:00:00 2001 +From: Jake Hunsaker +Date: Wed, 16 Jun 2021 01:15:45 -0400 +Subject: [PATCH 06/10] [cleaner] Explicitly obfuscate directory names within + archives + +This commits adds a step to `obfuscate_report()` that explicitly walks +through all directories in the archive, and obfuscates the directory +names if necessary. + +Since this uses `obfuscate_string()` for the directory names, a +`skip_keys` list has been added to maps to allow parsers/maps to +specify matched keys (such as short names for the hostname parser) that +should not be considered when obfuscating directory names (e.g. 'www'). + +Closes: #2465 + +Signed-off-by: Jake Hunsaker +--- + sos/cleaner/__init__.py | 26 ++++++++++++++++++++++++++ + sos/cleaner/mappings/__init__.py | 4 +++- + sos/cleaner/mappings/hostname_map.py | 5 +++++ + sos/cleaner/obfuscation_archive.py | 20 ++++++++++++++++++-- + sos/cleaner/parsers/__init__.py | 2 ++ + 5 files changed, 54 insertions(+), 3 deletions(-) + +diff --git a/sos/cleaner/__init__.py b/sos/cleaner/__init__.py +index b38c8dfc..88d4d0ea 100644 +--- a/sos/cleaner/__init__.py ++++ b/sos/cleaner/__init__.py +@@ -562,6 +562,11 @@ third party. + except Exception as err: + self.log_debug("Unable to parse file %s: %s" + % (short_name, err)) ++ try: ++ self.obfuscate_directory_names(archive) ++ except Exception as err: ++ self.log_info("Failed to obfuscate directories: %s" % err, ++ caller=archive.archive_name) + + # if the archive was already a tarball, repack it + method = archive.get_compression() +@@ -663,6 +668,27 @@ third party. + + return subs + ++ def obfuscate_directory_names(self, archive): ++ """For all directories that exist within the archive, obfuscate the ++ directory name if it contains sensitive strings found during execution ++ """ ++ self.log_info("Obfuscating directory names in archive %s" ++ % archive.archive_name) ++ for dirpath in sorted(archive.get_directory_list(), reverse=True): ++ for _name in os.listdir(dirpath): ++ _dirname = os.path.join(dirpath, _name) ++ _arc_dir = _dirname.split(archive.extracted_path)[-1] ++ if os.path.isdir(_dirname): ++ _ob_dirname = self.obfuscate_string(_name) ++ if _ob_dirname != _name: ++ _ob_arc_dir = _arc_dir.rstrip(_name) ++ _ob_arc_dir = os.path.join( ++ archive.extracted_path, ++ _ob_arc_dir.lstrip('/'), ++ _ob_dirname ++ ) ++ os.rename(_dirname, _ob_arc_dir) ++ + def obfuscate_string(self, string_data): + for parser in self.parsers: + try: +diff --git a/sos/cleaner/mappings/__init__.py b/sos/cleaner/mappings/__init__.py +index dd464e5a..5cf5c8b2 100644 +--- a/sos/cleaner/mappings/__init__.py ++++ b/sos/cleaner/mappings/__init__.py +@@ -20,8 +20,10 @@ class SoSMap(): + corresponding SoSMap() object, to allow for easy retrieval of obfuscated + items. + """ +- ++ # used for regex skips in parser.parse_line() + ignore_matches = [] ++ # used for filename obfuscations in parser.parse_string_for_keys() ++ skip_keys = [] + + def __init__(self): + self.dataset = {} +diff --git a/sos/cleaner/mappings/hostname_map.py b/sos/cleaner/mappings/hostname_map.py +index e0b7bf1d..c9a44d8d 100644 +--- a/sos/cleaner/mappings/hostname_map.py ++++ b/sos/cleaner/mappings/hostname_map.py +@@ -35,6 +35,11 @@ class SoSHostnameMap(SoSMap): + '^com..*' + ] + ++ skip_keys = [ ++ 'www', ++ 'api' ++ ] ++ + host_count = 0 + domain_count = 0 + _domains = {} +diff --git a/sos/cleaner/obfuscation_archive.py b/sos/cleaner/obfuscation_archive.py +index 88f978d9..90188358 100644 +--- a/sos/cleaner/obfuscation_archive.py ++++ b/sos/cleaner/obfuscation_archive.py +@@ -202,10 +202,22 @@ class SoSObfuscationArchive(): + """Return a list of all files within the archive""" + self.file_list = [] + for dirname, dirs, files in os.walk(self.extracted_path): ++ for _dir in dirs: ++ _dirpath = os.path.join(dirname, _dir) ++ # catch dir-level symlinks ++ if os.path.islink(_dirpath) and os.path.isdir(_dirpath): ++ self.file_list.append(_dirpath) + for filename in files: + self.file_list.append(os.path.join(dirname, filename)) + return self.file_list + ++ def get_directory_list(self): ++ """Return a list of all directories within the archive""" ++ dir_list = [] ++ for dirname, dirs, files in os.walk(self.extracted_path): ++ dir_list.append(dirname) ++ return dir_list ++ + def update_sub_count(self, fname, count): + """Called when a file has finished being parsed and used to track + total substitutions made and number of files that had changes made +@@ -230,7 +242,8 @@ class SoSObfuscationArchive(): + archive root + """ + +- if not os.path.isfile(self.get_file_path(filename)): ++ if (not os.path.isfile(self.get_file_path(filename)) and not ++ os.path.islink(self.get_file_path(filename))): + return True + + for _skip in self.skip_list: +@@ -266,7 +279,10 @@ class SoSObfuscationArchive(): + if re.match(_arc_reg, fname): + return True + +- return self.file_is_binary(fname) ++ if os.path.isfile(self.get_file_path(fname)): ++ return self.file_is_binary(fname) ++ # don't fail on dir-level symlinks ++ return False + + def file_is_binary(self, fname): + """Determine if the file is a binary file or not. +diff --git a/sos/cleaner/parsers/__init__.py b/sos/cleaner/parsers/__init__.py +index cfa20b95..84874475 100644 +--- a/sos/cleaner/parsers/__init__.py ++++ b/sos/cleaner/parsers/__init__.py +@@ -107,6 +107,8 @@ class SoSCleanerParser(): + for pair in sorted(self.mapping.dataset.items(), reverse=True, + key=lambda x: len(x[0])): + key, val = pair ++ if key in self.mapping.skip_keys: ++ continue + if key in string_data: + string_data = string_data.replace(key, val) + return string_data +-- +2.26.3 + + +From f180150277b706e72f2445287f3d0b6943efa252 Mon Sep 17 00:00:00 2001 +From: Jake Hunsaker +Date: Wed, 16 Jun 2021 02:24:51 -0400 +Subject: [PATCH 07/10] [hostname parser,map] Attempt to detect strings with + FQDN substrings + +This commit updates the hostname parser and associated map to be able to +better detect and obfuscate FQDN substrings within file content and file +names, particularly when the regex patterns failed to match a hostname +that is formatted with '_' characters rather than '.' characters. + +The `get()` method has been updated to alow preserve characters and +certain extensions that are not part of the FQDN, but are brought in by +the regex pattern due to the fact that we need to use word boundary +indicators within the pattern. + +Signed-off-by: Jake Hunsaker +--- + sos/cleaner/mappings/hostname_map.py | 59 +++++++++++++++++++++++--- + sos/cleaner/parsers/__init__.py | 3 +- + sos/cleaner/parsers/hostname_parser.py | 30 ++++++++++--- + 3 files changed, 81 insertions(+), 11 deletions(-) + +diff --git a/sos/cleaner/mappings/hostname_map.py b/sos/cleaner/mappings/hostname_map.py +index c9a44d8d..d4b2c88e 100644 +--- a/sos/cleaner/mappings/hostname_map.py ++++ b/sos/cleaner/mappings/hostname_map.py +@@ -104,7 +104,7 @@ class SoSHostnameMap(SoSMap): + host = domain.split('.') + if len(host) == 1: + # don't block on host's shortname +- return True ++ return host[0] in self.hosts.keys() + else: + domain = host[0:-1] + for known_domain in self._domains: +@@ -113,12 +113,59 @@ class SoSHostnameMap(SoSMap): + return False + + def get(self, item): +- if item.startswith(('.', '_')): +- item = item.lstrip('._') +- item = item.strip() ++ prefix = '' ++ suffix = '' ++ final = None ++ # The regex pattern match may include a leading and/or trailing '_' ++ # character due to the need to use word boundary matching, so we need ++ # to strip these from the string during processing, but still keep them ++ # in the returned string to not mangle the string replacement in the ++ # context of the file or filename ++ while item.startswith(('.', '_')): ++ prefix += item[0] ++ item = item[1:] ++ while item.endswith(('.', '_')): ++ suffix += item[-1] ++ item = item[0:-1] + if not self.domain_name_in_loaded_domains(item.lower()): + return item +- return super(SoSHostnameMap, self).get(item) ++ if item.endswith(('.yaml', '.yml', '.crt', '.key', '.pem')): ++ ext = '.' + item.split('.')[-1] ++ item = item.replace(ext, '') ++ suffix += ext ++ if item not in self.dataset.keys(): ++ # try to account for use of '-' in names that include hostnames ++ # and don't create new mappings for each of these ++ for _existing in sorted(self.dataset.keys(), reverse=True, ++ key=lambda x: len(x)): ++ _host_substr = False ++ _test = item.split(_existing) ++ _h = _existing.split('.') ++ # avoid considering a full FQDN match as a new match off of ++ # the hostname of an existing match ++ if _h[0] and _h[0] in self.hosts.keys(): ++ _host_substr = True ++ if len(_test) == 1 or not _test[0]: ++ # does not match existing obfuscation ++ continue ++ elif _test[0].endswith('.') and not _host_substr: ++ # new hostname in known domain ++ final = super(SoSHostnameMap, self).get(item) ++ break ++ elif item.split(_test[0]): ++ # string that includes existing FQDN obfuscation substring ++ # so, only obfuscate the FQDN part ++ try: ++ itm = item.split(_test[0])[1] ++ final = _test[0] + super(SoSHostnameMap, self).get(itm) ++ break ++ except Exception: ++ # fallback to still obfuscating the entire item ++ pass ++ ++ if not final: ++ final = super(SoSHostnameMap, self).get(item) ++ return prefix + final + suffix + + def sanitize_item(self, item): + host = item.split('.') +@@ -146,6 +193,8 @@ class SoSHostnameMap(SoSMap): + """Obfuscate the short name of the host with an incremented counter + based on the total number of obfuscated host names + """ ++ if not hostname: ++ return hostname + if hostname not in self.hosts: + ob_host = "host%s" % self.host_count + self.hosts[hostname] = ob_host +diff --git a/sos/cleaner/parsers/__init__.py b/sos/cleaner/parsers/__init__.py +index 84874475..57d2020a 100644 +--- a/sos/cleaner/parsers/__init__.py ++++ b/sos/cleaner/parsers/__init__.py +@@ -87,7 +87,8 @@ class SoSCleanerParser(): + for match in matches: + match = match.strip() + new_match = self.mapping.get(match) +- line = line.replace(match, new_match) ++ if new_match != match: ++ line = line.replace(match, new_match) + return line, count + + def parse_string_for_keys(self, string_data): +diff --git a/sos/cleaner/parsers/hostname_parser.py b/sos/cleaner/parsers/hostname_parser.py +index 9982024b..3de6bb08 100644 +--- a/sos/cleaner/parsers/hostname_parser.py ++++ b/sos/cleaner/parsers/hostname_parser.py +@@ -18,7 +18,7 @@ class SoSHostnameParser(SoSCleanerParser): + map_file_key = 'hostname_map' + prep_map_file = 'sos_commands/host/hostname' + regex_patterns = [ +- r'(((\b|_)[a-zA-Z0-9-\.]{1,200}\.[a-zA-Z]{1,63}\b))' ++ r'(((\b|_)[a-zA-Z0-9-\.]{1,200}\.[a-zA-Z]{1,63}(\b|_)))' + ] + + def __init__(self, conf_file=None, opt_domains=None): +@@ -66,10 +66,30 @@ class SoSHostnameParser(SoSCleanerParser): + """Override the default parse_line() method to also check for the + shortname of the host derived from the hostname. + """ ++ ++ def _check_line(ln, count, search, repl=None): ++ """Perform a second manual check for substrings that may have been ++ missed by regex matching ++ """ ++ if search in self.mapping.skip_keys: ++ return ln, count ++ if search in ln: ++ count += ln.count(search) ++ ln = ln.replace(search, self.mapping.get(repl or search)) ++ return ln, count ++ + count = 0 + line, count = super(SoSHostnameParser, self).parse_line(line) +- for short_name in self.short_names: +- if short_name in line: +- count += 1 +- line = line.replace(short_name, self.mapping.get(short_name)) ++ # make an additional pass checking for '_' formatted substrings that ++ # the regex patterns won't catch ++ hosts = [h for h in self.mapping.dataset.keys() if '.' in h] ++ for host in sorted(hosts, reverse=True, key=lambda x: len(x)): ++ fqdn = host ++ for c in '.-': ++ fqdn = fqdn.replace(c, '_') ++ line, count = _check_line(line, count, fqdn, host) ++ ++ for short_name in sorted(self.short_names, reverse=True): ++ line, count = _check_line(line, count, short_name) ++ + return line, count +-- +2.26.3 + + +From ec46e6a8fac58ed757344be3751eb1f925eab981 Mon Sep 17 00:00:00 2001 +From: Jake Hunsaker +Date: Mon, 14 Jun 2021 09:31:07 -0400 +Subject: [PATCH 08/10] [ocp] Refine OCP node options in cluster profile + +Adds explicit setting of primary/node sos options for the `openshift` +plugin within the cluster, rather than relying on default configurations +and best practices to avoid duplicate collections. + +Signed-off-by: Jake Hunsaker +--- + sos/collector/clusters/ocp.py | 65 +++++++++++++++++++++++++++++++++-- + sos/collector/sosnode.py | 4 +-- + 2 files changed, 65 insertions(+), 4 deletions(-) + +diff --git a/sos/collector/clusters/ocp.py b/sos/collector/clusters/ocp.py +index 283fcfd1..ddff84a4 100644 +--- a/sos/collector/clusters/ocp.py ++++ b/sos/collector/clusters/ocp.py +@@ -8,6 +8,8 @@ + # + # See the LICENSE file in the source distribution for further information. + ++import os ++ + from pipes import quote + from sos.collector.clusters import Cluster + +@@ -18,10 +20,14 @@ class ocp(Cluster): + cluster_name = 'OpenShift Container Platform v4' + packages = ('openshift-hyperkube', 'openshift-clients') + ++ api_collect_enabled = False ++ token = None ++ + option_list = [ + ('label', '', 'Colon delimited list of labels to select nodes with'), + ('role', '', 'Colon delimited list of roles to select nodes with'), +- ('kubeconfig', '', 'Path to the kubeconfig file') ++ ('kubeconfig', '', 'Path to the kubeconfig file'), ++ ('token', '', 'Service account token to use for oc authorization') + ] + + def fmt_oc_cmd(self, cmd): +@@ -32,9 +38,20 @@ class ocp(Cluster): + return "oc --config %s %s" % (self.get_option('kubeconfig'), cmd) + return "oc %s" % cmd + ++ def _attempt_oc_login(self): ++ """Attempt to login to the API using the oc command using a provided ++ token ++ """ ++ _res = self.exec_primary_cmd("oc login --insecure-skip-tls-verify=True" ++ " --token=%s" % self.token) ++ return _res['status'] == 0 ++ + def check_enabled(self): + if super(ocp, self).check_enabled(): + return True ++ self.token = self.get_option('token') or os.getenv('SOSOCPTOKEN', None) ++ if self.token: ++ self._attempt_oc_login() + _who = self.fmt_oc_cmd('whoami') + return self.exec_master_cmd(_who)['status'] == 0 + +@@ -106,4 +123,48 @@ class ocp(Cluster): + return 'master' in self.node_dict[sosnode.address]['roles'] + + def set_master_options(self, node): +- node.opts.enable_plugins.append('openshift') ++ node.enable_plugins.append('openshift') ++ if self.api_collect_enabled: ++ # a primary has already been enabled for API collection, disable ++ # it among others ++ node.plugin_options.append('openshift.no-oc=on') ++ else: ++ _oc_cmd = 'oc' ++ if node.host.containerized: ++ _oc_cmd = '/host/bin/oc' ++ # when run from a container, the oc command does not inherit ++ # the default config, so if it's present then pass it here to ++ # detect a funcitonal oc command. This is sidestepped in sos ++ # report by being able to chroot the `oc` execution which we ++ # cannot do remotely ++ if node.file_exists('/root/.kube/config', need_root=True): ++ _oc_cmd += ' --kubeconfig /host/root/.kube/config' ++ can_oc = node.run_command("%s whoami" % _oc_cmd, ++ use_container=node.host.containerized, ++ # container is available only to root ++ # and if rhel, need to run sos as root ++ # anyways which will run oc as root ++ need_root=True) ++ if can_oc['status'] == 0: ++ # the primary node can already access the API ++ self.api_collect_enabled = True ++ elif self.token: ++ node.sos_env_vars['SOSOCPTOKEN'] = self.token ++ self.api_collect_enabled = True ++ elif self.get_option('kubeconfig'): ++ kc = self.get_option('kubeconfig') ++ if node.file_exists(kc): ++ if node.host.containerized: ++ kc = "/host/%s" % kc ++ node.sos_env_vars['KUBECONFIG'] = kc ++ self.api_collect_enabled = True ++ if self.api_collect_enabled: ++ msg = ("API collections will be performed on %s\nNote: API " ++ "collections may extend runtime by 10s of minutes\n" ++ % node.address) ++ self.soslog.info(msg) ++ self.ui_log.info(msg) ++ ++ def set_node_options(self, node): ++ # don't attempt OC API collections on non-primary nodes ++ node.plugin_options.append('openshift.no-oc=on') +diff --git a/sos/collector/sosnode.py b/sos/collector/sosnode.py +index 1c25cc34..6597d236 100644 +--- a/sos/collector/sosnode.py ++++ b/sos/collector/sosnode.py +@@ -202,11 +202,11 @@ class SosNode(): + self.opts.registry_authfile or self.host.container_authfile + ) + +- def file_exists(self, fname): ++ def file_exists(self, fname, need_root=False): + """Checks for the presence of fname on the remote node""" + if not self.local: + try: +- res = self.run_command("stat %s" % fname) ++ res = self.run_command("stat %s" % fname, need_root=need_root) + return res['status'] == 0 + except Exception: + return False +-- +2.26.3 + + +From eea8e15845a8bcba91b93a5310ba693e8c20ab9c Mon Sep 17 00:00:00 2001 +From: Jake Hunsaker +Date: Thu, 17 Jun 2021 09:52:36 -0400 +Subject: [PATCH 09/10] [cleaner] Don't obfuscate default 'core' user + +The 'core' user is a common default user on containerized hosts, and +obfuscation of it is not advantageous, much like the default 'ubuntu' +user for that distribution. + +Signed-off-by: Jake Hunsaker +--- + sos/cleaner/parsers/username_parser.py | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/sos/cleaner/parsers/username_parser.py b/sos/cleaner/parsers/username_parser.py +index 0c3bbac4..64843205 100644 +--- a/sos/cleaner/parsers/username_parser.py ++++ b/sos/cleaner/parsers/username_parser.py +@@ -28,6 +28,7 @@ class SoSUsernameParser(SoSCleanerParser): + prep_map_file = 'sos_commands/login/lastlog_-u_1000-60000' + regex_patterns = [] + skip_list = [ ++ 'core', + 'nobody', + 'nfsnobody', + 'root' +-- +2.26.3 + + +From 581429ca65131711c96f9d56bf2f0e18779aec2e Mon Sep 17 00:00:00 2001 +From: Jake Hunsaker +Date: Fri, 18 Jun 2021 14:26:55 -0400 +Subject: [PATCH 10/10] [cleaner] Fix checksum and archive pruning from archive + list + +Fixes an issue where checksums may have gotten into the list of archives +to be cleaned, which would cause further issues later. Additionally, +prevents nested sosreports from top-level archives (such as from +`collect`) from being removed for being a binary file when that +top-level archive gets obfuscated. +--- + sos/cleaner/__init__.py | 5 +++-- + sos/cleaner/obfuscation_archive.py | 1 + + 2 files changed, 4 insertions(+), 2 deletions(-) + +diff --git a/sos/cleaner/__init__.py b/sos/cleaner/__init__.py +index 88d4d0ea..8280bc50 100644 +--- a/sos/cleaner/__init__.py ++++ b/sos/cleaner/__init__.py +@@ -226,8 +226,7 @@ third party. + nested_archives = [] + for _file in archive.getmembers(): + if (re.match('sosreport-.*.tar', _file.name.split('/')[-1]) and not +- (_file.name.endswith('.md5') or +- _file.name.endswith('.sha256'))): ++ (_file.name.endswith(('.md5', '.sha256')))): + nested_archives.append(_file.name.split('/')[-1]) + + if nested_archives: +@@ -235,6 +234,8 @@ third party. + nested_path = self.extract_archive(archive) + for arc_file in os.listdir(nested_path): + if re.match('sosreport.*.tar.*', arc_file): ++ if arc_file.endswith(('.md5', '.sha256')): ++ continue + self.report_paths.append(os.path.join(nested_path, + arc_file)) + # add the toplevel extracted archive +diff --git a/sos/cleaner/obfuscation_archive.py b/sos/cleaner/obfuscation_archive.py +index 90188358..e357450b 100644 +--- a/sos/cleaner/obfuscation_archive.py ++++ b/sos/cleaner/obfuscation_archive.py +@@ -58,6 +58,7 @@ class SoSObfuscationArchive(): + Returns: list of files and file regexes + """ + return [ ++ 'sosreport-', + 'sys/firmware', + 'sys/fs', + 'sys/kernel/debug', +-- +2.26.3 + diff --git a/SPECS/sos.spec b/SPECS/sos.spec index 2775538..1b1481c 100644 --- a/SPECS/sos.spec +++ b/SPECS/sos.spec @@ -5,7 +5,7 @@ Summary: A set of tools to gather troubleshooting information from a system Name: sos Version: 4.1 -Release: 2%{?dist} +Release: 3%{?dist} Group: Applications/System Source0: https://github.com/sosreport/sos/archive/%{version}/sos-%{version}.tar.gz Source1: sos-audit-%{auditversion}.tgz @@ -31,6 +31,12 @@ Patch8: sos-bz1956673-pulpcore-plugin.patch Patch9: sos-bz1959413-saphana-traceback.patch Patch10: sos-bz1961458-collect-nstat.patch Patch11: sos-bz1961229-snapper-plugin-and-allocation-failures.patch +Patch12: sos-bz1925419-all-gluster-files.patch +Patch13: sos-bz1964499-obfuscate-fqdn-from-dnf-log.patch +Patch14: sos-bz1886711-enhance-tc-hw-offload.patch +Patch15: sos-bz1965001-fix-avc-copystating-proc-sys.patch +Patch16: sos-bz1967613-sssd-common.patch +Patch17: sos-bz1973675-ocp-cluster-cleaner.patch %description @@ -53,6 +59,12 @@ support technicians and developers. %patch9 -p1 %patch10 -p1 %patch11 -p1 +%patch12 -p1 +%patch13 -p1 +%patch14 -p1 +%patch15 -p1 +%patch16 -p1 +%patch17 -p1 %build %py3_build @@ -116,6 +128,20 @@ of the system. Currently storage and filesystem commands are audited. %ghost /etc/audit/rules.d/40-sos-storage.rules %changelog +* Mon Jun 21 2021 Pavel Moravec = 4.1-3 +- [gluster] collect public keys from the right dir + Resolves: bz1925419 +- [cleaner] Only skip packaging-based files for the IP parse + Resolves: bz1964499 +- [networking] collect also tc filter show ingress + Resolves: bz1886711 +- [archive] skip copying SELinux context for /proc and /sys + Resolves: bz1965001 +- [sssd] sssd plugin when sssd-common + Resolves: bz1967613 +- Various OCP/cluster/cleanup enhancements + Resolves: bz1973675 + * Tue May 18 2021 Pavel Moravec = 4.1-2 - Load maps from all archives before obfuscation Resolves: bz1930181