From 29afda6e4ff90385d34bc61315542e7cb4baaf8d Mon Sep 17 00:00:00 2001 From: Jake Hunsaker Date: Fri, 9 Apr 2021 11:32:14 -0400 Subject: [PATCH] [cleaner] Do not break iteration of parse_string_for_keys on first match Previously, `parse_string_for_keys()`, called by `obfuscate_string()` for non-regex based obfuscations, would return on the first match in the string found for each parser. Instead, continue iterating over all items in each parser's dataset before returning the (now fully) obfuscated string. Resolves: #2480 Signed-off-by: Jake Hunsaker --- sos/cleaner/parsers/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sos/cleaner/parsers/__init__.py b/sos/cleaner/parsers/__init__.py index dd0451df..c77300aa 100644 --- a/sos/cleaner/parsers/__init__.py +++ b/sos/cleaner/parsers/__init__.py @@ -104,7 +104,7 @@ class SoSCleanerParser(): """ for key, val in self.mapping.dataset.items(): if key in string_data: - return string_data.replace(key, val) + string_data = string_data.replace(key, val) return string_data def get_map_contents(self): -- 2.26.3 From 52e6b2ae17e128f17a84ee83b7718c2901bcd5bd Mon Sep 17 00:00:00 2001 From: Jake Hunsaker Date: Wed, 12 May 2021 12:39:48 -0400 Subject: [PATCH] [collect] Add options to provide registry auth for pulling images Adds options that allow a user to specify registry authentication, either via username/password or an authfile, to allow pulling an image that exists on a non-public registry. If a username/password is provided, that will be used. If not, we will attempt to use an authfile - either provided by the user or by a cluster profile. Also adds an option to forcibly pull a new(er) version of the specified image, to alleviate conditions where a too-old version of the image already exists on the host. Closes: #2534 Signed-off-by: Jake Hunsaker --- man/en/sos-collect.1 | 30 +++++++++++++++++++++++ sos/collector/__init__.py | 17 +++++++++++++ sos/collector/sosnode.py | 40 +++++++++++++++++++++++++++---- sos/policies/distros/__init__.py | 16 ++++++++++++- sos/policies/distros/redhat.py | 25 ++++++++++++------- sos/policies/runtimes/__init__.py | 25 +++++++++++++++++++ 6 files changed, 140 insertions(+), 13 deletions(-) diff --git a/man/en/sos-collect.1 b/man/en/sos-collect.1 index 286bfe71..cdbc3257 100644 --- a/man/en/sos-collect.1 +++ b/man/en/sos-collect.1 @@ -26,6 +26,11 @@ sos collect \- Collect sosreports from multiple (cluster) nodes [\-\-no\-pkg\-check] [\-\-no\-local] [\-\-master MASTER] + [\-\-image IMAGE] + [\-\-force-pull-image] + [\-\-registry-user USER] + [\-\-registry-password PASSWORD] + [\-\-registry-authfile FILE] [\-o ONLY_PLUGINS] [\-p SSH_PORT] [\-\-password] @@ -245,6 +250,31 @@ Specify a master node for the cluster. If provided, then sos collect will check the master node, not localhost, for determining the type of cluster in use. .TP +\fB\-\-image IMAGE\fR +Specify an image to use for the temporary container created for collections on +containerized host, if you do not want to use the default image specifed by the +host's policy. Note that this should include the registry. +.TP +\fB\-\-force-pull-image\fR +Use this option to force the container runtime to pull the specified image (even +if it is the policy default image) even if the image already exists on the host. +This may be useful to update an older container image on containerized hosts. 
+.TP +\fB\-\-registry-user USER\fR +Specify the username to authenticate to the registry with in order to pull the container +image +.TP +\fB\-\-registry-password PASSWORD\fR +Specify the password to authenticate to the registry with in order to pull the container +image. If no password is required, leave this blank. +.TP +\fB\-\-registry-authfile FILE\fR +Specify the filename to use for providing authentication credentials to the registry +to pull the container image. + +Note that this file must exist on the node(s) performing the pull operations, not the +node from which \fBsos collect\fR was run. +.TP \fB\-o\fR ONLY_PLUGINS, \fB\-\-only\-plugins\fR ONLY_PLUGINS Sosreport option. Run ONLY the plugins listed. diff --git a/sos/collector/__init__.py b/sos/collector/__init__.py index 1c742cf5..0624caad 100644 --- a/sos/collector/__init__.py +++ b/sos/collector/__init__.py @@ -63,6 +63,7 @@ class SoSCollector(SoSComponent): 'encrypt_pass': '', 'group': None, 'image': '', + 'force_pull_image': False, 'jobs': 4, 'keywords': [], 'keyword_file': None, @@ -84,6 +85,9 @@ class SoSCollector(SoSComponent): 'plugin_timeout': None, 'cmd_timeout': None, 'preset': '', + 'registry_user': None, + 'registry_password': None, + 'registry_authfile': None, 'save_group': '', 'since': '', 'skip_commands': [], @@ -319,6 +323,19 @@ class SoSCollector(SoSComponent): collect_grp.add_argument('--image', help=('Specify the container image to use for' ' containerized hosts.')) + collect_grp.add_argument('--force-pull-image', '--pull', default=False, + action='store_true', + help='Force pull the container image even if ' + 'it already exists on the host') + collect_grp.add_argument('--registry-user', default=None, + help='Username to authenticate to the ' + 'registry with for pulling an image') + collect_grp.add_argument('--registry-password', default=None, + help='Password to authenticate to the ' + 'registry with for pulling an image') + collect_grp.add_argument('--registry-authfile', default=None, + help='Use this authfile to provide registry ' + 'authentication when pulling an image') collect_grp.add_argument('-i', '--ssh-key', help='Specify an ssh key') collect_grp.add_argument('-j', '--jobs', default=4, type=int, help='Number of concurrent nodes to collect') diff --git a/sos/collector/sosnode.py b/sos/collector/sosnode.py index 48693342..d1c11824 100644 --- a/sos/collector/sosnode.py +++ b/sos/collector/sosnode.py @@ -134,9 +134,27 @@ class SosNode(): """If the host is containerized, create the container we'll be using """ if self.host.containerized: - res = self.run_command(self.host.create_sos_container(), - need_root=True) - if res['status'] in [0, 125]: # 125 means container exists + cmd = self.host.create_sos_container( + image=self.opts.image, + auth=self.get_container_auth(), + force_pull=self.opts.force_pull_image + ) + res = self.run_command(cmd, need_root=True) + if res['status'] in [0, 125]: + if res['status'] == 125: + if 'unable to retrieve auth token' in res['stdout']: + self.log_error( + "Could not pull image. Provide either a username " + "and password or authfile" + ) + raise Exception + elif 'unknown: Not found' in res['stdout']: + self.log_error('Specified image not found on registry') + raise Exception + # 'name exists' with code 125 means the container was + # created successfully, so ignore it. 
+ # initial creations leads to an exited container, restarting it + # here will keep it alive for us to exec through ret = self.run_command(self.host.restart_sos_container(), need_root=True) if ret['status'] == 0: @@ -152,6 +170,20 @@ class SosNode(): % res['stdout']) raise Exception + def get_container_auth(self): + """Determine what the auth string should be to pull the image used to + deploy our temporary container + """ + if self.opts.registry_user: + return self.host.runtimes['default'].fmt_registry_credentials( + self.opts.registry_user, + self.opts.registry_password + ) + else: + return self.host.runtimes['default'].fmt_registry_authfile( + self.opts.registry_authfile or self.host.container_authfile + ) + def file_exists(self, fname): """Checks for the presence of fname on the remote node""" if not self.local: @@ -343,7 +375,7 @@ class SosNode(): % self.commons['policy'].distro) return self.commons['policy'] host = load(cache={}, sysroot=self.opts.sysroot, init=InitSystem(), - probe_runtime=False, remote_exec=self.ssh_cmd, + probe_runtime=True, remote_exec=self.ssh_cmd, remote_check=self.read_file('/etc/os-release')) if host: self.log_info("loaded policy %s for host" % host.distro) diff --git a/sos/policies/distros/__init__.py b/sos/policies/distros/__init__.py index 9fe31513..f5b9fd5b 100644 --- a/sos/policies/distros/__init__.py +++ b/sos/policies/distros/__init__.py @@ -62,6 +62,7 @@ class LinuxPolicy(Policy): sos_bin_path = '/usr/bin' sos_container_name = 'sos-collector-tmp' container_version_command = None + container_authfile = None def __init__(self, sysroot=None, init=None, probe_runtime=True): super(LinuxPolicy, self).__init__(sysroot=sysroot, @@ -626,13 +627,26 @@ class LinuxPolicy(Policy): """ return '' - def create_sos_container(self): + def create_sos_container(self, image=None, auth=None, force_pull=False): """Returns the command that will create the container that will be used for running commands inside a container on hosts that require it. This will use the container runtime defined for the host type to launch a container. From there, we use the defined runtime to exec into the container's namespace. + + :param image: The name of the image if not using the policy default + :type image: ``str`` or ``None`` + + :param auth: The auth string required by the runtime to pull an + image from the registry + :type auth: ``str`` or ``None`` + + :param force_pull: Should the runtime forcibly pull the image + :type force_pull: ``bool`` + + :returns: The command to execute to launch the temp container + :rtype: ``str`` """ return '' diff --git a/sos/policies/distros/redhat.py b/sos/policies/distros/redhat.py index 241d3f13..20afbcc4 100644 --- a/sos/policies/distros/redhat.py +++ b/sos/policies/distros/redhat.py @@ -452,15 +452,19 @@ support representative. 
return self.find_preset(ATOMIC) - def create_sos_container(self): + def create_sos_container(self, image=None, auth=None, force_pull=False): _cmd = ("{runtime} run -di --name {name} --privileged --ipc=host" " --net=host --pid=host -e HOST=/host -e NAME={name} -e " - "IMAGE={image} -v /run:/run -v /var/log:/var/log -v " + "IMAGE={image} {pull} -v /run:/run -v /var/log:/var/log -v " "/etc/machine-id:/etc/machine-id -v " - "/etc/localtime:/etc/localtime -v /:/host {image}") + "/etc/localtime:/etc/localtime -v /:/host {auth} {image}") + _image = image or self.container_image + _pull = '--pull=always' if force_pull else '' return _cmd.format(runtime=self.container_runtime, name=self.sos_container_name, - image=self.container_image) + image=_image, + pull=_pull, + auth=auth or '') def set_cleanup_cmd(self): return 'docker rm --force sos-collector-tmp' @@ -482,6 +486,7 @@ support representative. container_image = 'registry.redhat.io/rhel8/support-tools' sos_path_strip = '/host' container_version_command = 'rpm -q sos' + container_authfile = '/var/lib/kubelet/config.json' def __init__(self, sysroot=None, init=None, probe_runtime=True, remote_exec=None): @@ -511,15 +516,19 @@ support representative. # RH OCP environments. return self.find_preset(RHOCP) - def create_sos_container(self): + def create_sos_container(self, image=None, auth=None, force_pull=False): _cmd = ("{runtime} run -di --name {name} --privileged --ipc=host" " --net=host --pid=host -e HOST=/host -e NAME={name} -e " - "IMAGE={image} -v /run:/run -v /var/log:/var/log -v " + "IMAGE={image} {pull} -v /run:/run -v /var/log:/var/log -v " "/etc/machine-id:/etc/machine-id -v " - "/etc/localtime:/etc/localtime -v /:/host {image}") + "/etc/localtime:/etc/localtime -v /:/host {auth} {image}") + _image = image or self.container_image + _pull = '--pull=always' if force_pull else '' return _cmd.format(runtime=self.container_runtime, name=self.sos_container_name, - image=self.container_image) + image=_image, + pull=_pull, + auth=auth or '') def set_cleanup_cmd(self): return 'podman rm --force %s' % self.sos_container_name diff --git a/sos/policies/runtimes/__init__.py b/sos/policies/runtimes/__init__.py index 1a61b644..f28d6a1d 100644 --- a/sos/policies/runtimes/__init__.py +++ b/sos/policies/runtimes/__init__.py @@ -157,6 +157,31 @@ class ContainerRuntime(): quoted_cmd = cmd return "%s %s %s" % (self.run_cmd, container, quoted_cmd) + def fmt_registry_credentials(self, username, password): + """Format a string to pass to the 'run' command of the runtime to + enable authorization for pulling the image during `sos collect`, if + needed using username and optional password creds + + :param username: The name of the registry user + :type username: ``str`` + + :param password: The password of the registry user + :type password: ``str`` or ``None`` + + :returns: The string to use to enable a run command to pull the image + :rtype: ``str`` + """ + return "--creds=%s%s" % (username, ':' + password if password else '') + + def fmt_registry_authfile(self, authfile): + """Format a string to pass to the 'run' command of the runtime to + enable authorization for pulling the image during `sos collect`, if + needed using an authfile. 
+ """ + if authfile: + return "--authfile %s" % authfile + return '' + def get_logs_command(self, container): """Get the command string used to dump container logs from the runtime -- 2.26.3 From 3cbbd7df6f0700609eeef3210d7388298b9e0c21 Mon Sep 17 00:00:00 2001 From: Jake Hunsaker Date: Wed, 12 May 2021 13:26:45 -0400 Subject: [PATCH] [sosnode] Allow clusters to set options only for master nodes Adds a method the `Cluster` that allows a profile to set sos options specifically for master nodes. Signed-off-by: Jake Hunsaker --- sos/collector/clusters/__init__.py | 21 +++++++++++++++++++++ sos/collector/sosnode.py | 6 ++++++ 2 files changed, 27 insertions(+) diff --git a/sos/collector/clusters/__init__.py b/sos/collector/clusters/__init__.py index 5c002bae..bfa3aad3 100644 --- a/sos/collector/clusters/__init__.py +++ b/sos/collector/clusters/__init__.py @@ -137,6 +137,27 @@ class Cluster(): """ self.cluster_ssh_key = key + def set_master_options(self, node): + """If there is a need to set specific options in the sos command being + run on the cluster's master nodes, override this method in the cluster + profile and do that here. + + :param node: The master node + :type node: ``SoSNode`` + """ + pass + + def check_node_is_master(self, node): + """In the event there are multiple masters, or if the collect command + is being run from a system that is technically capable of enumerating + nodes but the cluster profiles needs to specify master-specific options + for other nodes, override this method in the cluster profile + + :param node: The node for the cluster to check + :type node: ``SoSNode`` + """ + return node.address == self.master.address + def exec_master_cmd(self, cmd, need_root=False): """Used to retrieve command output from a (master) node in a cluster diff --git a/sos/collector/sosnode.py b/sos/collector/sosnode.py index d1c11824..62666635 100644 --- a/sos/collector/sosnode.py +++ b/sos/collector/sosnode.py @@ -647,6 +647,10 @@ class SosNode(): self.cluster.sos_plugin_options[opt]) self.opts.plugin_options.append(option) + # set master-only options + if self.cluster.check_node_is_master(self): + self.cluster.set_master_options(self) + def finalize_sos_cmd(self): """Use host facts and compare to the cluster type to modify the sos command if needed""" @@ -707,6 +711,8 @@ class SosNode(): os.path.join(self.host.sos_bin_path, self.sos_bin) ) + self.update_cmd_from_cluster() + if self.opts.only_plugins: plugs = [o for o in self.opts.only_plugins if self._plugin_exists(o)] -- 2.26.3 From cae9dd79a59107aa92db5f90aed356e093985bd9 Mon Sep 17 00:00:00 2001 From: Jake Hunsaker Date: Wed, 12 May 2021 16:06:29 -0400 Subject: [PATCH] [sosnode] Don't fail on sos-less bastion nodes used for node lists If the master node is determined to not have sos installed, that is not necessarily a fatal error for scenarios where the 'master' node is only being used to enumerate node lists and is not actually part of the cluster. This can happen when a user is using a bastion node to enumerate and connect to the cluster environment, or if the local host is being used to enumerate nodes via cluster client tooling. 
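
The intent can be restated with a tiny, hypothetical helper (the function
name and the addresses below are purely illustrative; the actual change is
a guard inside SosNode's sos detection code shown in the diff):

    def sos_missing_is_fatal(node_address, master_address):
        """A missing sos package only matters on nodes we collect from."""
        # a bastion 'master' used solely to enumerate nodes is exempt
        return node_address != master_address

    assert sos_missing_is_fatal('worker1.example.com', 'bastion.example.com')
    assert not sos_missing_is_fatal('bastion.example.com',
                                    'bastion.example.com')
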
Signed-off-by: Jake Hunsaker --- sos/collector/sosnode.py | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/sos/collector/sosnode.py b/sos/collector/sosnode.py index 62666635..7e56483d 100644 --- a/sos/collector/sosnode.py +++ b/sos/collector/sosnode.py @@ -287,13 +287,20 @@ class SosNode(): # use the containerized policy's command pkgs = self.run_command(self.host.container_version_command, use_container=True, need_root=True) - ver = pkgs['stdout'].strip().split('-')[1] - if ver: - self.sos_info['version'] = ver - if 'version' in self.sos_info: + if pkgs['status'] == 0: + ver = pkgs['stdout'].strip().split('-')[1] + if ver: + self.sos_info['version'] = ver + else: + self.sos_info['version'] = None + if self.sos_info['version']: self.log_info('sos version is %s' % self.sos_info['version']) else: - self.log_error('sos is not installed on this node') + if not self.address == self.opts.master: + # in the case where the 'master' enumerates nodes but is not + # intended for collection (bastions), don't worry about sos not + # being present + self.log_error('sos is not installed on this node') self.connected = False return False cmd = 'sosreport -l' -- 2.26.3 From cc5abe563d855dea9ac25f56de2e493228b48bf7 Mon Sep 17 00:00:00 2001 From: Jake Hunsaker Date: Wed, 12 May 2021 18:26:09 -0400 Subject: [PATCH] [sosnode] Mark sos commands as explicitly needing root for containers Fixes an issue where the sos inspection commands were not properly marked as needing to be run as root (either directly or via sudo) for containerized hosts, which would lead to incorrect sos command formatting. Mark those commands, and the final container removal command, as explicitly needing root permissions. Signed-off-by: Jake Hunsaker --- sos/collector/sosnode.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sos/collector/sosnode.py b/sos/collector/sosnode.py index 7e56483d..1fc03076 100644 --- a/sos/collector/sosnode.py +++ b/sos/collector/sosnode.py @@ -304,7 +304,7 @@ class SosNode(): self.connected = False return False cmd = 'sosreport -l' - sosinfo = self.run_command(cmd, use_container=True) + sosinfo = self.run_command(cmd, use_container=True, need_root=True) if sosinfo['status'] == 0: self._load_sos_plugins(sosinfo['stdout']) if self.check_sos_version('3.6'): @@ -312,7 +312,7 @@ class SosNode(): def _load_sos_presets(self): cmd = 'sosreport --list-presets' - res = self.run_command(cmd, use_container=True) + res = self.run_command(cmd, use_container=True, need_root=True) if res['status'] == 0: for line in res['stdout'].splitlines(): if line.strip().startswith('name:'): @@ -996,7 +996,7 @@ class SosNode(): self.remove_file(self.sos_path + '.md5') cleanup = self.host.set_cleanup_cmd() if cleanup: - self.run_command(cleanup) + self.run_command(cleanup, need_root=True) def collect_extra_cmd(self, filenames): """Collect the file created by a cluster outside of sos""" -- 2.26.3 From 55e77ad4c7e90ba14b10c5fdf18b65aa5d6b9cf8 Mon Sep 17 00:00:00 2001 From: Jake Hunsaker Date: Wed, 12 May 2021 18:55:31 -0400 Subject: [PATCH] [ocp] Add cluster profile for OCP4 Removes the previous OCP cluster profile and replaces it with an updated one for OCP4 which is entirely separated from the kubernetes profile. 
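
The new profile drives all node enumeration through `oc`, optionally
pointed at an explicit kubeconfig. A minimal sketch of that command
formatting, mirroring the fmt_oc_cmd() helper added below (the kubeconfig
path here is only an example):

    def fmt_oc_cmd(cmd, kubeconfig=None):
        # prepend --config only when a kubeconfig cluster option was given
        if kubeconfig:
            return "oc --config %s %s" % (kubeconfig, cmd)
        return "oc %s" % cmd

    print(fmt_oc_cmd("get nodes -o wide", "/root/.kube/config"))
    # oc --config /root/.kube/config get nodes -o wide
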
Resolves: #2544 Signed-off-by: Jake Hunsaker --- sos/collector/clusters/kubernetes.py | 8 -- sos/collector/clusters/ocp.py | 109 +++++++++++++++++++++++++++ 2 files changed, 109 insertions(+), 8 deletions(-) create mode 100644 sos/collector/clusters/ocp.py diff --git a/sos/collector/clusters/kubernetes.py b/sos/collector/clusters/kubernetes.py index 6a867e31..08fd9554 100644 --- a/sos/collector/clusters/kubernetes.py +++ b/sos/collector/clusters/kubernetes.py @@ -44,11 +44,3 @@ class kubernetes(Cluster): return nodes else: raise Exception('Node enumeration did not return usable output') - - -class openshift(kubernetes): - - cluster_name = 'OpenShift Container Platform' - packages = ('atomic-openshift',) - sos_preset = 'ocp' - cmd = 'oc' diff --git a/sos/collector/clusters/ocp.py b/sos/collector/clusters/ocp.py new file mode 100644 index 00000000..283fcfd1 --- /dev/null +++ b/sos/collector/clusters/ocp.py @@ -0,0 +1,109 @@ +# Copyright Red Hat 2021, Jake Hunsaker + +# This file is part of the sos project: https://github.com/sosreport/sos +# +# This copyrighted material is made available to anyone wishing to use, +# modify, copy, or redistribute it subject to the terms and conditions of +# version 2 of the GNU General Public License. +# +# See the LICENSE file in the source distribution for further information. + +from pipes import quote +from sos.collector.clusters import Cluster + + +class ocp(Cluster): + """OpenShift Container Platform v4""" + + cluster_name = 'OpenShift Container Platform v4' + packages = ('openshift-hyperkube', 'openshift-clients') + + option_list = [ + ('label', '', 'Colon delimited list of labels to select nodes with'), + ('role', '', 'Colon delimited list of roles to select nodes with'), + ('kubeconfig', '', 'Path to the kubeconfig file') + ] + + def fmt_oc_cmd(self, cmd): + """Format the oc command to optionall include the kubeconfig file if + one is specified + """ + if self.get_option('kubeconfig'): + return "oc --config %s %s" % (self.get_option('kubeconfig'), cmd) + return "oc %s" % cmd + + def check_enabled(self): + if super(ocp, self).check_enabled(): + return True + _who = self.fmt_oc_cmd('whoami') + return self.exec_master_cmd(_who)['status'] == 0 + + def _build_dict(self, nodelist): + """From the output of get_nodes(), construct an easier-to-reference + dict of nodes that will be used in determining labels, master status, + etc... 
+ + :param nodelist: The split output of `oc get nodes` + :type nodelist: ``list`` + + :returns: A dict of nodes with `get nodes` columns as keys + :rtype: ``dict`` + """ + nodes = {} + if 'NAME' in nodelist[0]: + # get the index of the fields + statline = nodelist.pop(0).split() + idx = {} + for state in ['status', 'roles', 'version', 'os-image']: + try: + idx[state] = statline.index(state.upper()) + except Exception: + pass + for node in nodelist: + _node = node.split() + nodes[_node[0]] = {} + for column in idx: + nodes[_node[0]][column] = _node[idx[column]] + return nodes + + def get_nodes(self): + nodes = [] + self.node_dict = {} + cmd = 'get nodes -o wide' + if self.get_option('label'): + labels = ','.join(self.get_option('label').split(':')) + cmd += " -l %s" % quote(labels) + res = self.exec_master_cmd(self.fmt_oc_cmd(cmd)) + if res['status'] == 0: + roles = [r for r in self.get_option('role').split(':')] + self.node_dict = self._build_dict(res['stdout'].splitlines()) + for node in self.node_dict: + if roles: + for role in roles: + if role in node: + nodes.append(node) + else: + nodes.append(node) + else: + msg = "'oc' command failed" + if 'Missing or incomplete' in res['stdout']: + msg = ("'oc' failed due to missing kubeconfig on master node." + " Specify one via '-c ocp.kubeconfig='") + raise Exception(msg) + return nodes + + def set_node_label(self, node): + if node.address not in self.node_dict: + return '' + for label in ['master', 'worker']: + if label in self.node_dict[node.address]['roles']: + return label + return '' + + def check_node_is_master(self, sosnode): + if sosnode.address not in self.node_dict: + return False + return 'master' in self.node_dict[sosnode.address]['roles'] + + def set_master_options(self, node): + node.opts.enable_plugins.append('openshift') -- 2.26.3 From a3c1caad21160545eda87ea1fde93e972a6fbf88 Mon Sep 17 00:00:00 2001 From: Jake Hunsaker Date: Wed, 26 May 2021 11:55:24 -0400 Subject: [PATCH] [cleaner] Don't strip empty lines from substituted files Fixes an issue where empty lines would be stripped from files that have other obfuscations in them. Those empty lines may be important for file structure and/or readability, so we should instead simply not pass empty lines to the parsers rather than skipping them wholesale in the flow of writing obfuscations to a temp file before replacing the source file with a potentially changed temp file. Resolves: #2562 Signed-off-by: Jake Hunsaker --- sos/cleaner/__init__.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/sos/cleaner/__init__.py b/sos/cleaner/__init__.py index bdd24f95..55465b85 100644 --- a/sos/cleaner/__init__.py +++ b/sos/cleaner/__init__.py @@ -603,8 +603,6 @@ third party. tfile = tempfile.NamedTemporaryFile(mode='w', dir=self.tmpdir) with open(filename, 'r') as fname: for line in fname: - if not line.strip(): - continue try: line, count = self.obfuscate_line(line) subs += count @@ -642,7 +640,11 @@ third party. 
Returns the fully obfuscated line and the number of substitutions made """ + # don't iterate over blank lines, but still write them to the tempfile + # to maintain the same structure when we write a scrubbed file back count = 0 + if not line.strip(): + return line, count for parser in self.parsers: try: line, _count = parser.parse_line(line) -- 2.26.3 From 892bbd8114703f5a4d23aa77ba5829b7ba59446f Mon Sep 17 00:00:00 2001 From: Jake Hunsaker Date: Wed, 5 May 2021 17:02:04 -0400 Subject: [PATCH] [cleaner] Remove binary files by default Binary files generally speaking cannot be obfuscated, and as such we should remove them from archives being obfuscated by default so that sensitive data is not mistakenly included in an obfuscated archive. This commits adds a new `--keep-binary-files` option that if used will keep any encountered binary files in the final archive. The default option of `false` will ensure that encountered binary files are removed. The number of removed binary files per archive is reported when obfuscation is completed for that archive. Closes: #2478 Resolves: #2524 Signed-off-by: Jake Hunsaker --- man/en/sos-clean.1 | 12 ++++ sos/cleaner/__init__.py | 21 +++++- sos/cleaner/obfuscation_archive.py | 67 ++++++++++++++++++-- sos/collector/__init__.py | 5 ++ sos/report/__init__.py | 6 ++ 8 files changed, 167 insertions(+), 7 deletions(-) diff --git a/man/en/sos-clean.1 b/man/en/sos-clean.1 index 4856b43b..b77bc63c 100644 --- a/man/en/sos-clean.1 +++ b/man/en/sos-clean.1 @@ -9,6 +9,7 @@ sos clean - Obfuscate sensitive data from one or more sosreports [\-\-map-file] [\-\-jobs] [\-\-no-update] + [\-\-keep-binary-files] .SH DESCRIPTION \fBsos clean\fR or \fBsos mask\fR is an sos subcommand used to obfuscate sensitive information from @@ -77,6 +78,17 @@ Default: 4 .TP .B \-\-no-update Do not write the mapping file contents to /etc/sos/cleaner/default_mapping +.TP +.B \-\-keep-binary-files +Keep unprocessable binary files in the archive, rather than removing them. + +Note that binary files cannot be obfuscated, and thus keeping them in the archive +may result in otherwise sensitive information being included in the final archive. +Users should review any archive that keeps binary files in place before sending to +a third party. + +Default: False (remove encountered binary files) + .SH SEE ALSO .BR sos (1) .BR sos-report (1) diff --git a/sos/cleaner/__init__.py b/sos/cleaner/__init__.py index 55465b85..f88ff8a0 100644 --- a/sos/cleaner/__init__.py +++ b/sos/cleaner/__init__.py @@ -47,6 +47,7 @@ class SoSCleaner(SoSComponent): 'keyword_file': None, 'map_file': '/etc/sos/cleaner/default_mapping', 'no_update': False, + 'keep_binary_files': False, 'target': '', 'usernames': [] } @@ -183,6 +184,11 @@ third party. action='store_true', help='Do not update the --map file with new ' 'mappings from this run') + clean_grp.add_argument('--keep-binary-files', default=False, + action='store_true', + dest='keep_binary_files', + help='Keep unprocessable binary files in the ' + 'archive instead of removing them') clean_grp.add_argument('--usernames', dest='usernames', default=[], action='extend', help='List of usernames to obfuscate') @@ -467,6 +473,11 @@ third party. 
"%s concurrently\n" % (len(self.report_paths), self.opts.jobs)) self.ui_log.info(msg) + if self.opts.keep_binary_files: + self.ui_log.warning( + "WARNING: binary files that potentially contain sensitive " + "information will NOT be removed from the final archive\n" + ) pool = ThreadPoolExecutor(self.opts.jobs) pool.map(self.obfuscate_report, self.report_paths, chunksize=1) pool.shutdown(wait=True) @@ -539,6 +550,10 @@ third party. short_name = fname.split(archive.archive_name + '/')[1] if archive.should_skip_file(short_name): continue + if (not self.opts.keep_binary_files and + archive.should_remove_file(short_name)): + archive.remove_file(short_name) + continue try: count = self.obfuscate_file(fname, short_name, archive.archive_name) @@ -574,7 +589,11 @@ third party. arc_md.add_field('files_obfuscated', len(archive.file_sub_list)) arc_md.add_field('total_substitutions', archive.total_sub_count) self.completed_reports.append(archive) - archive.report_msg("Obfuscation completed") + rmsg = '' + if archive.removed_file_count: + rmsg = " [removed %s unprocessable files]" + rmsg = rmsg % archive.removed_file_count + archive.report_msg("Obfuscation completed%s" % rmsg) except Exception as err: self.ui_log.info("Exception while processing %s: %s" diff --git a/sos/cleaner/obfuscation_archive.py b/sos/cleaner/obfuscation_archive.py index c64ab13b..76841b51 100644 --- a/sos/cleaner/obfuscation_archive.py +++ b/sos/cleaner/obfuscation_archive.py @@ -28,6 +28,7 @@ class SoSObfuscationArchive(): file_sub_list = [] total_sub_count = 0 + removed_file_count = 0 def __init__(self, archive_path, tmpdir): self.archive_path = archive_path @@ -62,11 +63,7 @@ class SoSObfuscationArchive(): 'sys/firmware', 'sys/fs', 'sys/kernel/debug', - 'sys/module', - r'.*\.tar$', # TODO: support archive unpacking - # Be explicit with these tar matches to avoid matching commands - r'.*\.tar\.xz', - '.*.gz' + 'sys/module' ] @property @@ -76,6 +73,17 @@ class SoSObfuscationArchive(): except Exception: return False + def remove_file(self, fname): + """Remove a file from the archive. This is used when cleaner encounters + a binary file, which we cannot reliably obfuscate. + """ + full_fname = self.get_file_path(fname) + # don't call a blank remove() here + if full_fname: + self.log_info("Removing binary file '%s' from archive" % fname) + os.remove(full_fname) + self.removed_file_count += 1 + def extract(self): if self.is_tarfile: self.report_msg("Extracting...") @@ -227,3 +235,52 @@ class SoSObfuscationArchive(): if filename.startswith(_skip) or re.match(_skip, filename): return True return False + + def should_remove_file(self, fname): + """Determine if the file should be removed or not, due to an inability + to reliably obfuscate that file based on the filename. + + :param fname: Filename relative to the extracted archive root + :type fname: ``str`` + + :returns: ``True`` if the file cannot be reliably obfuscated + :rtype: ``bool`` + """ + obvious_removes = [ + r'.*\.gz', # TODO: support flat gz/xz extraction + r'.*\.xz', + r'.*\.bzip2', + r'.*\.tar\..*', # TODO: support archive unpacking + r'.*\.txz$', + r'.*\.tgz$', + r'.*\.bin', + r'.*\.journal', + r'.*\~$' + ] + + # if the filename matches, it is obvious we can remove them without + # doing the read test + for _arc_reg in obvious_removes: + if re.match(_arc_reg, fname): + return True + + return self.file_is_binary(fname) + + def file_is_binary(self, fname): + """Determine if the file is a binary file or not. 
+ + + :param fname: Filename relative to the extracted archive root + :type fname: ``str`` + + :returns: ``True`` if file is binary, else ``False`` + :rtype: ``bool`` + """ + with open(self.get_file_path(fname), 'tr') as tfile: + try: + # when opened as above (tr), reading binary content will raise + # an exception + tfile.read(1) + return False + except UnicodeDecodeError: + return True diff --git a/sos/collector/__init__.py b/sos/collector/__init__.py index 9884836c..469db60d 100644 --- a/sos/collector/__init__.py +++ b/sos/collector/__init__.py @@ -67,6 +67,7 @@ class SoSCollector(SoSComponent): 'jobs': 4, 'keywords': [], 'keyword_file': None, + 'keep_binary_files': False, 'label': '', 'list_options': False, 'log_size': 0, @@ -410,6 +411,10 @@ class SoSCollector(SoSComponent): dest='clean', default=False, action='store_true', help='Obfuscate sensistive information') + cleaner_grp.add_argument('--keep-binary-files', default=False, + action='store_true', dest='keep_binary_files', + help='Keep unprocessable binary files in the ' + 'archive instead of removing them') cleaner_grp.add_argument('--domains', dest='domains', default=[], action='extend', help='Additional domain names to obfuscate') diff --git a/sos/report/__init__.py b/sos/report/__init__.py index d4345409..2cedc76e 100644 --- a/sos/report/__init__.py +++ b/sos/report/__init__.py @@ -82,6 +82,7 @@ class SoSReport(SoSComponent): 'case_id': '', 'chroot': 'auto', 'clean': False, + 'keep_binary_files': False, 'desc': '', 'domains': [], 'dry_run': False, @@ -344,6 +345,11 @@ class SoSReport(SoSComponent): default='/etc/sos/cleaner/default_mapping', help=('Provide a previously generated mapping' ' file for obfuscation')) + cleaner_grp.add_argument('--keep-binary-files', default=False, + action='store_true', + dest='keep_binary_files', + help='Keep unprocessable binary files in the ' + 'archive instead of removing them') cleaner_grp.add_argument('--usernames', dest='usernames', default=[], action='extend', help='List of usernames to obfuscate') From aed0102a1d6ef9a030c9e5349f092b51b9d1f22d Mon Sep 17 00:00:00 2001 From: Jake Hunsaker Date: Fri, 11 Jun 2021 23:20:59 -0400 Subject: [PATCH 01/10] [SoSNode] Allow individually setting node options Like we now do for primary nodes, add the ability to individually set node options via a new `set_node_options()` method for when blanket setting options across all nodes via the options class attrs is not sufficient. Signed-off-by: Jake Hunsaker --- sos/collector/clusters/__init__.py | 10 ++++++++++ sos/collector/sosnode.py | 6 ++++-- 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/sos/collector/clusters/__init__.py b/sos/collector/clusters/__init__.py index 90e62d79..c4da1ab8 100644 --- a/sos/collector/clusters/__init__.py +++ b/sos/collector/clusters/__init__.py @@ -137,6 +137,16 @@ class Cluster(): """ self.cluster_ssh_key = key + def set_node_options(self, node): + """If there is a need to set specific options on ONLY the non-primary + nodes in a collection, override this method in the cluster profile + and do that here. 
+ + :param node: The non-primary node + :type node: ``SoSNode`` + """ + pass + def set_master_options(self, node): """If there is a need to set specific options in the sos command being run on the cluster's master nodes, override this method in the cluster diff --git a/sos/collector/sosnode.py b/sos/collector/sosnode.py index 1fc03076..7e784aa1 100644 --- a/sos/collector/sosnode.py +++ b/sos/collector/sosnode.py @@ -657,6 +657,8 @@ class SosNode(): # set master-only options if self.cluster.check_node_is_master(self): self.cluster.set_master_options(self) + else: + self.cluster.set_node_options(self) def finalize_sos_cmd(self): """Use host facts and compare to the cluster type to modify the sos @@ -713,13 +715,13 @@ class SosNode(): sos_opts.append('--cmd-timeout=%s' % quote(str(self.opts.cmd_timeout))) + self.update_cmd_from_cluster() + sos_cmd = sos_cmd.replace( 'sosreport', os.path.join(self.host.sos_bin_path, self.sos_bin) ) - self.update_cmd_from_cluster() - if self.opts.only_plugins: plugs = [o for o in self.opts.only_plugins if self._plugin_exists(o)] -- 2.26.3 From 96f166699d12704cc7cf73cb8b13278675f68730 Mon Sep 17 00:00:00 2001 From: Jake Hunsaker Date: Sat, 12 Jun 2021 00:02:36 -0400 Subject: [PATCH 02/10] [sosnode] Support passing env vars to `run_command()` Updates `run_command()` to support passing new environment variables to the command being run, for that command alone. This parameter takes a dict, and if set we will first copy the existing set of env vars on the node and then update that set of variables using the passed dict. Additionally, `execute_sos_command()` will now try to pass a new `sos_env_vars` dict (default empty) so that clusters may set environment variables specifically for the sos command being run, without having to modify the actual sos command being executed. 
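
As an example, a cluster profile could populate this dict from one of its
option hooks. The sketch below is hypothetical: the class name, variable
and value are illustrative, while node.sos_env_vars and
set_master_options() come from the collector code:

    from sos.collector.clusters import Cluster

    class mycluster(Cluster):
        """Hypothetical profile setting an env var for the sos run"""

        def set_master_options(self, node):
            # picked up by execute_sos_command(), which passes
            # env=self.sos_env_vars to run_command()
            node.sos_env_vars['KUBECONFIG'] = '/etc/kubernetes/admin.conf'
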
Signed-off-by: Jake Hunsaker --- sos/collector/sosnode.py | 27 ++++++++++++++++++++++++--- 1 file changed, 24 insertions(+), 3 deletions(-) diff --git a/sos/collector/sosnode.py b/sos/collector/sosnode.py index 7e784aa1..40472a4e 100644 --- a/sos/collector/sosnode.py +++ b/sos/collector/sosnode.py @@ -45,6 +45,8 @@ class SosNode(): self.host = None self.cluster = None self.hostname = None + self.sos_env_vars = {} + self._env_vars = {} self._password = password or self.opts.password if not self.opts.nopasswd_sudo and not self.opts.sudo_pw: self.opts.sudo_pw = self._password @@ -109,6 +111,21 @@ class SosNode(): def _fmt_msg(self, msg): return '{:<{}} : {}'.format(self._hostname, self.hostlen + 1, msg) + @property + def env_vars(self): + if not self._env_vars: + if self.local: + self._env_vars = os.environ.copy() + else: + ret = self.run_command("env --null") + if ret['status'] == 0: + for ln in ret['output'].split('\x00'): + if not ln: + continue + _val = ln.split('=') + self._env_vars[_val[0]] = _val[1] + return self._env_vars + def set_node_manifest(self, manifest): """Set the manifest section that this node will write to """ @@ -404,7 +421,7 @@ class SosNode(): return self.host.package_manager.pkg_by_name(pkg) is not None def run_command(self, cmd, timeout=180, get_pty=False, need_root=False, - force_local=False, use_container=False): + force_local=False, use_container=False, env=None): """Runs a given cmd, either via the SSH session or locally Arguments: @@ -446,7 +463,10 @@ class SosNode(): else: if get_pty: cmd = "/bin/bash -c %s" % quote(cmd) - res = pexpect.spawn(cmd, encoding='utf-8') + if env: + _cmd_env = self.env_vars + _cmd_env.update(env) + res = pexpect.spawn(cmd, encoding='utf-8', env=_cmd_env) if need_root: if self.need_sudo: res.sendline(self.opts.sudo_pw) @@ -830,7 +850,8 @@ class SosNode(): res = self.run_command(self.sos_cmd, timeout=self.opts.timeout, get_pty=True, need_root=True, - use_container=True) + use_container=True, + env=self.sos_env_vars) if res['status'] == 0: for line in res['stdout'].splitlines(): if fnmatch.fnmatch(line, '*sosreport-*tar*'): -- 2.26.3 From a9e1632113406a646bdd7525982b699cf790aedb Mon Sep 17 00:00:00 2001 From: Jake Hunsaker Date: Tue, 15 Jun 2021 12:43:27 -0400 Subject: [PATCH 03/10] [collect|sosnode] Avoiding clobbering sos options between nodes This commit overhauls the function of `finalize_sos_cmd()` in several ways. First, assign the sos report plugin related options directly to private copies of those values for each node, so that the shared cluster profile does not clober options between nodes. Second, provide a default Lock mechanism for clusters that need to perform some node-comparison logic when assigning options based on node role. Finally, finalize the sos command for each node _prior_ to the call to `SoSNode.sosreport()` so that we can be sure that clusters are able to appropriately compare and assign sos options across nodes before some nodes have already started and/or finished their own sos report collections. Signed-off-by: Jake Hunsaker --- sos/collector/__init__.py | 14 +++++ sos/collector/clusters/__init__.py | 2 + sos/collector/sosnode.py | 89 +++++++++++++++++------------- 3 files changed, 67 insertions(+), 38 deletions(-) diff --git a/sos/collector/__init__.py b/sos/collector/__init__.py index 469db60d..7b8cfcf7 100644 --- a/sos/collector/__init__.py +++ b/sos/collector/__init__.py @@ -1186,6 +1186,10 @@ this utility or remote systems that it connects to. 
"concurrently\n" % (self.report_num, self.opts.jobs)) + npool = ThreadPoolExecutor(self.opts.jobs) + npool.map(self._finalize_sos_cmd, self.client_list, chunksize=1) + npool.shutdown(wait=True) + pool = ThreadPoolExecutor(self.opts.jobs) pool.map(self._collect, self.client_list, chunksize=1) pool.shutdown(wait=True) @@ -1217,6 +1221,16 @@ this utility or remote systems that it connects to. except Exception as err: self.ui_log.error("Upload attempt failed: %s" % err) + def _finalize_sos_cmd(self, client): + """Calls finalize_sos_cmd() on each node so that we have the final + command before we thread out the actual execution of sos + """ + try: + client.finalize_sos_cmd() + except Exception as err: + self.log_error("Could not finalize sos command for %s: %s" + % (client.address, err)) + def _collect(self, client): """Runs sosreport on each node""" try: diff --git a/sos/collector/clusters/__init__.py b/sos/collector/clusters/__init__.py index c4da1ab8..bb728bc0 100644 --- a/sos/collector/clusters/__init__.py +++ b/sos/collector/clusters/__init__.py @@ -11,6 +11,7 @@ import logging from sos.options import ClusterOption +from threading import Lock class Cluster(): @@ -66,6 +67,7 @@ class Cluster(): if cls.__name__ != 'Cluster': self.cluster_type.append(cls.__name__) self.node_list = None + self.lock = Lock() self.soslog = logging.getLogger('sos') self.ui_log = logging.getLogger('sos_ui') self.options = [] diff --git a/sos/collector/sosnode.py b/sos/collector/sosnode.py index 40472a4e..1c25cc34 100644 --- a/sos/collector/sosnode.py +++ b/sos/collector/sosnode.py @@ -38,6 +38,7 @@ class SosNode(): self.address = address.strip() self.commons = commons self.opts = commons['cmdlineopts'] + self._assign_config_opts() self.tmpdir = commons['tmpdir'] self.hostlen = commons['hostlen'] self.need_sudo = commons['need_sudo'] @@ -465,8 +466,8 @@ class SosNode(): cmd = "/bin/bash -c %s" % quote(cmd) if env: _cmd_env = self.env_vars - _cmd_env.update(env) - res = pexpect.spawn(cmd, encoding='utf-8', env=_cmd_env) + env = _cmd_env.update(env) + res = pexpect.spawn(cmd, encoding='utf-8', env=env) if need_root: if self.need_sudo: res.sendline(self.opts.sudo_pw) @@ -484,9 +485,6 @@ class SosNode(): def sosreport(self): """Run a sosreport on the node, then collect it""" - self.sos_cmd = self.finalize_sos_cmd() - self.log_info('Final sos command set to %s' % self.sos_cmd) - self.manifest.add_field('final_sos_command', self.sos_cmd) try: path = self.execute_sos_command() if path: @@ -656,29 +654,42 @@ class SosNode(): This will NOT override user supplied options. """ if self.cluster.sos_preset: - if not self.opts.preset: - self.opts.preset = self.cluster.sos_preset + if not self.preset: + self.preset = self.cluster.sos_preset else: self.log_info('Cluster specified preset %s but user has also ' 'defined a preset. Using user specification.' 
% self.cluster.sos_preset) if self.cluster.sos_plugins: for plug in self.cluster.sos_plugins: - if plug not in self.opts.enable_plugins: - self.opts.enable_plugins.append(plug) + if plug not in self.enable_plugins: + self.enable_plugins.append(plug) if self.cluster.sos_plugin_options: for opt in self.cluster.sos_plugin_options: - if not any(opt in o for o in self.opts.plugin_options): + if not any(opt in o for o in self.plugin_options): option = '%s=%s' % (opt, self.cluster.sos_plugin_options[opt]) - self.opts.plugin_options.append(option) + self.plugin_options.append(option) # set master-only options if self.cluster.check_node_is_master(self): - self.cluster.set_master_options(self) + with self.cluster.lock: + self.cluster.set_master_options(self) else: - self.cluster.set_node_options(self) + with self.cluster.lock: + self.cluster.set_node_options(self) + + def _assign_config_opts(self): + """From the global opts configuration, assign those values locally + to this node so that they may be acted on individually. + """ + # assign these to new, private copies + self.only_plugins = list(self.opts.only_plugins) + self.skip_plugins = list(self.opts.skip_plugins) + self.enable_plugins = list(self.opts.enable_plugins) + self.plugin_options = list(self.opts.plugin_options) + self.preset = list(self.opts.preset) def finalize_sos_cmd(self): """Use host facts and compare to the cluster type to modify the sos @@ -742,59 +753,61 @@ class SosNode(): os.path.join(self.host.sos_bin_path, self.sos_bin) ) - if self.opts.only_plugins: - plugs = [o for o in self.opts.only_plugins - if self._plugin_exists(o)] - if len(plugs) != len(self.opts.only_plugins): - not_only = list(set(self.opts.only_plugins) - set(plugs)) + if self.only_plugins: + plugs = [o for o in self.only_plugins if self._plugin_exists(o)] + if len(plugs) != len(self.only_plugins): + not_only = list(set(self.only_plugins) - set(plugs)) self.log_debug('Requested plugins %s were requested to be ' 'enabled but do not exist' % not_only) - only = self._fmt_sos_opt_list(self.opts.only_plugins) + only = self._fmt_sos_opt_list(self.only_plugins) if only: sos_opts.append('--only-plugins=%s' % quote(only)) - return "%s %s" % (sos_cmd, ' '.join(sos_opts)) + self.sos_cmd = "%s %s" % (sos_cmd, ' '.join(sos_opts)) + self.log_info('Final sos command set to %s' % self.sos_cmd) + self.manifest.add_field('final_sos_command', self.sos_cmd) + return - if self.opts.skip_plugins: + if self.skip_plugins: # only run skip-plugins for plugins that are enabled - skip = [o for o in self.opts.skip_plugins - if self._check_enabled(o)] - if len(skip) != len(self.opts.skip_plugins): - not_skip = list(set(self.opts.skip_plugins) - set(skip)) + skip = [o for o in self.skip_plugins if self._check_enabled(o)] + if len(skip) != len(self.skip_plugins): + not_skip = list(set(self.skip_plugins) - set(skip)) self.log_debug('Requested to skip plugins %s, but plugins are ' 'already not enabled' % not_skip) skipln = self._fmt_sos_opt_list(skip) if skipln: sos_opts.append('--skip-plugins=%s' % quote(skipln)) - if self.opts.enable_plugins: + if self.enable_plugins: # only run enable for plugins that are disabled - opts = [o for o in self.opts.enable_plugins - if o not in self.opts.skip_plugins + opts = [o for o in self.enable_plugins + if o not in self.skip_plugins and self._check_disabled(o) and self._plugin_exists(o)] - if len(opts) != len(self.opts.enable_plugins): - not_on = list(set(self.opts.enable_plugins) - set(opts)) + if len(opts) != len(self.enable_plugins): + not_on = 
list(set(self.enable_plugins) - set(opts)) self.log_debug('Requested to enable plugins %s, but plugins ' 'are already enabled or do not exist' % not_on) enable = self._fmt_sos_opt_list(opts) if enable: sos_opts.append('--enable-plugins=%s' % quote(enable)) - if self.opts.plugin_options: - opts = [o for o in self.opts.plugin_options + if self.plugin_options: + opts = [o for o in self.plugin_options if self._plugin_exists(o.split('.')[0]) and self._plugin_option_exists(o.split('=')[0])] if opts: sos_opts.append('-k %s' % quote(','.join(o for o in opts))) - if self.opts.preset: - if self._preset_exists(self.opts.preset): - sos_opts.append('--preset=%s' % quote(self.opts.preset)) + if self.preset: + if self._preset_exists(self.preset): + sos_opts.append('--preset=%s' % quote(self.preset)) else: self.log_debug('Requested to enable preset %s but preset does ' - 'not exist on node' % self.opts.preset) + 'not exist on node' % self.preset) - _sos_cmd = "%s %s" % (sos_cmd, ' '.join(sos_opts)) - return _sos_cmd + self.sos_cmd = "%s %s" % (sos_cmd, ' '.join(sos_opts)) + self.log_info('Final sos command set to %s' % self.sos_cmd) + self.manifest.add_field('final_sos_command', self.sos_cmd) def determine_sos_label(self): """Determine what, if any, label should be added to the sosreport""" -- 2.26.3 From 7e6c078e51143f7064190b316a251ddd8d431495 Mon Sep 17 00:00:00 2001 From: Jake Hunsaker Date: Tue, 15 Jun 2021 18:38:34 -0400 Subject: [PATCH 04/10] [cleaner] Improve handling of symlink obfuscation Improves handling of symlink obfuscation by only performing the obfuscaiton on the ultimate target of any symlinks encountered. Now, when a symlink is encountered, clean will obfuscate the link name and re-write it in the archive, pointing to the (potentially obfuscated) target name. Signed-off-by: Jake Hunsaker --- sos/cleaner/__init__.py | 65 +++++++++++++++++++++++++++++------------ 1 file changed, 46 insertions(+), 19 deletions(-) diff --git a/sos/cleaner/__init__.py b/sos/cleaner/__init__.py index abfb684b..b38c8dfc 100644 --- a/sos/cleaner/__init__.py +++ b/sos/cleaner/__init__.py @@ -612,28 +612,55 @@ third party. if not filename: # the requested file doesn't exist in the archive return - self.log_debug("Obfuscating %s" % short_name or filename, - caller=arc_name) subs = 0 - tfile = tempfile.NamedTemporaryFile(mode='w', dir=self.tmpdir) - with open(filename, 'r') as fname: - for line in fname: - try: - line, count = self.obfuscate_line(line) - subs += count - tfile.write(line) - except Exception as err: - self.log_debug("Unable to obfuscate %s: %s" - % (short_name, err), caller=arc_name) - tfile.seek(0) - if subs: - shutil.copy(tfile.name, filename) - tfile.close() - _ob_filename = self.obfuscate_string(short_name) - if _ob_filename != short_name: + if not os.path.islink(filename): + # don't run the obfuscation on the link, but on the actual file + # at some other point. 
+ self.log_debug("Obfuscating %s" % short_name or filename, + caller=arc_name) + tfile = tempfile.NamedTemporaryFile(mode='w', dir=self.tmpdir) + with open(filename, 'r') as fname: + for line in fname: + try: + line, count = self.obfuscate_line(line) + subs += count + tfile.write(line) + except Exception as err: + self.log_debug("Unable to obfuscate %s: %s" + % (short_name, err), caller=arc_name) + tfile.seek(0) + if subs: + shutil.copy(tfile.name, filename) + tfile.close() + + _ob_short_name = self.obfuscate_string(short_name.split('/')[-1]) + _ob_filename = short_name.replace(short_name.split('/')[-1], + _ob_short_name) + _sym_changed = False + if os.path.islink(filename): + _link = os.readlink(filename) + _ob_link = self.obfuscate_string(_link) + if _ob_link != _link: + _sym_changed = True + + if (_ob_filename != short_name) or _sym_changed: arc_path = filename.split(short_name)[0] _ob_path = os.path.join(arc_path, _ob_filename) - os.rename(filename, _ob_path) + # ensure that any plugin subdirs that contain obfuscated strings + # get created with obfuscated counterparts + if not os.path.islink(filename): + os.rename(filename, _ob_path) + else: + # generate the obfuscated name of the link target + _target_ob = self.obfuscate_string(os.readlink(filename)) + # remove the unobfuscated original symlink first, in case the + # symlink name hasn't changed but the target has + os.remove(filename) + # create the newly obfuscated symlink, pointing to the + # obfuscated target name, which may not exist just yet, but + # when the actual file is obfuscated, will be created + os.symlink(_target_ob, _ob_path) + return subs def obfuscate_string(self, string_data): -- 2.26.3 From b5d166ac9ff79bc3740c5e66f16d60762f9a0ac0 Mon Sep 17 00:00:00 2001 From: Jake Hunsaker Date: Tue, 15 Jun 2021 22:56:19 -0400 Subject: [PATCH 05/10] [cleaner] Iterate over matches with most precise match first When matching strings in parsers to do obfuscation, we should be using the most precise matches found first, rather than matching in the order a match is hit. This ensures that we correctly obfuscate an entire string, rather than potentially only partial substring(s) that exist within the entire match. 
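
A small standalone example of the ordering problem (the mapping values are
made up for illustration):

    line = "node host1 reports for host1.example.com"
    mapping = {
        'host1': 'host0',
        'host1.example.com': 'host0.obfuscateddomain0.com',
    }
    # longest (most precise) match first, as the parsers now do
    for key in sorted(mapping, key=len, reverse=True):
        line = line.replace(key, mapping[key])
    print(line)
    # node host0 reports for host0.obfuscateddomain0.com
    # replacing the short name first would instead have produced
    # 'host0.example.com', leaving the real domain exposed
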
Signed-off-by: Jake Hunsaker --- sos/cleaner/parsers/__init__.py | 10 +++++++--- sos/cleaner/parsers/keyword_parser.py | 2 +- sos/cleaner/parsers/username_parser.py | 2 +- 3 files changed, 9 insertions(+), 5 deletions(-) diff --git a/sos/cleaner/parsers/__init__.py b/sos/cleaner/parsers/__init__.py index c77300aa..cfa20b95 100644 --- a/sos/cleaner/parsers/__init__.py +++ b/sos/cleaner/parsers/__init__.py @@ -82,10 +82,12 @@ class SoSCleanerParser(): for pattern in self.regex_patterns: matches = [m[0] for m in re.findall(pattern, line, re.I)] if matches: + matches.sort(reverse=True, key=lambda x: len(x)) count += len(matches) for match in matches: - new_match = self.mapping.get(match.strip()) - line = line.replace(match.strip(), new_match) + match = match.strip() + new_match = self.mapping.get(match) + line = line.replace(match, new_match) return line, count def parse_string_for_keys(self, string_data): @@ -102,7 +104,9 @@ class SoSCleanerParser(): :returns: The obfuscated line :rtype: ``str`` """ - for key, val in self.mapping.dataset.items(): + for pair in sorted(self.mapping.dataset.items(), reverse=True, + key=lambda x: len(x[0])): + key, val = pair if key in string_data: string_data = string_data.replace(key, val) return string_data diff --git a/sos/cleaner/parsers/keyword_parser.py b/sos/cleaner/parsers/keyword_parser.py index 3dc2b7f0..9134f82d 100644 --- a/sos/cleaner/parsers/keyword_parser.py +++ b/sos/cleaner/parsers/keyword_parser.py @@ -42,7 +42,7 @@ class SoSKeywordParser(SoSCleanerParser): def parse_line(self, line): count = 0 - for keyword in self.user_keywords: + for keyword in sorted(self.user_keywords, reverse=True): if keyword in line: line = line.replace(keyword, self.mapping.get(keyword)) count += 1 diff --git a/sos/cleaner/parsers/username_parser.py b/sos/cleaner/parsers/username_parser.py index 2bb6c7f3..0c3bbac4 100644 --- a/sos/cleaner/parsers/username_parser.py +++ b/sos/cleaner/parsers/username_parser.py @@ -51,7 +51,7 @@ class SoSUsernameParser(SoSCleanerParser): def parse_line(self, line): count = 0 - for username in self.mapping.dataset.keys(): + for username in sorted(self.mapping.dataset.keys(), reverse=True): if username in line: count = line.count(username) line = line.replace(username, self.mapping.get(username)) -- 2.26.3 From 7ed138fcd2ee6ece3e7fbd9e48293b212e0b4e41 Mon Sep 17 00:00:00 2001 From: Jake Hunsaker Date: Wed, 16 Jun 2021 01:15:45 -0400 Subject: [PATCH 06/10] [cleaner] Explicitly obfuscate directory names within archives This commits adds a step to `obfuscate_report()` that explicitly walks through all directories in the archive, and obfuscates the directory names if necessary. Since this uses `obfuscate_string()` for the directory names, a `skip_keys` list has been added to maps to allow parsers/maps to specify matched keys (such as short names for the hostname parser) that should not be considered when obfuscating directory names (e.g. 'www'). Closes: #2465 Signed-off-by: Jake Hunsaker --- sos/cleaner/__init__.py | 26 ++++++++++++++++++++++++++ sos/cleaner/mappings/__init__.py | 4 +++- sos/cleaner/mappings/hostname_map.py | 5 +++++ sos/cleaner/obfuscation_archive.py | 20 ++++++++++++++++++-- sos/cleaner/parsers/__init__.py | 2 ++ 5 files changed, 54 insertions(+), 3 deletions(-) diff --git a/sos/cleaner/__init__.py b/sos/cleaner/__init__.py index b38c8dfc..88d4d0ea 100644 --- a/sos/cleaner/__init__.py +++ b/sos/cleaner/__init__.py @@ -562,6 +562,11 @@ third party. 
except Exception as err: self.log_debug("Unable to parse file %s: %s" % (short_name, err)) + try: + self.obfuscate_directory_names(archive) + except Exception as err: + self.log_info("Failed to obfuscate directories: %s" % err, + caller=archive.archive_name) # if the archive was already a tarball, repack it method = archive.get_compression() @@ -663,6 +668,27 @@ third party. return subs + def obfuscate_directory_names(self, archive): + """For all directories that exist within the archive, obfuscate the + directory name if it contains sensitive strings found during execution + """ + self.log_info("Obfuscating directory names in archive %s" + % archive.archive_name) + for dirpath in sorted(archive.get_directory_list(), reverse=True): + for _name in os.listdir(dirpath): + _dirname = os.path.join(dirpath, _name) + _arc_dir = _dirname.split(archive.extracted_path)[-1] + if os.path.isdir(_dirname): + _ob_dirname = self.obfuscate_string(_name) + if _ob_dirname != _name: + _ob_arc_dir = _arc_dir.rstrip(_name) + _ob_arc_dir = os.path.join( + archive.extracted_path, + _ob_arc_dir.lstrip('/'), + _ob_dirname + ) + os.rename(_dirname, _ob_arc_dir) + def obfuscate_string(self, string_data): for parser in self.parsers: try: diff --git a/sos/cleaner/mappings/__init__.py b/sos/cleaner/mappings/__init__.py index dd464e5a..5cf5c8b2 100644 --- a/sos/cleaner/mappings/__init__.py +++ b/sos/cleaner/mappings/__init__.py @@ -20,8 +20,10 @@ class SoSMap(): corresponding SoSMap() object, to allow for easy retrieval of obfuscated items. """ - + # used for regex skips in parser.parse_line() ignore_matches = [] + # used for filename obfuscations in parser.parse_string_for_keys() + skip_keys = [] def __init__(self): self.dataset = {} diff --git a/sos/cleaner/mappings/hostname_map.py b/sos/cleaner/mappings/hostname_map.py index e0b7bf1d..c9a44d8d 100644 --- a/sos/cleaner/mappings/hostname_map.py +++ b/sos/cleaner/mappings/hostname_map.py @@ -35,6 +35,11 @@ class SoSHostnameMap(SoSMap): '^com..*' ] + skip_keys = [ + 'www', + 'api' + ] + host_count = 0 domain_count = 0 _domains = {} diff --git a/sos/cleaner/obfuscation_archive.py b/sos/cleaner/obfuscation_archive.py index 88f978d9..90188358 100644 --- a/sos/cleaner/obfuscation_archive.py +++ b/sos/cleaner/obfuscation_archive.py @@ -202,10 +202,22 @@ class SoSObfuscationArchive(): """Return a list of all files within the archive""" self.file_list = [] for dirname, dirs, files in os.walk(self.extracted_path): + for _dir in dirs: + _dirpath = os.path.join(dirname, _dir) + # catch dir-level symlinks + if os.path.islink(_dirpath) and os.path.isdir(_dirpath): + self.file_list.append(_dirpath) for filename in files: self.file_list.append(os.path.join(dirname, filename)) return self.file_list + def get_directory_list(self): + """Return a list of all directories within the archive""" + dir_list = [] + for dirname, dirs, files in os.walk(self.extracted_path): + dir_list.append(dirname) + return dir_list + def update_sub_count(self, fname, count): """Called when a file has finished being parsed and used to track total substitutions made and number of files that had changes made @@ -230,7 +242,8 @@ class SoSObfuscationArchive(): archive root """ - if not os.path.isfile(self.get_file_path(filename)): + if (not os.path.isfile(self.get_file_path(filename)) and not + os.path.islink(self.get_file_path(filename))): return True for _skip in self.skip_list: @@ -266,7 +279,10 @@ class SoSObfuscationArchive(): if re.match(_arc_reg, fname): return True - return self.file_is_binary(fname) + if 
From f180150277b706e72f2445287f3d0b6943efa252 Mon Sep 17 00:00:00 2001
From: Jake Hunsaker
Date: Wed, 16 Jun 2021 02:24:51 -0400
Subject: [PATCH 07/10] [hostname parser,map] Attempt to detect strings with FQDN substrings

This commit updates the hostname parser and associated map to better detect
and obfuscate FQDN substrings within file content and file names,
particularly when the regex patterns failed to match a hostname that is
formatted with '_' characters rather than '.' characters.

The `get()` method has been updated to allow preserving characters and
certain extensions that are not part of the FQDN, but are brought in by the
regex pattern due to the fact that we need to use word boundary indicators
within the pattern.

Signed-off-by: Jake Hunsaker
---
 sos/cleaner/mappings/hostname_map.py   | 59 +++++++++++++++++++++++---
 sos/cleaner/parsers/__init__.py        |  3 +-
 sos/cleaner/parsers/hostname_parser.py | 30 ++++++++++---
 3 files changed, 81 insertions(+), 11 deletions(-)

diff --git a/sos/cleaner/mappings/hostname_map.py b/sos/cleaner/mappings/hostname_map.py
index c9a44d8d..d4b2c88e 100644
--- a/sos/cleaner/mappings/hostname_map.py
+++ b/sos/cleaner/mappings/hostname_map.py
@@ -104,7 +104,7 @@ class SoSHostnameMap(SoSMap):
         host = domain.split('.')
         if len(host) == 1:
             # don't block on host's shortname
-            return True
+            return host[0] in self.hosts.keys()
         else:
             domain = host[0:-1]
             for known_domain in self._domains:
@@ -113,12 +113,59 @@ class SoSHostnameMap(SoSMap):
         return False
 
     def get(self, item):
-        if item.startswith(('.', '_')):
-            item = item.lstrip('._')
-        item = item.strip()
+        prefix = ''
+        suffix = ''
+        final = None
+        # The regex pattern match may include a leading and/or trailing '_'
+        # character due to the need to use word boundary matching, so we need
+        # to strip these from the string during processing, but still keep them
+        # in the returned string to not mangle the string replacement in the
+        # context of the file or filename
+        while item.startswith(('.', '_')):
+            prefix += item[0]
+            item = item[1:]
+        while item.endswith(('.', '_')):
+            suffix += item[-1]
+            item = item[0:-1]
         if not self.domain_name_in_loaded_domains(item.lower()):
             return item
-        return super(SoSHostnameMap, self).get(item)
+        if item.endswith(('.yaml', '.yml', '.crt', '.key', '.pem')):
+            ext = '.' + item.split('.')[-1]
+            item = item.replace(ext, '')
+            suffix += ext
+        if item not in self.dataset.keys():
+            # try to account for use of '-' in names that include hostnames
+            # and don't create new mappings for each of these
+            for _existing in sorted(self.dataset.keys(), reverse=True,
+                                    key=lambda x: len(x)):
+                _host_substr = False
+                _test = item.split(_existing)
+                _h = _existing.split('.')
+                # avoid considering a full FQDN match as a new match off of
+                # the hostname of an existing match
+                if _h[0] and _h[0] in self.hosts.keys():
+                    _host_substr = True
+                if len(_test) == 1 or not _test[0]:
+                    # does not match existing obfuscation
+                    continue
+                elif _test[0].endswith('.') and not _host_substr:
+                    # new hostname in known domain
+                    final = super(SoSHostnameMap, self).get(item)
+                    break
+                elif item.split(_test[0]):
+                    # string that includes existing FQDN obfuscation substring
+                    # so, only obfuscate the FQDN part
+                    try:
+                        itm = item.split(_test[0])[1]
+                        final = _test[0] + super(SoSHostnameMap, self).get(itm)
+                        break
+                    except Exception:
+                        # fallback to still obfuscating the entire item
+                        pass
+
+        if not final:
+            final = super(SoSHostnameMap, self).get(item)
+        return prefix + final + suffix
 
     def sanitize_item(self, item):
         host = item.split('.')
@@ -146,6 +193,8 @@ class SoSHostnameMap(SoSMap):
         """Obfuscate the short name of the host with an incremented counter
         based on the total number of obfuscated host names
         """
+        if not hostname:
+            return hostname
         if hostname not in self.hosts:
             ob_host = "host%s" % self.host_count
             self.hosts[hostname] = ob_host
diff --git a/sos/cleaner/parsers/__init__.py b/sos/cleaner/parsers/__init__.py
index 84874475..57d2020a 100644
--- a/sos/cleaner/parsers/__init__.py
+++ b/sos/cleaner/parsers/__init__.py
@@ -87,7 +87,8 @@ class SoSCleanerParser():
                 for match in matches:
                     match = match.strip()
                     new_match = self.mapping.get(match)
-                    line = line.replace(match, new_match)
+                    if new_match != match:
+                        line = line.replace(match, new_match)
         return line, count
 
     def parse_string_for_keys(self, string_data):
diff --git a/sos/cleaner/parsers/hostname_parser.py b/sos/cleaner/parsers/hostname_parser.py
index 9982024b..3de6bb08 100644
--- a/sos/cleaner/parsers/hostname_parser.py
+++ b/sos/cleaner/parsers/hostname_parser.py
@@ -18,7 +18,7 @@ class SoSHostnameParser(SoSCleanerParser):
     map_file_key = 'hostname_map'
     prep_map_file = 'sos_commands/host/hostname'
     regex_patterns = [
-        r'(((\b|_)[a-zA-Z0-9-\.]{1,200}\.[a-zA-Z]{1,63}\b))'
+        r'(((\b|_)[a-zA-Z0-9-\.]{1,200}\.[a-zA-Z]{1,63}(\b|_)))'
     ]
 
     def __init__(self, conf_file=None, opt_domains=None):
@@ -66,10 +66,30 @@ class SoSHostnameParser(SoSCleanerParser):
         """Override the default parse_line() method to also check for the
         shortname of the host derived from the hostname.
         """
+
+        def _check_line(ln, count, search, repl=None):
+            """Perform a second manual check for substrings that may have been
+            missed by regex matching
+            """
+            if search in self.mapping.skip_keys:
+                return ln, count
+            if search in ln:
+                count += ln.count(search)
+                ln = ln.replace(search, self.mapping.get(repl or search))
+            return ln, count
+
         count = 0
         line, count = super(SoSHostnameParser, self).parse_line(line)
-        for short_name in self.short_names:
-            if short_name in line:
-                count += 1
-                line = line.replace(short_name, self.mapping.get(short_name))
+        # make an additional pass checking for '_' formatted substrings that
+        # the regex patterns won't catch
+        hosts = [h for h in self.mapping.dataset.keys() if '.' in h]
+        for host in sorted(hosts, reverse=True, key=lambda x: len(x)):
+            fqdn = host
+            for c in '.-':
+                fqdn = fqdn.replace(c, '_')
+            line, count = _check_line(line, count, fqdn, host)
+
+        for short_name in sorted(self.short_names, reverse=True):
+            line, count = _check_line(line, count, short_name)
+
         return line, count
--
2.26.3
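To illustrate the boundary handling the commit message above describes, a simplified self-contained take on the idea; the map contents and token are invented, and the real get() additionally handles known-domain checks and partial FQDN matches:

    def obfuscate_token(token, fqdn_map):
        # word-boundary regex matches can drag leading/trailing '.' or '_'
        # characters along; peel them off, map the bare FQDN, then re-attach
        prefix = suffix = ''
        while token.startswith(('.', '_')):
            prefix, token = prefix + token[0], token[1:]
        while token.endswith(('.', '_')):
            suffix, token = suffix + token[-1], token[:-1]
        return prefix + fqdn_map.get(token, token) + suffix

    fqdn_map = {'node1.example.com': 'host0.obfuscateddomain0.com'}
    print(obfuscate_token('_node1.example.com_', fqdn_map))
    # _host0.obfuscateddomain0.com_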
From ec46e6a8fac58ed757344be3751eb1f925eab981 Mon Sep 17 00:00:00 2001
From: Jake Hunsaker
Date: Mon, 14 Jun 2021 09:31:07 -0400
Subject: [PATCH 08/10] [ocp] Refine OCP node options in cluster profile

Adds explicit setting of primary/node sos options for the `openshift`
plugin within the cluster, rather than relying on default configurations
and best practices to avoid duplicate collections.

Signed-off-by: Jake Hunsaker
---
 sos/collector/clusters/ocp.py | 65 +++++++++++++++++++++++++++++++++--
 sos/collector/sosnode.py      |  4 +--
 2 files changed, 65 insertions(+), 4 deletions(-)

diff --git a/sos/collector/clusters/ocp.py b/sos/collector/clusters/ocp.py
index 283fcfd1..ddff84a4 100644
--- a/sos/collector/clusters/ocp.py
+++ b/sos/collector/clusters/ocp.py
@@ -8,6 +8,8 @@
 #
 # See the LICENSE file in the source distribution for further information.
 
+import os
+
 from pipes import quote
 
 from sos.collector.clusters import Cluster
@@ -18,10 +20,14 @@ class ocp(Cluster):
 
     cluster_name = 'OpenShift Container Platform v4'
     packages = ('openshift-hyperkube', 'openshift-clients')
+    api_collect_enabled = False
+    token = None
+
     option_list = [
         ('label', '', 'Colon delimited list of labels to select nodes with'),
         ('role', '', 'Colon delimited list of roles to select nodes with'),
-        ('kubeconfig', '', 'Path to the kubeconfig file')
+        ('kubeconfig', '', 'Path to the kubeconfig file'),
+        ('token', '', 'Service account token to use for oc authorization')
     ]
 
     def fmt_oc_cmd(self, cmd):
@@ -32,9 +38,20 @@ class ocp(Cluster):
             return "oc --config %s %s" % (self.get_option('kubeconfig'), cmd)
         return "oc %s" % cmd
 
+    def _attempt_oc_login(self):
+        """Attempt to login to the API using the oc command using a provided
+        token
+        """
+        _res = self.exec_primary_cmd("oc login --insecure-skip-tls-verify=True"
+                                     " --token=%s" % self.token)
+        return _res['status'] == 0
+
     def check_enabled(self):
         if super(ocp, self).check_enabled():
             return True
+        self.token = self.get_option('token') or os.getenv('SOSOCPTOKEN', None)
+        if self.token:
+            self._attempt_oc_login()
         _who = self.fmt_oc_cmd('whoami')
         return self.exec_master_cmd(_who)['status'] == 0
 
@@ -106,4 +123,48 @@ class ocp(Cluster):
         return 'master' in self.node_dict[sosnode.address]['roles']
 
     def set_master_options(self, node):
-        node.opts.enable_plugins.append('openshift')
+        node.enable_plugins.append('openshift')
+        if self.api_collect_enabled:
+            # a primary has already been enabled for API collection, disable
+            # it among others
+            node.plugin_options.append('openshift.no-oc=on')
+        else:
+            _oc_cmd = 'oc'
+            if node.host.containerized:
+                _oc_cmd = '/host/bin/oc'
+                # when run from a container, the oc command does not inherit
+                # the default config, so if it's present then pass it here to
+                # detect a functional oc command. This is sidestepped in sos
+                # report by being able to chroot the `oc` execution which we
+                # cannot do remotely
+                if node.file_exists('/root/.kube/config', need_root=True):
+                    _oc_cmd += ' --kubeconfig /host/root/.kube/config'
+            can_oc = node.run_command("%s whoami" % _oc_cmd,
+                                      use_container=node.host.containerized,
+                                      # container is available only to root
+                                      # and if rhel, need to run sos as root
+                                      # anyways which will run oc as root
+                                      need_root=True)
+            if can_oc['status'] == 0:
+                # the primary node can already access the API
+                self.api_collect_enabled = True
+            elif self.token:
+                node.sos_env_vars['SOSOCPTOKEN'] = self.token
+                self.api_collect_enabled = True
+            elif self.get_option('kubeconfig'):
+                kc = self.get_option('kubeconfig')
+                if node.file_exists(kc):
+                    if node.host.containerized:
+                        kc = "/host/%s" % kc
+                    node.sos_env_vars['KUBECONFIG'] = kc
+                    self.api_collect_enabled = True
+        if self.api_collect_enabled:
+            msg = ("API collections will be performed on %s\nNote: API "
+                   "collections may extend runtime by 10s of minutes\n"
+                   % node.address)
+            self.soslog.info(msg)
+            self.ui_log.info(msg)
+
+    def set_node_options(self, node):
+        # don't attempt OC API collections on non-primary nodes
+        node.plugin_options.append('openshift.no-oc=on')
diff --git a/sos/collector/sosnode.py b/sos/collector/sosnode.py
index 1c25cc34..6597d236 100644
--- a/sos/collector/sosnode.py
+++ b/sos/collector/sosnode.py
@@ -202,11 +202,11 @@ class SosNode():
             self.opts.registry_authfile or self.host.container_authfile
         )
 
-    def file_exists(self, fname):
+    def file_exists(self, fname, need_root=False):
         """Checks for the presence of fname on the remote node"""
         if not self.local:
             try:
-                res = self.run_command("stat %s" % fname)
+                res = self.run_command("stat %s" % fname, need_root=need_root)
                 return res['status'] == 0
             except Exception:
                 return False
--
2.26.3
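The intent of set_master_options() and set_node_options() above is that exactly one node performs oc API collections while every other node receives 'openshift.no-oc=on'. A rough sketch of that assignment, using plain dicts as stand-ins for the real SosNode objects:

    def assign_openshift_options(nodes, api_node):
        for node in nodes:
            opts = node.setdefault('plugin_options', [])
            if node['name'] != api_node:
                # every node except the chosen primary disables oc usage,
                # which avoids duplicate (and slow) API collections
                opts.append('openshift.no-oc=on')

    cluster = [{'name': 'master-0'}, {'name': 'worker-1'}, {'name': 'worker-2'}]
    assign_openshift_options(cluster, api_node='master-0')
    # only master-0 is left free to run oc-based API collections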
From eea8e15845a8bcba91b93a5310ba693e8c20ab9c Mon Sep 17 00:00:00 2001
From: Jake Hunsaker
Date: Thu, 17 Jun 2021 09:52:36 -0400
Subject: [PATCH 09/10] [cleaner] Don't obfuscate default 'core' user

The 'core' user is a common default user on containerized hosts, and
obfuscation of it is not advantageous, much like the default 'ubuntu' user
for that distribution.

Signed-off-by: Jake Hunsaker
---
 sos/cleaner/parsers/username_parser.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/sos/cleaner/parsers/username_parser.py b/sos/cleaner/parsers/username_parser.py
index 0c3bbac4..64843205 100644
--- a/sos/cleaner/parsers/username_parser.py
+++ b/sos/cleaner/parsers/username_parser.py
@@ -28,6 +28,7 @@ class SoSUsernameParser(SoSCleanerParser):
     prep_map_file = 'sos_commands/login/lastlog_-u_1000-60000'
     regex_patterns = []
     skip_list = [
+        'core',
         'nobody',
         'nfsnobody',
         'root'
--
2.26.3

From 581429ca65131711c96f9d56bf2f0e18779aec2e Mon Sep 17 00:00:00 2001
From: Jake Hunsaker
Date: Fri, 18 Jun 2021 14:26:55 -0400
Subject: [PATCH 10/10] [cleaner] Fix checksum and archive pruning from archive list

Fixes an issue where checksum files could make their way into the list of
archives to be cleaned, which would cause further issues later in the run.

Additionally, prevents nested sosreports extracted from top-level archives
(such as those from `collect`) from being removed as binary files when the
top-level archive gets obfuscated.
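As a toy illustration of the pruning described above (file names invented), checksum files that sit alongside nested sosreport tarballs are filtered out before the tarballs are queued for cleaning:

    candidates = [
        'sosreport-node1-2021-06-18.tar.xz',
        'sosreport-node1-2021-06-18.tar.xz.md5',
        'sosreport-node2-2021-06-18.tar.xz.sha256',
    ]
    to_clean = [f for f in candidates if not f.endswith(('.md5', '.sha256'))]
    # to_clean == ['sosreport-node1-2021-06-18.tar.xz']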
--- sos/cleaner/__init__.py | 5 +++-- sos/cleaner/obfuscation_archive.py | 1 + 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/sos/cleaner/__init__.py b/sos/cleaner/__init__.py index 88d4d0ea..8280bc50 100644 --- a/sos/cleaner/__init__.py +++ b/sos/cleaner/__init__.py @@ -226,8 +226,7 @@ third party. nested_archives = [] for _file in archive.getmembers(): if (re.match('sosreport-.*.tar', _file.name.split('/')[-1]) and not - (_file.name.endswith('.md5') or - _file.name.endswith('.sha256'))): + (_file.name.endswith(('.md5', '.sha256')))): nested_archives.append(_file.name.split('/')[-1]) if nested_archives: @@ -235,6 +234,8 @@ third party. nested_path = self.extract_archive(archive) for arc_file in os.listdir(nested_path): if re.match('sosreport.*.tar.*', arc_file): + if arc_file.endswith(('.md5', '.sha256')): + continue self.report_paths.append(os.path.join(nested_path, arc_file)) # add the toplevel extracted archive diff --git a/sos/cleaner/obfuscation_archive.py b/sos/cleaner/obfuscation_archive.py index 90188358..e357450b 100644 --- a/sos/cleaner/obfuscation_archive.py +++ b/sos/cleaner/obfuscation_archive.py @@ -58,6 +58,7 @@ class SoSObfuscationArchive(): Returns: list of files and file regexes """ return [ + 'sosreport-', 'sys/firmware', 'sys/fs', 'sys/kernel/debug', -- 2.26.3
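Finally, a standalone sketch of how a skip-list entry such as the new 'sosreport-' pattern keeps nested report tarballs from being content-parsed; the matching is simplified and the paths are invented, while the real list mixes literal paths and regexes:

    import re

    skip_list = ['sosreport-', 'sys/firmware', 'sys/fs', 'sys/kernel/debug']

    def file_should_skip(relpath):
        # any file whose in-archive path matches a skip entry is left untouched
        return any(re.search(entry, relpath) for entry in skip_list)

    print(file_should_skip('sosreport-worker1-2021-06-18.tar.xz'))  # True
    print(file_should_skip('var/log/messages'))                     # False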