Blame SOURCES/sos-bz1973675-ocp-cluster-cleaner.patch

ecf6d6
From 29afda6e4ff90385d34bc61315542e7cb4baaf8d Mon Sep 17 00:00:00 2001
ecf6d6
From: Jake Hunsaker <jhunsake@redhat.com>
ecf6d6
Date: Fri, 9 Apr 2021 11:32:14 -0400
ecf6d6
Subject: [PATCH] [cleaner] Do not break iteration of parse_string_for_keys on
ecf6d6
 first match
ecf6d6
ecf6d6
Previously, `parse_string_for_keys()`, called by `obfuscate_string()`
ecf6d6
for non-regex based obfuscations, would return on the first match in the
ecf6d6
string found for each parser.
ecf6d6
ecf6d6
Instead, continue iterating over all items in each parser's dataset
ecf6d6
before returning the (now fully) obfuscated string.
ecf6d6
ecf6d6
Resolves: #2480
ecf6d6
ecf6d6
Signed-off-by: Jake Hunsaker <jhunsake@redhat.com>
ecf6d6
---
ecf6d6
 sos/cleaner/parsers/__init__.py | 2 +-
ecf6d6
 1 file changed, 1 insertion(+), 1 deletion(-)
ecf6d6
ecf6d6
diff --git a/sos/cleaner/parsers/__init__.py b/sos/cleaner/parsers/__init__.py
ecf6d6
index dd0451df..c77300aa 100644
ecf6d6
--- a/sos/cleaner/parsers/__init__.py
ecf6d6
+++ b/sos/cleaner/parsers/__init__.py
ecf6d6
@@ -104,7 +104,7 @@ class SoSCleanerParser():
ecf6d6
         """
ecf6d6
         for key, val in self.mapping.dataset.items():
ecf6d6
             if key in string_data:
ecf6d6
-                return string_data.replace(key, val)
ecf6d6
+                string_data = string_data.replace(key, val)
ecf6d6
         return string_data
ecf6d6
 
ecf6d6
     def get_map_contents(self):
ecf6d6
-- 
ecf6d6
2.26.3
ecf6d6
ecf6d6
From 52e6b2ae17e128f17a84ee83b7718c2901bcd5bd Mon Sep 17 00:00:00 2001
ecf6d6
From: Jake Hunsaker <jhunsake@redhat.com>
ecf6d6
Date: Wed, 12 May 2021 12:39:48 -0400
ecf6d6
Subject: [PATCH] [collect] Add options to provide registry auth for pulling
ecf6d6
 images
ecf6d6
ecf6d6
Adds options that allow a user to specify registry authentication,
ecf6d6
either via username/password or an authfile, to allow pulling an image
ecf6d6
that exists on a non-public registry.
ecf6d6
ecf6d6
If a username/password is provided, that will be used. If not, we will
ecf6d6
attempt to use an authfile - either provided by the user or by a cluster
ecf6d6
profile.
ecf6d6
ecf6d6
Also adds an option to forcibly pull a new(er) version of the specified
ecf6d6
image, to alleviate conditions where a too-old version of the image
ecf6d6
already exists on the host.
ecf6d6
ecf6d6
Closes: #2534
ecf6d6
ecf6d6
Signed-off-by: Jake Hunsaker <jhunsake@redhat.com>
ecf6d6
---
ecf6d6
 man/en/sos-collect.1              | 30 +++++++++++++++++++++++
ecf6d6
 sos/collector/__init__.py         | 17 +++++++++++++
ecf6d6
 sos/collector/sosnode.py          | 40 +++++++++++++++++++++++++++----
ecf6d6
 sos/policies/distros/__init__.py  | 16 ++++++++++++-
ecf6d6
 sos/policies/distros/redhat.py    | 25 ++++++++++++-------
ecf6d6
 sos/policies/runtimes/__init__.py | 25 +++++++++++++++++++
ecf6d6
 6 files changed, 140 insertions(+), 13 deletions(-)
ecf6d6
ecf6d6
diff --git a/man/en/sos-collect.1 b/man/en/sos-collect.1
ecf6d6
index 286bfe71..cdbc3257 100644
ecf6d6
--- a/man/en/sos-collect.1
ecf6d6
+++ b/man/en/sos-collect.1
ecf6d6
@@ -26,6 +26,11 @@ sos collect \- Collect sosreports from multiple (cluster) nodes
ecf6d6
     [\-\-no\-pkg\-check]
ecf6d6
     [\-\-no\-local]
ecf6d6
     [\-\-master MASTER]
ecf6d6
+    [\-\-image IMAGE]
ecf6d6
+    [\-\-force-pull-image]
ecf6d6
+    [\-\-registry-user USER]
ecf6d6
+    [\-\-registry-password PASSWORD]
ecf6d6
+    [\-\-registry-authfile FILE]
ecf6d6
     [\-o ONLY_PLUGINS]
ecf6d6
     [\-p SSH_PORT]
ecf6d6
     [\-\-password]
ecf6d6
@@ -245,6 +250,31 @@ Specify a master node for the cluster.
ecf6d6
 If provided, then sos collect will check the master node, not localhost, for determining
ecf6d6
 the type of cluster in use.
ecf6d6
 .TP
ecf6d6
+\fB\-\-image IMAGE\fR
ecf6d6
+Specify an image to use for the temporary container created for collections on
ecf6d6
+containerized hosts, if you do not want to use the default image specified by the
ecf6d6
+host's policy. Note that this should include the registry.
ecf6d6
+.TP
ecf6d6
+\fB\-\-force-pull-image\fR
ecf6d6
+Use this option to force the container runtime to pull the specified image (even
ecf6d6
+if it is the policy default image) even if the image already exists on the host.
ecf6d6
+This may be useful to update an older container image on containerized hosts.
ecf6d6
+.TP
ecf6d6
+\fB\-\-registry-user USER\fR
ecf6d6
+Specify the username to authenticate to the registry with in order to pull the container
ecf6d6
+image
ecf6d6
+.TP
ecf6d6
+\fB\-\-registry-password PASSWORD\fR
ecf6d6
+Specify the password to authenticate to the registry with in order to pull the container
ecf6d6
+image. If no password is required, leave this blank.
ecf6d6
+.TP
ecf6d6
+\fB\-\-registry-authfile FILE\fR
ecf6d6
+Specify the filename to use for providing authentication credentials to the registry
ecf6d6
+to pull the container image.
ecf6d6
+
ecf6d6
+Note that this file must exist on the node(s) performing the pull operations, not the
ecf6d6
+node from which \fBsos collect\fR was run.
ecf6d6
+.TP
ecf6d6
 \fB\-o\fR ONLY_PLUGINS, \fB\-\-only\-plugins\fR ONLY_PLUGINS
ecf6d6
 Sosreport option. Run ONLY the plugins listed.
ecf6d6
 
ecf6d6
diff --git a/sos/collector/__init__.py b/sos/collector/__init__.py
ecf6d6
index 1c742cf5..0624caad 100644
ecf6d6
--- a/sos/collector/__init__.py
ecf6d6
+++ b/sos/collector/__init__.py
ecf6d6
@@ -63,6 +63,7 @@ class SoSCollector(SoSComponent):
ecf6d6
         'encrypt_pass': '',
ecf6d6
         'group': None,
ecf6d6
         'image': '',
ecf6d6
+        'force_pull_image': False,
ecf6d6
         'jobs': 4,
ecf6d6
         'keywords': [],
ecf6d6
         'keyword_file': None,
ecf6d6
@@ -84,6 +85,9 @@ class SoSCollector(SoSComponent):
ecf6d6
         'plugin_timeout': None,
ecf6d6
         'cmd_timeout': None,
ecf6d6
         'preset': '',
ecf6d6
+        'registry_user': None,
ecf6d6
+        'registry_password': None,
ecf6d6
+        'registry_authfile': None,
ecf6d6
         'save_group': '',
ecf6d6
         'since': '',
ecf6d6
         'skip_commands': [],
ecf6d6
@@ -319,6 +323,19 @@ class SoSCollector(SoSComponent):
ecf6d6
         collect_grp.add_argument('--image',
ecf6d6
                                  help=('Specify the container image to use for'
ecf6d6
                                        ' containerized hosts.'))
ecf6d6
+        collect_grp.add_argument('--force-pull-image', '--pull', default=False,
ecf6d6
+                                 action='store_true',
ecf6d6
+                                 help='Force pull the container image even if '
ecf6d6
+                                      'it already exists on the host')
ecf6d6
+        collect_grp.add_argument('--registry-user', default=None,
ecf6d6
+                                 help='Username to authenticate to the '
ecf6d6
+                                      'registry with for pulling an image')
ecf6d6
+        collect_grp.add_argument('--registry-password', default=None,
ecf6d6
+                                 help='Password to authenticate to the '
ecf6d6
+                                      'registry with for pulling an image')
ecf6d6
+        collect_grp.add_argument('--registry-authfile', default=None,
ecf6d6
+                                 help='Use this authfile to provide registry '
ecf6d6
+                                      'authentication when pulling an image')
ecf6d6
         collect_grp.add_argument('-i', '--ssh-key', help='Specify an ssh key')
ecf6d6
         collect_grp.add_argument('-j', '--jobs', default=4, type=int,
ecf6d6
                                  help='Number of concurrent nodes to collect')
ecf6d6
diff --git a/sos/collector/sosnode.py b/sos/collector/sosnode.py
ecf6d6
index 48693342..d1c11824 100644
ecf6d6
--- a/sos/collector/sosnode.py
ecf6d6
+++ b/sos/collector/sosnode.py
ecf6d6
@@ -134,9 +134,27 @@ class SosNode():
ecf6d6
         """If the host is containerized, create the container we'll be using
ecf6d6
         """
ecf6d6
         if self.host.containerized:
ecf6d6
-            res = self.run_command(self.host.create_sos_container(),
ecf6d6
-                                   need_root=True)
ecf6d6
-            if res['status'] in [0, 125]:  # 125 means container exists
ecf6d6
+            cmd = self.host.create_sos_container(
ecf6d6
+                image=self.opts.image,
ecf6d6
+                auth=self.get_container_auth(),
ecf6d6
+                force_pull=self.opts.force_pull_image
ecf6d6
+            )
ecf6d6
+            res = self.run_command(cmd, need_root=True)
ecf6d6
+            if res['status'] in [0, 125]:
ecf6d6
+                if res['status'] == 125:
ecf6d6
+                    if 'unable to retrieve auth token' in res['stdout']:
ecf6d6
+                        self.log_error(
ecf6d6
+                            "Could not pull image. Provide either a username "
ecf6d6
+                            "and password or authfile"
ecf6d6
+                        )
ecf6d6
+                        raise Exception
ecf6d6
+                    elif 'unknown: Not found' in res['stdout']:
ecf6d6
+                        self.log_error('Specified image not found on registry')
ecf6d6
+                        raise Exception
ecf6d6
+                    # 'name exists' with code 125 means the container was
ecf6d6
+                    # created successfully, so ignore it.
ecf6d6
+                # initial creation leads to an exited container, restarting it
ecf6d6
+                # here will keep it alive for us to exec through
ecf6d6
                 ret = self.run_command(self.host.restart_sos_container(),
ecf6d6
                                        need_root=True)
ecf6d6
                 if ret['status'] == 0:
ecf6d6
@@ -152,6 +170,20 @@ class SosNode():
ecf6d6
                                % res['stdout'])
ecf6d6
                 raise Exception
ecf6d6
 
ecf6d6
+    def get_container_auth(self):
ecf6d6
+        """Determine what the auth string should be to pull the image used to
ecf6d6
+        deploy our temporary container
ecf6d6
+        """
ecf6d6
+        if self.opts.registry_user:
ecf6d6
+            return self.host.runtimes['default'].fmt_registry_credentials(
ecf6d6
+                self.opts.registry_user,
ecf6d6
+                self.opts.registry_password
ecf6d6
+            )
ecf6d6
+        else:
ecf6d6
+            return self.host.runtimes['default'].fmt_registry_authfile(
ecf6d6
+                self.opts.registry_authfile or self.host.container_authfile
ecf6d6
+            )
ecf6d6
+
ecf6d6
     def file_exists(self, fname):
ecf6d6
         """Checks for the presence of fname on the remote node"""
ecf6d6
         if not self.local:
ecf6d6
@@ -343,7 +375,7 @@ class SosNode():
ecf6d6
                           % self.commons['policy'].distro)
ecf6d6
             return self.commons['policy']
ecf6d6
         host = load(cache={}, sysroot=self.opts.sysroot, init=InitSystem(),
ecf6d6
-                    probe_runtime=False, remote_exec=self.ssh_cmd,
ecf6d6
+                    probe_runtime=True, remote_exec=self.ssh_cmd,
ecf6d6
                     remote_check=self.read_file('/etc/os-release'))
ecf6d6
         if host:
ecf6d6
             self.log_info("loaded policy %s for host" % host.distro)
ecf6d6
diff --git a/sos/policies/distros/__init__.py b/sos/policies/distros/__init__.py
ecf6d6
index 9fe31513..f5b9fd5b 100644
ecf6d6
--- a/sos/policies/distros/__init__.py
ecf6d6
+++ b/sos/policies/distros/__init__.py
ecf6d6
@@ -62,6 +62,7 @@ class LinuxPolicy(Policy):
ecf6d6
     sos_bin_path = '/usr/bin'
ecf6d6
     sos_container_name = 'sos-collector-tmp'
ecf6d6
     container_version_command = None
ecf6d6
+    container_authfile = None
ecf6d6
 
ecf6d6
     def __init__(self, sysroot=None, init=None, probe_runtime=True):
ecf6d6
         super(LinuxPolicy, self).__init__(sysroot=sysroot,
ecf6d6
@@ -626,13 +627,26 @@ class LinuxPolicy(Policy):
ecf6d6
         """
ecf6d6
         return ''
ecf6d6
 
ecf6d6
-    def create_sos_container(self):
ecf6d6
+    def create_sos_container(self, image=None, auth=None, force_pull=False):
ecf6d6
         """Returns the command that will create the container that will be
ecf6d6
         used for running commands inside a container on hosts that require it.
ecf6d6
 
ecf6d6
         This will use the container runtime defined for the host type to
ecf6d6
         launch a container. From there, we use the defined runtime to exec into
ecf6d6
         the container's namespace.
ecf6d6
+
ecf6d6
+        :param image:   The name of the image if not using the policy default
ecf6d6
+        :type image:    ``str`` or ``None``
ecf6d6
+
ecf6d6
+        :param auth:    The auth string required by the runtime to pull an
ecf6d6
+                        image from the registry
ecf6d6
+        :type auth:     ``str`` or ``None``
ecf6d6
+
ecf6d6
+        :param force_pull:  Should the runtime forcibly pull the image
ecf6d6
+        :type force_pull:   ``bool``
ecf6d6
+
ecf6d6
+        :returns:   The command to execute to launch the temp container
ecf6d6
+        :rtype:     ``str``
ecf6d6
         """
ecf6d6
         return ''
ecf6d6
 
ecf6d6
diff --git a/sos/policies/distros/redhat.py b/sos/policies/distros/redhat.py
ecf6d6
index 241d3f13..20afbcc4 100644
ecf6d6
--- a/sos/policies/distros/redhat.py
ecf6d6
+++ b/sos/policies/distros/redhat.py
ecf6d6
@@ -452,15 +452,19 @@ support representative.
ecf6d6
 
ecf6d6
         return self.find_preset(ATOMIC)
ecf6d6
 
ecf6d6
-    def create_sos_container(self):
ecf6d6
+    def create_sos_container(self, image=None, auth=None, force_pull=False):
ecf6d6
         _cmd = ("{runtime} run -di --name {name} --privileged --ipc=host"
ecf6d6
                 " --net=host --pid=host -e HOST=/host -e NAME={name} -e "
ecf6d6
-                "IMAGE={image} -v /run:/run -v /var/log:/var/log -v "
ecf6d6
+                "IMAGE={image} {pull} -v /run:/run -v /var/log:/var/log -v "
ecf6d6
                 "/etc/machine-id:/etc/machine-id -v "
ecf6d6
-                "/etc/localtime:/etc/localtime -v /:/host {image}")
ecf6d6
+                "/etc/localtime:/etc/localtime -v /:/host {auth} {image}")
ecf6d6
+        _image = image or self.container_image
ecf6d6
+        _pull = '--pull=always' if force_pull else ''
ecf6d6
         return _cmd.format(runtime=self.container_runtime,
ecf6d6
                            name=self.sos_container_name,
ecf6d6
-                           image=self.container_image)
ecf6d6
+                           image=_image,
ecf6d6
+                           pull=_pull,
ecf6d6
+                           auth=auth or '')
ecf6d6
 
ecf6d6
     def set_cleanup_cmd(self):
ecf6d6
         return 'docker rm --force sos-collector-tmp'
ecf6d6
@@ -482,6 +486,7 @@ support representative.
ecf6d6
     container_image = 'registry.redhat.io/rhel8/support-tools'
ecf6d6
     sos_path_strip = '/host'
ecf6d6
     container_version_command = 'rpm -q sos'
ecf6d6
+    container_authfile = '/var/lib/kubelet/config.json'
ecf6d6
 
ecf6d6
     def __init__(self, sysroot=None, init=None, probe_runtime=True,
ecf6d6
                  remote_exec=None):
ecf6d6
@@ -511,15 +516,19 @@ support representative.
ecf6d6
         # RH OCP environments.
ecf6d6
         return self.find_preset(RHOCP)
ecf6d6
 
ecf6d6
-    def create_sos_container(self):
ecf6d6
+    def create_sos_container(self, image=None, auth=None, force_pull=False):
ecf6d6
         _cmd = ("{runtime} run -di --name {name} --privileged --ipc=host"
ecf6d6
                 " --net=host --pid=host -e HOST=/host -e NAME={name} -e "
ecf6d6
-                "IMAGE={image} -v /run:/run -v /var/log:/var/log -v "
ecf6d6
+                "IMAGE={image} {pull} -v /run:/run -v /var/log:/var/log -v "
ecf6d6
                 "/etc/machine-id:/etc/machine-id -v "
ecf6d6
-                "/etc/localtime:/etc/localtime -v /:/host {image}")
ecf6d6
+                "/etc/localtime:/etc/localtime -v /:/host {auth} {image}")
ecf6d6
+        _image = image or self.container_image
ecf6d6
+        _pull = '--pull=always' if force_pull else ''
ecf6d6
         return _cmd.format(runtime=self.container_runtime,
ecf6d6
                            name=self.sos_container_name,
ecf6d6
-                           image=self.container_image)
ecf6d6
+                           image=_image,
ecf6d6
+                           pull=_pull,
ecf6d6
+                           auth=auth or '')
ecf6d6
 
ecf6d6
     def set_cleanup_cmd(self):
ecf6d6
         return 'podman rm --force %s' % self.sos_container_name
ecf6d6
diff --git a/sos/policies/runtimes/__init__.py b/sos/policies/runtimes/__init__.py
ecf6d6
index 1a61b644..f28d6a1d 100644
ecf6d6
--- a/sos/policies/runtimes/__init__.py
ecf6d6
+++ b/sos/policies/runtimes/__init__.py
ecf6d6
@@ -157,6 +157,31 @@ class ContainerRuntime():
ecf6d6
             quoted_cmd = cmd
ecf6d6
         return "%s %s %s" % (self.run_cmd, container, quoted_cmd)
ecf6d6
 
ecf6d6
+    def fmt_registry_credentials(self, username, password):
ecf6d6
+        """Format a string to pass to the 'run' command of the runtime to
ecf6d6
+        enable authorization for pulling the image during `sos collect`, if
ecf6d6
+        needed using username and optional password creds
ecf6d6
+
ecf6d6
+        :param username:    The name of the registry user
ecf6d6
+        :type username:     ``str``
ecf6d6
+
ecf6d6
+        :param password:    The password of the registry user
ecf6d6
+        :type password:     ``str`` or ``None``
ecf6d6
+
ecf6d6
+        :returns:  The string to use to enable a run command to pull the image
ecf6d6
+        :rtype:    ``str``
ecf6d6
+        """
ecf6d6
+        return "--creds=%s%s" % (username, ':' + password if password else '')
ecf6d6
+
ecf6d6
+    def fmt_registry_authfile(self, authfile):
ecf6d6
+        """Format a string to pass to the 'run' command of the runtime to
ecf6d6
+        enable authorization for pulling the image during `sos collect`, if
ecf6d6
+        needed using an authfile.
ecf6d6
+        """
ecf6d6
+        if authfile:
ecf6d6
+            return "--authfile %s" % authfile
ecf6d6
+        return ''
ecf6d6
+
ecf6d6
     def get_logs_command(self, container):
ecf6d6
         """Get the command string used to dump container logs from the
ecf6d6
         runtime
ecf6d6
-- 
ecf6d6
2.26.3
ecf6d6
ecf6d6
From 3cbbd7df6f0700609eeef3210d7388298b9e0c21 Mon Sep 17 00:00:00 2001
ecf6d6
From: Jake Hunsaker <jhunsake@redhat.com>
ecf6d6
Date: Wed, 12 May 2021 13:26:45 -0400
ecf6d6
Subject: [PATCH] [sosnode] Allow clusters to set options only for master nodes
ecf6d6
ecf6d6
Adds a method to `Cluster` that allows a profile to set sos options
ecf6d6
specifically for master nodes.
ecf6d6
ecf6d6
Signed-off-by: Jake Hunsaker <jhunsake@redhat.com>
ecf6d6
---
ecf6d6
 sos/collector/clusters/__init__.py | 21 +++++++++++++++++++++
ecf6d6
 sos/collector/sosnode.py           |  6 ++++++
ecf6d6
 2 files changed, 27 insertions(+)
ecf6d6
ecf6d6
diff --git a/sos/collector/clusters/__init__.py b/sos/collector/clusters/__init__.py
ecf6d6
index 5c002bae..bfa3aad3 100644
ecf6d6
--- a/sos/collector/clusters/__init__.py
ecf6d6
+++ b/sos/collector/clusters/__init__.py
ecf6d6
@@ -137,6 +137,27 @@ class Cluster():
ecf6d6
         """
ecf6d6
         self.cluster_ssh_key = key
ecf6d6
 
ecf6d6
+    def set_master_options(self, node):
ecf6d6
+        """If there is a need to set specific options in the sos command being
ecf6d6
+        run on the cluster's master nodes, override this method in the cluster
ecf6d6
+        profile and do that here.
ecf6d6
+
ecf6d6
+        :param node:       The master node
ecf6d6
+        :type node:        ``SoSNode``
ecf6d6
+        """
ecf6d6
+        pass
ecf6d6
+
ecf6d6
+    def check_node_is_master(self, node):
ecf6d6
+        """In the event there are multiple masters, or if the collect command
ecf6d6
+        is being run from a system that is technically capable of enumerating
ecf6d6
+        nodes but the cluster profile needs to specify master-specific options
ecf6d6
+        for other nodes, override this method in the cluster profile
ecf6d6
+
ecf6d6
+        :param node:        The node for the cluster to check
ecf6d6
+        :type node:         ``SoSNode``
ecf6d6
+        """
ecf6d6
+        return node.address == self.master.address
ecf6d6
+
ecf6d6
     def exec_master_cmd(self, cmd, need_root=False):
ecf6d6
         """Used to retrieve command output from a (master) node in a cluster
ecf6d6
 
ecf6d6
diff --git a/sos/collector/sosnode.py b/sos/collector/sosnode.py
ecf6d6
index d1c11824..62666635 100644
ecf6d6
--- a/sos/collector/sosnode.py
ecf6d6
+++ b/sos/collector/sosnode.py
ecf6d6
@@ -647,6 +647,10 @@ class SosNode():
ecf6d6
                                         self.cluster.sos_plugin_options[opt])
ecf6d6
                     self.opts.plugin_options.append(option)
ecf6d6
 
ecf6d6
+        # set master-only options
ecf6d6
+        if self.cluster.check_node_is_master(self):
ecf6d6
+            self.cluster.set_master_options(self)
ecf6d6
+
ecf6d6
     def finalize_sos_cmd(self):
ecf6d6
         """Use host facts and compare to the cluster type to modify the sos
ecf6d6
         command if needed"""
ecf6d6
@@ -707,6 +711,8 @@ class SosNode():
ecf6d6
             os.path.join(self.host.sos_bin_path, self.sos_bin)
ecf6d6
         )
ecf6d6
 
ecf6d6
+        self.update_cmd_from_cluster()
ecf6d6
+
ecf6d6
         if self.opts.only_plugins:
ecf6d6
             plugs = [o for o in self.opts.only_plugins
ecf6d6
                      if self._plugin_exists(o)]
ecf6d6
-- 
ecf6d6
2.26.3
ecf6d6
ecf6d6
From cae9dd79a59107aa92db5f90aed356e093985bd9 Mon Sep 17 00:00:00 2001
ecf6d6
From: Jake Hunsaker <jhunsake@redhat.com>
ecf6d6
Date: Wed, 12 May 2021 16:06:29 -0400
ecf6d6
Subject: [PATCH] [sosnode] Don't fail on sos-less bastion nodes used for node
ecf6d6
 lists
ecf6d6
ecf6d6
If the master node is determined to not have sos installed, that is not
ecf6d6
necessarily a fatal error for scenarios where the 'master' node is only
ecf6d6
being used to enumerate node lists and is not actually part of the
ecf6d6
cluster. This can happen when a user is using a bastion node to
ecf6d6
enumerate and connect to the cluster environment, or if the local host
ecf6d6
is being used to enumerate nodes via cluster client tooling.
ecf6d6
ecf6d6
Signed-off-by: Jake Hunsaker <jhunsake@redhat.com>
ecf6d6
---
ecf6d6
 sos/collector/sosnode.py | 17 ++++++++++++-----
ecf6d6
 1 file changed, 12 insertions(+), 5 deletions(-)
ecf6d6
ecf6d6
diff --git a/sos/collector/sosnode.py b/sos/collector/sosnode.py
ecf6d6
index 62666635..7e56483d 100644
ecf6d6
--- a/sos/collector/sosnode.py
ecf6d6
+++ b/sos/collector/sosnode.py
ecf6d6
@@ -287,13 +287,20 @@ class SosNode():
ecf6d6
             # use the containerized policy's command
ecf6d6
             pkgs = self.run_command(self.host.container_version_command,
ecf6d6
                                     use_container=True, need_root=True)
ecf6d6
-            ver = pkgs['stdout'].strip().split('-')[1]
ecf6d6
-            if ver:
ecf6d6
-                self.sos_info['version'] = ver
ecf6d6
-        if 'version' in self.sos_info:
ecf6d6
+            if pkgs['status'] == 0:
ecf6d6
+                ver = pkgs['stdout'].strip().split('-')[1]
ecf6d6
+                if ver:
ecf6d6
+                    self.sos_info['version'] = ver
ecf6d6
+            else:
ecf6d6
+                self.sos_info['version'] = None
ecf6d6
+        if self.sos_info['version']:
ecf6d6
             self.log_info('sos version is %s' % self.sos_info['version'])
ecf6d6
         else:
ecf6d6
-            self.log_error('sos is not installed on this node')
ecf6d6
+            if not self.address == self.opts.master:
ecf6d6
+                # in the case where the 'master' enumerates nodes but is not
ecf6d6
+                # intended for collection (bastions), don't worry about sos not
ecf6d6
+                # being present
ecf6d6
+                self.log_error('sos is not installed on this node')
ecf6d6
             self.connected = False
ecf6d6
             return False
ecf6d6
         cmd = 'sosreport -l'
ecf6d6
-- 
ecf6d6
2.26.3
ecf6d6
ecf6d6
From cc5abe563d855dea9ac25f56de2e493228b48bf7 Mon Sep 17 00:00:00 2001
ecf6d6
From: Jake Hunsaker <jhunsake@redhat.com>
ecf6d6
Date: Wed, 12 May 2021 18:26:09 -0400
ecf6d6
Subject: [PATCH] [sosnode] Mark sos commands as explicitly needing root for
ecf6d6
 containers
ecf6d6
ecf6d6
Fixes an issue where the sos inspection commands were not properly
ecf6d6
marked as needing to be run as root (either directly or via sudo) for
ecf6d6
containerized hosts, which would lead to incorrect sos command
ecf6d6
formatting.
ecf6d6
ecf6d6
Mark those commands, and the final container removal command, as
ecf6d6
explicitly needing root permissions.
ecf6d6
ecf6d6
Signed-off-by: Jake Hunsaker <jhunsake@redhat.com>
ecf6d6
---
ecf6d6
 sos/collector/sosnode.py | 6 +++---
ecf6d6
 1 file changed, 3 insertions(+), 3 deletions(-)
ecf6d6
ecf6d6
diff --git a/sos/collector/sosnode.py b/sos/collector/sosnode.py
ecf6d6
index 7e56483d..1fc03076 100644
ecf6d6
--- a/sos/collector/sosnode.py
ecf6d6
+++ b/sos/collector/sosnode.py
ecf6d6
@@ -304,7 +304,7 @@ class SosNode():
ecf6d6
             self.connected = False
ecf6d6
             return False
ecf6d6
         cmd = 'sosreport -l'
ecf6d6
-        sosinfo = self.run_command(cmd, use_container=True)
ecf6d6
+        sosinfo = self.run_command(cmd, use_container=True, need_root=True)
ecf6d6
         if sosinfo['status'] == 0:
ecf6d6
             self._load_sos_plugins(sosinfo['stdout'])
ecf6d6
         if self.check_sos_version('3.6'):
ecf6d6
@@ -312,7 +312,7 @@ class SosNode():
ecf6d6
 
ecf6d6
     def _load_sos_presets(self):
ecf6d6
         cmd = 'sosreport --list-presets'
ecf6d6
-        res = self.run_command(cmd, use_container=True)
ecf6d6
+        res = self.run_command(cmd, use_container=True, need_root=True)
ecf6d6
         if res['status'] == 0:
ecf6d6
             for line in res['stdout'].splitlines():
ecf6d6
                 if line.strip().startswith('name:'):
ecf6d6
@@ -996,7 +996,7 @@ class SosNode():
ecf6d6
             self.remove_file(self.sos_path + '.md5')
ecf6d6
         cleanup = self.host.set_cleanup_cmd()
ecf6d6
         if cleanup:
ecf6d6
-            self.run_command(cleanup)
ecf6d6
+            self.run_command(cleanup, need_root=True)
ecf6d6
 
ecf6d6
     def collect_extra_cmd(self, filenames):
ecf6d6
         """Collect the file created by a cluster outside of sos"""
ecf6d6
-- 
ecf6d6
2.26.3
ecf6d6
ecf6d6
From 55e77ad4c7e90ba14b10c5fdf18b65aa5d6b9cf8 Mon Sep 17 00:00:00 2001
ecf6d6
From: Jake Hunsaker <jhunsake@redhat.com>
ecf6d6
Date: Wed, 12 May 2021 18:55:31 -0400
ecf6d6
Subject: [PATCH] [ocp] Add cluster profile for OCP4
ecf6d6
ecf6d6
Removes the previous OCP cluster profile and replaces it with an updated
ecf6d6
one for OCP4 which is entirely separated from the kubernetes profile.
ecf6d6
ecf6d6
Resolves: #2544
ecf6d6
ecf6d6
Signed-off-by: Jake Hunsaker <jhunsake@redhat.com>
ecf6d6
---
ecf6d6
 sos/collector/clusters/kubernetes.py |   8 --
ecf6d6
 sos/collector/clusters/ocp.py        | 109 +++++++++++++++++++++++++++
ecf6d6
 2 files changed, 109 insertions(+), 8 deletions(-)
ecf6d6
 create mode 100644 sos/collector/clusters/ocp.py
ecf6d6
ecf6d6
diff --git a/sos/collector/clusters/kubernetes.py b/sos/collector/clusters/kubernetes.py
ecf6d6
index 6a867e31..08fd9554 100644
ecf6d6
--- a/sos/collector/clusters/kubernetes.py
ecf6d6
+++ b/sos/collector/clusters/kubernetes.py
ecf6d6
@@ -44,11 +44,3 @@ class kubernetes(Cluster):
ecf6d6
             return nodes
ecf6d6
         else:
ecf6d6
             raise Exception('Node enumeration did not return usable output')
ecf6d6
-
ecf6d6
-
ecf6d6
-class openshift(kubernetes):
ecf6d6
-
ecf6d6
-    cluster_name = 'OpenShift Container Platform'
ecf6d6
-    packages = ('atomic-openshift',)
ecf6d6
-    sos_preset = 'ocp'
ecf6d6
-    cmd = 'oc'
ecf6d6
diff --git a/sos/collector/clusters/ocp.py b/sos/collector/clusters/ocp.py
ecf6d6
new file mode 100644
ecf6d6
index 00000000..283fcfd1
ecf6d6
--- /dev/null
ecf6d6
+++ b/sos/collector/clusters/ocp.py
ecf6d6
@@ -0,0 +1,109 @@
ecf6d6
+# Copyright Red Hat 2021, Jake Hunsaker <jhunsake@redhat.com>
ecf6d6
+
ecf6d6
+# This file is part of the sos project: https://github.com/sosreport/sos
ecf6d6
+#
ecf6d6
+# This copyrighted material is made available to anyone wishing to use,
ecf6d6
+# modify, copy, or redistribute it subject to the terms and conditions of
ecf6d6
+# version 2 of the GNU General Public License.
ecf6d6
+#
ecf6d6
+# See the LICENSE file in the source distribution for further information.
ecf6d6
+
ecf6d6
+from pipes import quote
ecf6d6
+from sos.collector.clusters import Cluster
ecf6d6
+
ecf6d6
+
ecf6d6
+class ocp(Cluster):
ecf6d6
+    """OpenShift Container Platform v4"""
ecf6d6
+
ecf6d6
+    cluster_name = 'OpenShift Container Platform v4'
ecf6d6
+    packages = ('openshift-hyperkube', 'openshift-clients')
ecf6d6
+
ecf6d6
+    option_list = [
ecf6d6
+        ('label', '', 'Colon delimited list of labels to select nodes with'),
ecf6d6
+        ('role', '', 'Colon delimited list of roles to select nodes with'),
ecf6d6
+        ('kubeconfig', '', 'Path to the kubeconfig file')
ecf6d6
+    ]
ecf6d6
+
ecf6d6
+    def fmt_oc_cmd(self, cmd):
ecf6d6
+        """Format the oc command to optionally include the kubeconfig file if
ecf6d6
+        one is specified
ecf6d6
+        """
ecf6d6
+        if self.get_option('kubeconfig'):
ecf6d6
+            return "oc --config %s %s" % (self.get_option('kubeconfig'), cmd)
ecf6d6
+        return "oc %s" % cmd
ecf6d6
+
ecf6d6
+    def check_enabled(self):
ecf6d6
+        if super(ocp, self).check_enabled():
ecf6d6
+            return True
ecf6d6
+        _who = self.fmt_oc_cmd('whoami')
ecf6d6
+        return self.exec_master_cmd(_who)['status'] == 0
ecf6d6
+
ecf6d6
+    def _build_dict(self, nodelist):
ecf6d6
+        """From the output of get_nodes(), construct an easier-to-reference
ecf6d6
+        dict of nodes that will be used in determining labels, master status,
ecf6d6
+        etc...
ecf6d6
+
ecf6d6
+        :param nodelist:        The split output of `oc get nodes`
ecf6d6
+        :type nodelist:         ``list``
ecf6d6
+
ecf6d6
+        :returns:           A dict of nodes with `get nodes` columns as keys
ecf6d6
+        :rtype:             ``dict``
ecf6d6
+        """
ecf6d6
+        nodes = {}
ecf6d6
+        if 'NAME' in nodelist[0]:
ecf6d6
+            # get the index of the fields
ecf6d6
+            statline = nodelist.pop(0).split()
ecf6d6
+            idx = {}
ecf6d6
+            for state in ['status', 'roles', 'version', 'os-image']:
ecf6d6
+                try:
ecf6d6
+                    idx[state] = statline.index(state.upper())
ecf6d6
+                except Exception:
ecf6d6
+                    pass
ecf6d6
+            for node in nodelist:
ecf6d6
+                _node = node.split()
ecf6d6
+                nodes[_node[0]] = {}
ecf6d6
+                for column in idx:
ecf6d6
+                    nodes[_node[0]][column] = _node[idx[column]]
ecf6d6
+        return nodes
ecf6d6
+
ecf6d6
+    def get_nodes(self):
ecf6d6
+        nodes = []
ecf6d6
+        self.node_dict = {}
ecf6d6
+        cmd = 'get nodes -o wide'
ecf6d6
+        if self.get_option('label'):
ecf6d6
+            labels = ','.join(self.get_option('label').split(':'))
ecf6d6
+            cmd += " -l %s" % quote(labels)
ecf6d6
+        res = self.exec_master_cmd(self.fmt_oc_cmd(cmd))
ecf6d6
+        if res['status'] == 0:
ecf6d6
+            roles = [r for r in self.get_option('role').split(':')]
ecf6d6
+            self.node_dict = self._build_dict(res['stdout'].splitlines())
ecf6d6
+            for node in self.node_dict:
ecf6d6
+                if roles:
ecf6d6
+                    for role in roles:
ecf6d6
+                        if role in node:
ecf6d6
+                            nodes.append(node)
ecf6d6
+                else:
ecf6d6
+                    nodes.append(node)
ecf6d6
+        else:
ecf6d6
+            msg = "'oc' command failed"
ecf6d6
+            if 'Missing or incomplete' in res['stdout']:
ecf6d6
+                msg = ("'oc' failed due to missing kubeconfig on master node."
ecf6d6
+                       " Specify one via '-c ocp.kubeconfig=<path>'")
ecf6d6
+            raise Exception(msg)
ecf6d6
+        return nodes
ecf6d6
+
ecf6d6
+    def set_node_label(self, node):
ecf6d6
+        if node.address not in self.node_dict:
ecf6d6
+            return ''
ecf6d6
+        for label in ['master', 'worker']:
ecf6d6
+            if label in self.node_dict[node.address]['roles']:
ecf6d6
+                return label
ecf6d6
+        return ''
ecf6d6
+
ecf6d6
+    def check_node_is_master(self, sosnode):
ecf6d6
+        if sosnode.address not in self.node_dict:
ecf6d6
+            return False
ecf6d6
+        return 'master' in self.node_dict[sosnode.address]['roles']
ecf6d6
+
ecf6d6
+    def set_master_options(self, node):
ecf6d6
+        node.opts.enable_plugins.append('openshift')
ecf6d6
-- 
ecf6d6
2.26.3
ecf6d6
ecf6d6
From a3c1caad21160545eda87ea1fde93e972a6fbf88 Mon Sep 17 00:00:00 2001
ecf6d6
From: Jake Hunsaker <jhunsake@redhat.com>
ecf6d6
Date: Wed, 26 May 2021 11:55:24 -0400
ecf6d6
Subject: [PATCH] [cleaner] Don't strip empty lines from substituted files
ecf6d6
ecf6d6
Fixes an issue where empty lines would be stripped from files that have
ecf6d6
other obfuscations in them. Those empty lines may be important for file
ecf6d6
structure and/or readability, so we should instead simply not pass empty
ecf6d6
lines to the parsers rather than skipping them wholesale in the flow of
ecf6d6
writing obfuscations to a temp file before replacing the source file
ecf6d6
with a potentially changed temp file.
ecf6d6
ecf6d6
Resolves: #2562
ecf6d6
ecf6d6
Signed-off-by: Jake Hunsaker <jhunsake@redhat.com>
ecf6d6
---
ecf6d6
 sos/cleaner/__init__.py | 6 ++++--
ecf6d6
 1 file changed, 4 insertions(+), 2 deletions(-)
ecf6d6
ecf6d6
diff --git a/sos/cleaner/__init__.py b/sos/cleaner/__init__.py
ecf6d6
index bdd24f95..55465b85 100644
ecf6d6
--- a/sos/cleaner/__init__.py
ecf6d6
+++ b/sos/cleaner/__init__.py
ecf6d6
@@ -603,8 +603,6 @@ third party.
ecf6d6
         tfile = tempfile.NamedTemporaryFile(mode='w', dir=self.tmpdir)
ecf6d6
         with open(filename, 'r') as fname:
ecf6d6
             for line in fname:
ecf6d6
-                if not line.strip():
ecf6d6
-                    continue
ecf6d6
                 try:
ecf6d6
                     line, count = self.obfuscate_line(line)
ecf6d6
                     subs += count
ecf6d6
@@ -642,7 +640,11 @@ third party.
ecf6d6
 
ecf6d6
         Returns the fully obfuscated line and the number of substitutions made
ecf6d6
         """
ecf6d6
+        # don't iterate over blank lines, but still write them to the tempfile
ecf6d6
+        # to maintain the same structure when we write a scrubbed file back
ecf6d6
         count = 0
ecf6d6
+        if not line.strip():
ecf6d6
+            return line, count
ecf6d6
         for parser in self.parsers:
ecf6d6
             try:
ecf6d6
                 line, _count = parser.parse_line(line)
ecf6d6
-- 
ecf6d6
2.26.3
ecf6d6
ecf6d6
From 892bbd8114703f5a4d23aa77ba5829b7ba59446f Mon Sep 17 00:00:00 2001
ecf6d6
From: Jake Hunsaker <jhunsake@redhat.com>
ecf6d6
Date: Wed, 5 May 2021 17:02:04 -0400
ecf6d6
Subject: [PATCH] [cleaner] Remove binary files by default
ecf6d6
ecf6d6
Binary files generally speaking cannot be obfuscated, and as such we
ecf6d6
should remove them from archives being obfuscated by default so that
ecf6d6
sensitive data is not mistakenly included in an obfuscated archive.
ecf6d6
ecf6d6
This commit adds a new `--keep-binary-files` option that if used will
ecf6d6
keep any encountered binary files in the final archive. The default
ecf6d6
option of `false` will ensure that encountered binary files are removed.
ecf6d6
ecf6d6
The number of removed binary files per archive is reported when
ecf6d6
obfuscation is completed for that archive.
ecf6d6
ecf6d6
Closes: #2478
ecf6d6
Resolves: #2524
ecf6d6
ecf6d6
Signed-off-by: Jake Hunsaker <jhunsake@redhat.com>
ecf6d6
---
ecf6d6
 man/en/sos-clean.1                          |  12 ++++
ecf6d6
 sos/cleaner/__init__.py                     |  21 +++++-
ecf6d6
 sos/cleaner/obfuscation_archive.py          |  67 ++++++++++++++++++--
ecf6d6
 sos/collector/__init__.py                   |   5 ++
ecf6d6
 sos/report/__init__.py                      |   6 ++
ecf6d6
 8 files changed, 167 insertions(+), 7 deletions(-)
ecf6d6
ecf6d6
diff --git a/man/en/sos-clean.1 b/man/en/sos-clean.1
ecf6d6
index 4856b43b..b77bc63c 100644
ecf6d6
--- a/man/en/sos-clean.1
ecf6d6
+++ b/man/en/sos-clean.1
ecf6d6
@@ -9,6 +9,7 @@ sos clean - Obfuscate sensitive data from one or more sosreports
ecf6d6
     [\-\-map-file]
ecf6d6
     [\-\-jobs]
ecf6d6
     [\-\-no-update]
ecf6d6
+    [\-\-keep-binary-files]
ecf6d6
 
ecf6d6
 .SH DESCRIPTION
ecf6d6
 \fBsos clean\fR or \fBsos mask\fR is an sos subcommand used to obfuscate sensitive information from
ecf6d6
@@ -77,6 +78,17 @@ Default: 4
ecf6d6
 .TP
ecf6d6
 .B \-\-no-update
ecf6d6
 Do not write the mapping file contents to /etc/sos/cleaner/default_mapping
ecf6d6
+.TP
ecf6d6
+.B \-\-keep-binary-files
ecf6d6
+Keep unprocessable binary files in the archive, rather than removing them.
ecf6d6
+
ecf6d6
+Note that binary files cannot be obfuscated, and thus keeping them in the archive
ecf6d6
+may result in otherwise sensitive information being included in the final archive.
ecf6d6
+Users should review any archive that keeps binary files in place before sending to
ecf6d6
+a third party.
ecf6d6
+
ecf6d6
+Default: False (remove encountered binary files)
ecf6d6
+
ecf6d6
 .SH SEE ALSO
ecf6d6
 .BR sos (1)
ecf6d6
 .BR sos-report (1)
ecf6d6
diff --git a/sos/cleaner/__init__.py b/sos/cleaner/__init__.py
ecf6d6
index 55465b85..f88ff8a0 100644
ecf6d6
--- a/sos/cleaner/__init__.py
ecf6d6
+++ b/sos/cleaner/__init__.py
ecf6d6
@@ -47,6 +47,7 @@ class SoSCleaner(SoSComponent):
ecf6d6
         'keyword_file': None,
ecf6d6
         'map_file': '/etc/sos/cleaner/default_mapping',
ecf6d6
         'no_update': False,
ecf6d6
+        'keep_binary_files': False,
ecf6d6
         'target': '',
ecf6d6
         'usernames': []
ecf6d6
     }
ecf6d6
@@ -183,6 +184,11 @@ third party.
ecf6d6
                                action='store_true',
ecf6d6
                                help='Do not update the --map file with new '
ecf6d6
                                     'mappings from this run')
ecf6d6
+        clean_grp.add_argument('--keep-binary-files', default=False,
ecf6d6
+                               action='store_true',
ecf6d6
+                               dest='keep_binary_files',
ecf6d6
+                               help='Keep unprocessable binary files in the '
ecf6d6
+                                    'archive instead of removing them')
ecf6d6
         clean_grp.add_argument('--usernames', dest='usernames', default=[],
ecf6d6
                                action='extend',
ecf6d6
                                help='List of usernames to obfuscate')
ecf6d6
@@ -467,6 +473,11 @@ third party.
ecf6d6
                        "%s concurrently\n"
ecf6d6
                        % (len(self.report_paths), self.opts.jobs))
ecf6d6
                 self.ui_log.info(msg)
ecf6d6
+            if self.opts.keep_binary_files:
ecf6d6
+                self.ui_log.warning(
ecf6d6
+                    "WARNING: binary files that potentially contain sensitive "
ecf6d6
+                    "information will NOT be removed from the final archive\n"
ecf6d6
+                )
ecf6d6
             pool = ThreadPoolExecutor(self.opts.jobs)
ecf6d6
             pool.map(self.obfuscate_report, self.report_paths, chunksize=1)
ecf6d6
             pool.shutdown(wait=True)
ecf6d6
@@ -539,6 +550,10 @@ third party.
ecf6d6
                 short_name = fname.split(archive.archive_name + '/')[1]
ecf6d6
                 if archive.should_skip_file(short_name):
ecf6d6
                     continue
ecf6d6
+                if (not self.opts.keep_binary_files and
ecf6d6
+                        archive.should_remove_file(short_name)):
ecf6d6
+                    archive.remove_file(short_name)
ecf6d6
+                    continue
ecf6d6
                 try:
ecf6d6
                     count = self.obfuscate_file(fname, short_name,
ecf6d6
                                                 archive.archive_name)
ecf6d6
@@ -574,7 +589,11 @@ third party.
ecf6d6
             arc_md.add_field('files_obfuscated', len(archive.file_sub_list))
ecf6d6
             arc_md.add_field('total_substitutions', archive.total_sub_count)
ecf6d6
             self.completed_reports.append(archive)
ecf6d6
-            archive.report_msg("Obfuscation completed")
ecf6d6
+            rmsg = ''
ecf6d6
+            if archive.removed_file_count:
ecf6d6
+                rmsg = " [removed %s unprocessable files]"
ecf6d6
+                rmsg = rmsg % archive.removed_file_count
ecf6d6
+            archive.report_msg("Obfuscation completed%s" % rmsg)
ecf6d6
 
ecf6d6
         except Exception as err:
ecf6d6
             self.ui_log.info("Exception while processing %s: %s"
ecf6d6
diff --git a/sos/cleaner/obfuscation_archive.py b/sos/cleaner/obfuscation_archive.py
ecf6d6
index c64ab13b..76841b51 100644
ecf6d6
--- a/sos/cleaner/obfuscation_archive.py
ecf6d6
+++ b/sos/cleaner/obfuscation_archive.py
ecf6d6
@@ -28,6 +28,7 @@ class SoSObfuscationArchive():
ecf6d6
 
ecf6d6
     file_sub_list = []
ecf6d6
     total_sub_count = 0
ecf6d6
+    removed_file_count = 0
ecf6d6
 
ecf6d6
     def __init__(self, archive_path, tmpdir):
ecf6d6
         self.archive_path = archive_path
ecf6d6
@@ -62,11 +63,7 @@ class SoSObfuscationArchive():
ecf6d6
             'sys/firmware',
ecf6d6
             'sys/fs',
ecf6d6
             'sys/kernel/debug',
ecf6d6
-            'sys/module',
ecf6d6
-            r'.*\.tar$',  # TODO: support archive unpacking
ecf6d6
-            # Be explicit with these tar matches to avoid matching commands
ecf6d6
-            r'.*\.tar\.xz',
ecf6d6
-            '.*.gz'
ecf6d6
+            'sys/module'
ecf6d6
         ]
ecf6d6
 
ecf6d6
     @property
ecf6d6
@@ -76,6 +73,17 @@ class SoSObfuscationArchive():
ecf6d6
         except Exception:
ecf6d6
             return False
ecf6d6
 
ecf6d6
+    def remove_file(self, fname):
ecf6d6
+        """Remove a file from the archive. This is used when cleaner encounters
ecf6d6
+        a binary file, which we cannot reliably obfuscate.
ecf6d6
+        """
ecf6d6
+        full_fname = self.get_file_path(fname)
ecf6d6
+        # don't call a blank remove() here
ecf6d6
+        if full_fname:
ecf6d6
+            self.log_info("Removing binary file '%s' from archive" % fname)
ecf6d6
+            os.remove(full_fname)
ecf6d6
+            self.removed_file_count += 1
ecf6d6
+
ecf6d6
     def extract(self):
ecf6d6
         if self.is_tarfile:
ecf6d6
             self.report_msg("Extracting...")
ecf6d6
@@ -227,3 +235,52 @@ class SoSObfuscationArchive():
ecf6d6
             if filename.startswith(_skip) or re.match(_skip, filename):
ecf6d6
                 return True
ecf6d6
         return False
ecf6d6
+
ecf6d6
+    def should_remove_file(self, fname):
ecf6d6
+        """Determine if the file should be removed or not, due to an inability
ecf6d6
+        to reliably obfuscate that file based on the filename.
ecf6d6
+
ecf6d6
+        :param fname:       Filename relative to the extracted archive root
ecf6d6
+        :type fname:        ``str``
ecf6d6
+
ecf6d6
+        :returns:   ``True`` if the file cannot be reliably obfuscated
ecf6d6
+        :rtype:     ``bool``
ecf6d6
+        """
ecf6d6
+        obvious_removes = [
ecf6d6
+            r'.*\.gz',  # TODO: support flat gz/xz extraction
ecf6d6
+            r'.*\.xz',
ecf6d6
+            r'.*\.bzip2',
ecf6d6
+            r'.*\.tar\..*',  # TODO: support archive unpacking
ecf6d6
+            r'.*\.txz$',
ecf6d6
+            r'.*\.tgz$',
ecf6d6
+            r'.*\.bin',
ecf6d6
+            r'.*\.journal',
ecf6d6
+            r'.*\~$'
ecf6d6
+        ]
ecf6d6
+
ecf6d6
+        # if the filename matches, it is obvious we can remove them without
ecf6d6
+        # doing the read test
ecf6d6
+        for _arc_reg in obvious_removes:
ecf6d6
+            if re.match(_arc_reg, fname):
ecf6d6
+                return True
ecf6d6
+
ecf6d6
+        return self.file_is_binary(fname)
ecf6d6
+
ecf6d6
+    def file_is_binary(self, fname):
ecf6d6
+        """Determine if the file is a binary file or not.
ecf6d6
+
ecf6d6
+
ecf6d6
+        :param fname:          Filename relative to the extracted archive root
ecf6d6
+        :type fname:           ``str``
ecf6d6
+
ecf6d6
+        :returns:   ``True`` if file is binary, else ``False``
ecf6d6
+        :rtype:     ``bool``
ecf6d6
+        """
ecf6d6
+        with open(self.get_file_path(fname), 'tr') as tfile:
ecf6d6
+            try:
ecf6d6
+                # when opened as above (tr), reading binary content will raise
ecf6d6
+                # an exception
ecf6d6
+                tfile.read(1)
ecf6d6
+                return False
ecf6d6
+            except UnicodeDecodeError:
ecf6d6
+                return True
ecf6d6
diff --git a/sos/collector/__init__.py b/sos/collector/__init__.py
ecf6d6
index 9884836c..469db60d 100644
ecf6d6
--- a/sos/collector/__init__.py
ecf6d6
+++ b/sos/collector/__init__.py
ecf6d6
@@ -67,6 +67,7 @@ class SoSCollector(SoSComponent):
ecf6d6
         'jobs': 4,
ecf6d6
         'keywords': [],
ecf6d6
         'keyword_file': None,
ecf6d6
+        'keep_binary_files': False,
ecf6d6
         'label': '',
ecf6d6
         'list_options': False,
ecf6d6
         'log_size': 0,
ecf6d6
@@ -410,6 +411,10 @@ class SoSCollector(SoSComponent):
ecf6d6
                                  dest='clean',
ecf6d6
                                  default=False, action='store_true',
ecf6d6
                                  help='Obfuscate sensistive information')
ecf6d6
+        cleaner_grp.add_argument('--keep-binary-files', default=False,
ecf6d6
+                                 action='store_true', dest='keep_binary_files',
ecf6d6
+                                 help='Keep unprocessable binary files in the '
ecf6d6
+                                      'archive instead of removing them')
ecf6d6
         cleaner_grp.add_argument('--domains', dest='domains', default=[],
ecf6d6
                                  action='extend',
ecf6d6
                                  help='Additional domain names to obfuscate')
ecf6d6
diff --git a/sos/report/__init__.py b/sos/report/__init__.py
ecf6d6
index d4345409..2cedc76e 100644
ecf6d6
--- a/sos/report/__init__.py
ecf6d6
+++ b/sos/report/__init__.py
ecf6d6
@@ -82,6 +82,7 @@ class SoSReport(SoSComponent):
ecf6d6
         'case_id': '',
ecf6d6
         'chroot': 'auto',
ecf6d6
         'clean': False,
ecf6d6
+        'keep_binary_files': False,
ecf6d6
         'desc': '',
ecf6d6
         'domains': [],
ecf6d6
         'dry_run': False,
ecf6d6
@@ -344,6 +345,11 @@ class SoSReport(SoSComponent):
ecf6d6
                                  default='/etc/sos/cleaner/default_mapping',
ecf6d6
                                  help=('Provide a previously generated mapping'
ecf6d6
                                        ' file for obfuscation'))
ecf6d6
+        cleaner_grp.add_argument('--keep-binary-files', default=False,
ecf6d6
+                                 action='store_true',
ecf6d6
+                                 dest='keep_binary_files',
ecf6d6
+                                 help='Keep unprocessable binary files in the '
ecf6d6
+                                      'archive instead of removing them')
ecf6d6
         cleaner_grp.add_argument('--usernames', dest='usernames', default=[],
ecf6d6
                                  action='extend',
ecf6d6
                                  help='List of usernames to obfuscate')
ecf6d6
ecf6d6
From aed0102a1d6ef9a030c9e5349f092b51b9d1f22d Mon Sep 17 00:00:00 2001
ecf6d6
From: Jake Hunsaker <jhunsake@redhat.com>
ecf6d6
Date: Fri, 11 Jun 2021 23:20:59 -0400
ecf6d6
Subject: [PATCH 01/10] [SoSNode] Allow individually setting node options
ecf6d6
ecf6d6
Like we now do for primary nodes, add the ability to individually set
ecf6d6
node options via a new `set_node_options()` method for when blanket
ecf6d6
setting options across all nodes via the options class attrs is not
ecf6d6
sufficient.
ecf6d6
ecf6d6
Signed-off-by: Jake Hunsaker <jhunsake@redhat.com>
ecf6d6
---
ecf6d6
 sos/collector/clusters/__init__.py | 10 ++++++++++
ecf6d6
 sos/collector/sosnode.py           |  6 ++++--
ecf6d6
 2 files changed, 14 insertions(+), 2 deletions(-)
ecf6d6
ecf6d6
diff --git a/sos/collector/clusters/__init__.py b/sos/collector/clusters/__init__.py
ecf6d6
index 90e62d79..c4da1ab8 100644
ecf6d6
--- a/sos/collector/clusters/__init__.py
ecf6d6
+++ b/sos/collector/clusters/__init__.py
ecf6d6
@@ -137,6 +137,16 @@ class Cluster():
ecf6d6
         """
ecf6d6
         self.cluster_ssh_key = key
ecf6d6
 
ecf6d6
+    def set_node_options(self, node):
ecf6d6
+        """If there is a need to set specific options on ONLY the non-primary
ecf6d6
+        nodes in a collection, override this method in the cluster profile
ecf6d6
+        and do that here.
ecf6d6
+
ecf6d6
+        :param node:        The non-primary node
ecf6d6
+        :type node:         ``SoSNode``
ecf6d6
+        """
ecf6d6
+        pass
ecf6d6
+
ecf6d6
     def set_master_options(self, node):
ecf6d6
         """If there is a need to set specific options in the sos command being
ecf6d6
         run on the cluster's master nodes, override this method in the cluster
ecf6d6
diff --git a/sos/collector/sosnode.py b/sos/collector/sosnode.py
ecf6d6
index 1fc03076..7e784aa1 100644
ecf6d6
--- a/sos/collector/sosnode.py
ecf6d6
+++ b/sos/collector/sosnode.py
ecf6d6
@@ -657,6 +657,8 @@ class SosNode():
ecf6d6
         # set master-only options
ecf6d6
         if self.cluster.check_node_is_master(self):
ecf6d6
             self.cluster.set_master_options(self)
ecf6d6
+        else:
ecf6d6
+            self.cluster.set_node_options(self)
ecf6d6
 
ecf6d6
     def finalize_sos_cmd(self):
ecf6d6
         """Use host facts and compare to the cluster type to modify the sos
ecf6d6
@@ -713,13 +715,13 @@ class SosNode():
ecf6d6
                 sos_opts.append('--cmd-timeout=%s'
ecf6d6
                                 % quote(str(self.opts.cmd_timeout)))
ecf6d6
 
ecf6d6
+        self.update_cmd_from_cluster()
ecf6d6
+
ecf6d6
         sos_cmd = sos_cmd.replace(
ecf6d6
             'sosreport',
ecf6d6
             os.path.join(self.host.sos_bin_path, self.sos_bin)
ecf6d6
         )
ecf6d6
 
ecf6d6
-        self.update_cmd_from_cluster()
ecf6d6
-
ecf6d6
         if self.opts.only_plugins:
ecf6d6
             plugs = [o for o in self.opts.only_plugins
ecf6d6
                      if self._plugin_exists(o)]
ecf6d6
-- 
ecf6d6
2.26.3
ecf6d6
ecf6d6
ecf6d6
From 96f166699d12704cc7cf73cb8b13278675f68730 Mon Sep 17 00:00:00 2001
ecf6d6
From: Jake Hunsaker <jhunsake@redhat.com>
ecf6d6
Date: Sat, 12 Jun 2021 00:02:36 -0400
ecf6d6
Subject: [PATCH 02/10] [sosnode] Support passing env vars to `run_command()`
ecf6d6
ecf6d6
Updates `run_command()` to support passing new environment variables to
ecf6d6
the command being run, for that command alone. This parameter takes a
ecf6d6
dict, and if set we will first copy the existing set of env vars on the
ecf6d6
node and then update that set of variables using the passed dict.
ecf6d6
ecf6d6
Additionally, `execute_sos_command()` will now try to pass a new
ecf6d6
`sos_env_vars` dict (default empty) so that clusters may set environment
ecf6d6
variables specifically for the sos command being run, without having to
ecf6d6
modify the actual sos command being executed.
ecf6d6
ecf6d6
Signed-off-by: Jake Hunsaker <jhunsake@redhat.com>
ecf6d6
---
ecf6d6
 sos/collector/sosnode.py | 27 ++++++++++++++++++++++++---
ecf6d6
 1 file changed, 24 insertions(+), 3 deletions(-)
ecf6d6
ecf6d6
diff --git a/sos/collector/sosnode.py b/sos/collector/sosnode.py
ecf6d6
index 7e784aa1..40472a4e 100644
ecf6d6
--- a/sos/collector/sosnode.py
ecf6d6
+++ b/sos/collector/sosnode.py
ecf6d6
@@ -45,6 +45,8 @@ class SosNode():
ecf6d6
         self.host = None
ecf6d6
         self.cluster = None
ecf6d6
         self.hostname = None
ecf6d6
+        self.sos_env_vars = {}
ecf6d6
+        self._env_vars = {}
ecf6d6
         self._password = password or self.opts.password
ecf6d6
         if not self.opts.nopasswd_sudo and not self.opts.sudo_pw:
ecf6d6
             self.opts.sudo_pw = self._password
ecf6d6
@@ -109,6 +111,21 @@ class SosNode():
ecf6d6
     def _fmt_msg(self, msg):
ecf6d6
         return '{:<{}} : {}'.format(self._hostname, self.hostlen + 1, msg)
ecf6d6
 
ecf6d6
+    @property
ecf6d6
+    def env_vars(self):
ecf6d6
+        if not self._env_vars:
ecf6d6
+            if self.local:
ecf6d6
+                self._env_vars = os.environ.copy()
ecf6d6
+            else:
ecf6d6
+                ret = self.run_command("env --null")
ecf6d6
+                if ret['status'] == 0:
ecf6d6
+                    for ln in ret['output'].split('\x00'):
ecf6d6
+                        if not ln:
ecf6d6
+                            continue
ecf6d6
+                        _val = ln.split('=')
ecf6d6
+                        self._env_vars[_val[0]] = _val[1]
ecf6d6
+        return self._env_vars
ecf6d6
+
ecf6d6
     def set_node_manifest(self, manifest):
ecf6d6
         """Set the manifest section that this node will write to
ecf6d6
         """
ecf6d6
@@ -404,7 +421,7 @@ class SosNode():
ecf6d6
         return self.host.package_manager.pkg_by_name(pkg) is not None
ecf6d6
 
ecf6d6
     def run_command(self, cmd, timeout=180, get_pty=False, need_root=False,
ecf6d6
-                    force_local=False, use_container=False):
ecf6d6
+                    force_local=False, use_container=False, env=None):
ecf6d6
         """Runs a given cmd, either via the SSH session or locally
ecf6d6
 
ecf6d6
         Arguments:
ecf6d6
@@ -446,7 +463,10 @@ class SosNode():
ecf6d6
         else:
ecf6d6
             if get_pty:
ecf6d6
                 cmd = "/bin/bash -c %s" % quote(cmd)
ecf6d6
-        res = pexpect.spawn(cmd, encoding='utf-8')
ecf6d6
+        if env:
ecf6d6
+            _cmd_env = self.env_vars
ecf6d6
+            _cmd_env.update(env)
ecf6d6
+        res = pexpect.spawn(cmd, encoding='utf-8', env=_cmd_env)
ecf6d6
         if need_root:
ecf6d6
             if self.need_sudo:
ecf6d6
                 res.sendline(self.opts.sudo_pw)
ecf6d6
@@ -830,7 +850,8 @@ class SosNode():
ecf6d6
             res = self.run_command(self.sos_cmd,
ecf6d6
                                    timeout=self.opts.timeout,
ecf6d6
                                    get_pty=True, need_root=True,
ecf6d6
-                                   use_container=True)
ecf6d6
+                                   use_container=True,
ecf6d6
+                                   env=self.sos_env_vars)
ecf6d6
             if res['status'] == 0:
ecf6d6
                 for line in res['stdout'].splitlines():
ecf6d6
                     if fnmatch.fnmatch(line, '*sosreport-*tar*'):
ecf6d6
-- 
ecf6d6
2.26.3
ecf6d6
ecf6d6
ecf6d6
From a9e1632113406a646bdd7525982b699cf790aedb Mon Sep 17 00:00:00 2001
ecf6d6
From: Jake Hunsaker <jhunsake@redhat.com>
ecf6d6
Date: Tue, 15 Jun 2021 12:43:27 -0400
ecf6d6
Subject: [PATCH 03/10] [collect|sosnode] Avoiding clobbering sos options
ecf6d6
 between nodes
ecf6d6
ecf6d6
This commit overhauls the function of `finalize_sos_cmd()` in several
ecf6d6
ways.
ecf6d6
ecf6d6
First, assign the sos report plugin related options directly to private
ecf6d6
copies of those values for each node, so that the shared cluster profile
ecf6d6
does not clobber options between nodes.
ecf6d6
ecf6d6
Second, provide a default Lock mechanism for clusters that need to
ecf6d6
perform some node-comparison logic when assigning options based on node
ecf6d6
role.
ecf6d6
ecf6d6
Finally, finalize the sos command for each node _prior_ to the call to
ecf6d6
`SoSNode.sosreport()` so that we can be sure that clusters are able to
ecf6d6
appropriately compare and assign sos options across nodes before some
ecf6d6
nodes have already started and/or finished their own sos report
ecf6d6
collections.
ecf6d6
ecf6d6
Signed-off-by: Jake Hunsaker <jhunsake@redhat.com>
ecf6d6
---
ecf6d6
 sos/collector/__init__.py          | 14 +++++
ecf6d6
 sos/collector/clusters/__init__.py |  2 +
ecf6d6
 sos/collector/sosnode.py           | 89 +++++++++++++++++-------------
ecf6d6
 3 files changed, 67 insertions(+), 38 deletions(-)
ecf6d6
ecf6d6
diff --git a/sos/collector/__init__.py b/sos/collector/__init__.py
ecf6d6
index 469db60d..7b8cfcf7 100644
ecf6d6
--- a/sos/collector/__init__.py
ecf6d6
+++ b/sos/collector/__init__.py
ecf6d6
@@ -1186,6 +1186,10 @@ this utility or remote systems that it connects to.
ecf6d6
                              "concurrently\n"
ecf6d6
                              % (self.report_num, self.opts.jobs))
ecf6d6
 
ecf6d6
+            npool = ThreadPoolExecutor(self.opts.jobs)
ecf6d6
+            npool.map(self._finalize_sos_cmd, self.client_list, chunksize=1)
ecf6d6
+            npool.shutdown(wait=True)
ecf6d6
+
ecf6d6
             pool = ThreadPoolExecutor(self.opts.jobs)
ecf6d6
             pool.map(self._collect, self.client_list, chunksize=1)
ecf6d6
             pool.shutdown(wait=True)
ecf6d6
@@ -1217,6 +1221,16 @@ this utility or remote systems that it connects to.
ecf6d6
             except Exception as err:
ecf6d6
                 self.ui_log.error("Upload attempt failed: %s" % err)
ecf6d6
 
ecf6d6
+    def _finalize_sos_cmd(self, client):
ecf6d6
+        """Calls finalize_sos_cmd() on each node so that we have the final
ecf6d6
+        command before we thread out the actual execution of sos
ecf6d6
+        """
ecf6d6
+        try:
ecf6d6
+            client.finalize_sos_cmd()
ecf6d6
+        except Exception as err:
ecf6d6
+            self.log_error("Could not finalize sos command for %s: %s"
ecf6d6
+                           % (client.address, err))
ecf6d6
+
ecf6d6
     def _collect(self, client):
ecf6d6
         """Runs sosreport on each node"""
ecf6d6
         try:
ecf6d6
diff --git a/sos/collector/clusters/__init__.py b/sos/collector/clusters/__init__.py
ecf6d6
index c4da1ab8..bb728bc0 100644
ecf6d6
--- a/sos/collector/clusters/__init__.py
ecf6d6
+++ b/sos/collector/clusters/__init__.py
ecf6d6
@@ -11,6 +11,7 @@
ecf6d6
 import logging
ecf6d6
 
ecf6d6
 from sos.options import ClusterOption
ecf6d6
+from threading import Lock
ecf6d6
 
ecf6d6
 
ecf6d6
 class Cluster():
ecf6d6
@@ -66,6 +67,7 @@ class Cluster():
ecf6d6
             if cls.__name__ != 'Cluster':
ecf6d6
                 self.cluster_type.append(cls.__name__)
ecf6d6
         self.node_list = None
ecf6d6
+        self.lock = Lock()
ecf6d6
         self.soslog = logging.getLogger('sos')
ecf6d6
         self.ui_log = logging.getLogger('sos_ui')
ecf6d6
         self.options = []
ecf6d6
diff --git a/sos/collector/sosnode.py b/sos/collector/sosnode.py
ecf6d6
index 40472a4e..1c25cc34 100644
ecf6d6
--- a/sos/collector/sosnode.py
ecf6d6
+++ b/sos/collector/sosnode.py
ecf6d6
@@ -38,6 +38,7 @@ class SosNode():
ecf6d6
         self.address = address.strip()
ecf6d6
         self.commons = commons
ecf6d6
         self.opts = commons['cmdlineopts']
ecf6d6
+        self._assign_config_opts()
ecf6d6
         self.tmpdir = commons['tmpdir']
ecf6d6
         self.hostlen = commons['hostlen']
ecf6d6
         self.need_sudo = commons['need_sudo']
ecf6d6
@@ -465,8 +466,8 @@ class SosNode():
ecf6d6
                 cmd = "/bin/bash -c %s" % quote(cmd)
ecf6d6
         if env:
ecf6d6
             _cmd_env = self.env_vars
ecf6d6
-            _cmd_env.update(env)
ecf6d6
-        res = pexpect.spawn(cmd, encoding='utf-8', env=_cmd_env)
ecf6d6
+            env = _cmd_env.update(env)
ecf6d6
+        res = pexpect.spawn(cmd, encoding='utf-8', env=env)
ecf6d6
         if need_root:
ecf6d6
             if self.need_sudo:
ecf6d6
                 res.sendline(self.opts.sudo_pw)
ecf6d6
@@ -484,9 +485,6 @@ class SosNode():
ecf6d6
 
ecf6d6
     def sosreport(self):
ecf6d6
         """Run a sosreport on the node, then collect it"""
ecf6d6
-        self.sos_cmd = self.finalize_sos_cmd()
ecf6d6
-        self.log_info('Final sos command set to %s' % self.sos_cmd)
ecf6d6
-        self.manifest.add_field('final_sos_command', self.sos_cmd)
ecf6d6
         try:
ecf6d6
             path = self.execute_sos_command()
ecf6d6
             if path:
ecf6d6
@@ -656,29 +654,42 @@ class SosNode():
ecf6d6
         This will NOT override user supplied options.
ecf6d6
         """
ecf6d6
         if self.cluster.sos_preset:
ecf6d6
-            if not self.opts.preset:
ecf6d6
-                self.opts.preset = self.cluster.sos_preset
ecf6d6
+            if not self.preset:
ecf6d6
+                self.preset = self.cluster.sos_preset
ecf6d6
             else:
ecf6d6
                 self.log_info('Cluster specified preset %s but user has also '
ecf6d6
                               'defined a preset. Using user specification.'
ecf6d6
                               % self.cluster.sos_preset)
ecf6d6
         if self.cluster.sos_plugins:
ecf6d6
             for plug in self.cluster.sos_plugins:
ecf6d6
-                if plug not in self.opts.enable_plugins:
ecf6d6
-                    self.opts.enable_plugins.append(plug)
ecf6d6
+                if plug not in self.enable_plugins:
ecf6d6
+                    self.enable_plugins.append(plug)
ecf6d6
 
ecf6d6
         if self.cluster.sos_plugin_options:
ecf6d6
             for opt in self.cluster.sos_plugin_options:
ecf6d6
-                if not any(opt in o for o in self.opts.plugin_options):
ecf6d6
+                if not any(opt in o for o in self.plugin_options):
ecf6d6
                     option = '%s=%s' % (opt,
ecf6d6
                                         self.cluster.sos_plugin_options[opt])
ecf6d6
-                    self.opts.plugin_options.append(option)
ecf6d6
+                    self.plugin_options.append(option)
ecf6d6
 
ecf6d6
         # set master-only options
ecf6d6
         if self.cluster.check_node_is_master(self):
ecf6d6
-            self.cluster.set_master_options(self)
ecf6d6
+            with self.cluster.lock:
ecf6d6
+                self.cluster.set_master_options(self)
ecf6d6
         else:
ecf6d6
-            self.cluster.set_node_options(self)
ecf6d6
+            with self.cluster.lock:
ecf6d6
+                self.cluster.set_node_options(self)
ecf6d6
+
ecf6d6
+    def _assign_config_opts(self):
ecf6d6
+        """From the global opts configuration, assign those values locally
ecf6d6
+        to this node so that they may be acted on individually.
ecf6d6
+        """
ecf6d6
+        # assign these to new, private copies
ecf6d6
+        self.only_plugins = list(self.opts.only_plugins)
ecf6d6
+        self.skip_plugins = list(self.opts.skip_plugins)
ecf6d6
+        self.enable_plugins = list(self.opts.enable_plugins)
ecf6d6
+        self.plugin_options = list(self.opts.plugin_options)
ecf6d6
+        self.preset = list(self.opts.preset)
ecf6d6
 
ecf6d6
     def finalize_sos_cmd(self):
ecf6d6
         """Use host facts and compare to the cluster type to modify the sos
ecf6d6
@@ -742,59 +753,61 @@ class SosNode():
ecf6d6
             os.path.join(self.host.sos_bin_path, self.sos_bin)
ecf6d6
         )
ecf6d6
 
ecf6d6
-        if self.opts.only_plugins:
ecf6d6
-            plugs = [o for o in self.opts.only_plugins
ecf6d6
-                     if self._plugin_exists(o)]
ecf6d6
-            if len(plugs) != len(self.opts.only_plugins):
ecf6d6
-                not_only = list(set(self.opts.only_plugins) - set(plugs))
ecf6d6
+        if self.only_plugins:
ecf6d6
+            plugs = [o for o in self.only_plugins if self._plugin_exists(o)]
ecf6d6
+            if len(plugs) != len(self.only_plugins):
ecf6d6
+                not_only = list(set(self.only_plugins) - set(plugs))
ecf6d6
                 self.log_debug('Requested plugins %s were requested to be '
ecf6d6
                                'enabled but do not exist' % not_only)
ecf6d6
-            only = self._fmt_sos_opt_list(self.opts.only_plugins)
ecf6d6
+            only = self._fmt_sos_opt_list(self.only_plugins)
ecf6d6
             if only:
ecf6d6
                 sos_opts.append('--only-plugins=%s' % quote(only))
ecf6d6
-            return "%s %s" % (sos_cmd, ' '.join(sos_opts))
ecf6d6
+            self.sos_cmd = "%s %s" % (sos_cmd, ' '.join(sos_opts))
ecf6d6
+            self.log_info('Final sos command set to %s' % self.sos_cmd)
ecf6d6
+            self.manifest.add_field('final_sos_command', self.sos_cmd)
ecf6d6
+            return
ecf6d6
 
ecf6d6
-        if self.opts.skip_plugins:
ecf6d6
+        if self.skip_plugins:
ecf6d6
             # only run skip-plugins for plugins that are enabled
ecf6d6
-            skip = [o for o in self.opts.skip_plugins
ecf6d6
-                    if self._check_enabled(o)]
ecf6d6
-            if len(skip) != len(self.opts.skip_plugins):
ecf6d6
-                not_skip = list(set(self.opts.skip_plugins) - set(skip))
ecf6d6
+            skip = [o for o in self.skip_plugins if self._check_enabled(o)]
ecf6d6
+            if len(skip) != len(self.skip_plugins):
ecf6d6
+                not_skip = list(set(self.skip_plugins) - set(skip))
ecf6d6
                 self.log_debug('Requested to skip plugins %s, but plugins are '
ecf6d6
                                'already not enabled' % not_skip)
ecf6d6
             skipln = self._fmt_sos_opt_list(skip)
ecf6d6
             if skipln:
ecf6d6
                 sos_opts.append('--skip-plugins=%s' % quote(skipln))
ecf6d6
 
ecf6d6
-        if self.opts.enable_plugins:
ecf6d6
+        if self.enable_plugins:
ecf6d6
             # only run enable for plugins that are disabled
ecf6d6
-            opts = [o for o in self.opts.enable_plugins
ecf6d6
-                    if o not in self.opts.skip_plugins
ecf6d6
+            opts = [o for o in self.enable_plugins
ecf6d6
+                    if o not in self.skip_plugins
ecf6d6
                     and self._check_disabled(o) and self._plugin_exists(o)]
ecf6d6
-            if len(opts) != len(self.opts.enable_plugins):
ecf6d6
-                not_on = list(set(self.opts.enable_plugins) - set(opts))
ecf6d6
+            if len(opts) != len(self.enable_plugins):
ecf6d6
+                not_on = list(set(self.enable_plugins) - set(opts))
ecf6d6
                 self.log_debug('Requested to enable plugins %s, but plugins '
ecf6d6
                                'are already enabled or do not exist' % not_on)
ecf6d6
             enable = self._fmt_sos_opt_list(opts)
ecf6d6
             if enable:
ecf6d6
                 sos_opts.append('--enable-plugins=%s' % quote(enable))
ecf6d6
 
ecf6d6
-        if self.opts.plugin_options:
ecf6d6
-            opts = [o for o in self.opts.plugin_options
ecf6d6
+        if self.plugin_options:
ecf6d6
+            opts = [o for o in self.plugin_options
ecf6d6
                     if self._plugin_exists(o.split('.')[0])
ecf6d6
                     and self._plugin_option_exists(o.split('=')[0])]
ecf6d6
             if opts:
ecf6d6
                 sos_opts.append('-k %s' % quote(','.join(o for o in opts)))
ecf6d6
 
ecf6d6
-        if self.opts.preset:
ecf6d6
-            if self._preset_exists(self.opts.preset):
ecf6d6
-                sos_opts.append('--preset=%s' % quote(self.opts.preset))
ecf6d6
+        if self.preset:
ecf6d6
+            if self._preset_exists(self.preset):
ecf6d6
+                sos_opts.append('--preset=%s' % quote(self.preset))
ecf6d6
             else:
ecf6d6
                 self.log_debug('Requested to enable preset %s but preset does '
ecf6d6
-                               'not exist on node' % self.opts.preset)
ecf6d6
+                               'not exist on node' % self.preset)
ecf6d6
 
ecf6d6
-        _sos_cmd = "%s %s" % (sos_cmd, ' '.join(sos_opts))
ecf6d6
-        return _sos_cmd
ecf6d6
+        self.sos_cmd = "%s %s" % (sos_cmd, ' '.join(sos_opts))
ecf6d6
+        self.log_info('Final sos command set to %s' % self.sos_cmd)
ecf6d6
+        self.manifest.add_field('final_sos_command', self.sos_cmd)
ecf6d6
 
ecf6d6
     def determine_sos_label(self):
ecf6d6
         """Determine what, if any, label should be added to the sosreport"""
ecf6d6
-- 
ecf6d6
2.26.3
ecf6d6
ecf6d6
ecf6d6
From 7e6c078e51143f7064190b316a251ddd8d431495 Mon Sep 17 00:00:00 2001
ecf6d6
From: Jake Hunsaker <jhunsake@redhat.com>
ecf6d6
Date: Tue, 15 Jun 2021 18:38:34 -0400
ecf6d6
Subject: [PATCH 04/10] [cleaner] Improve handling of symlink obfuscation
ecf6d6
ecf6d6
Improves handling of symlink obfuscation by only performing the
ecf6d6
obfuscation on the ultimate target of any symlinks encountered. Now,
ecf6d6
when a symlink is encountered, clean will obfuscate the link name and
ecf6d6
re-write it in the archive, pointing to the (potentially obfuscated)
ecf6d6
target name.
ecf6d6
ecf6d6
Signed-off-by: Jake Hunsaker <jhunsake@redhat.com>
ecf6d6
---
ecf6d6
 sos/cleaner/__init__.py | 65 +++++++++++++++++++++++++++++------------
ecf6d6
 1 file changed, 46 insertions(+), 19 deletions(-)
ecf6d6
ecf6d6
diff --git a/sos/cleaner/__init__.py b/sos/cleaner/__init__.py
ecf6d6
index abfb684b..b38c8dfc 100644
ecf6d6
--- a/sos/cleaner/__init__.py
ecf6d6
+++ b/sos/cleaner/__init__.py
ecf6d6
@@ -612,28 +612,55 @@ third party.
ecf6d6
         if not filename:
ecf6d6
             # the requested file doesn't exist in the archive
ecf6d6
             return
ecf6d6
-        self.log_debug("Obfuscating %s" % short_name or filename,
ecf6d6
-                       caller=arc_name)
ecf6d6
         subs = 0
ecf6d6
-        tfile = tempfile.NamedTemporaryFile(mode='w', dir=self.tmpdir)
ecf6d6
-        with open(filename, 'r') as fname:
ecf6d6
-            for line in fname:
ecf6d6
-                try:
ecf6d6
-                    line, count = self.obfuscate_line(line)
ecf6d6
-                    subs += count
ecf6d6
-                    tfile.write(line)
ecf6d6
-                except Exception as err:
ecf6d6
-                    self.log_debug("Unable to obfuscate %s: %s"
ecf6d6
-                                   % (short_name, err), caller=arc_name)
ecf6d6
-        tfile.seek(0)
ecf6d6
-        if subs:
ecf6d6
-            shutil.copy(tfile.name, filename)
ecf6d6
-        tfile.close()
ecf6d6
-        _ob_filename = self.obfuscate_string(short_name)
ecf6d6
-        if _ob_filename != short_name:
ecf6d6
+        if not os.path.islink(filename):
ecf6d6
+            # don't run the obfuscation on the link, but on the actual file
ecf6d6
+            # at some other point.
ecf6d6
+            self.log_debug("Obfuscating %s" % short_name or filename,
ecf6d6
+                           caller=arc_name)
ecf6d6
+            tfile = tempfile.NamedTemporaryFile(mode='w', dir=self.tmpdir)
ecf6d6
+            with open(filename, 'r') as fname:
ecf6d6
+                for line in fname:
ecf6d6
+                    try:
ecf6d6
+                        line, count = self.obfuscate_line(line)
ecf6d6
+                        subs += count
ecf6d6
+                        tfile.write(line)
ecf6d6
+                    except Exception as err:
ecf6d6
+                        self.log_debug("Unable to obfuscate %s: %s"
ecf6d6
+                                       % (short_name, err), caller=arc_name)
ecf6d6
+            tfile.seek(0)
ecf6d6
+            if subs:
ecf6d6
+                shutil.copy(tfile.name, filename)
ecf6d6
+            tfile.close()
ecf6d6
+
ecf6d6
+        _ob_short_name = self.obfuscate_string(short_name.split('/')[-1])
ecf6d6
+        _ob_filename = short_name.replace(short_name.split('/')[-1],
ecf6d6
+                                          _ob_short_name)
ecf6d6
+        _sym_changed = False
ecf6d6
+        if os.path.islink(filename):
ecf6d6
+            _link = os.readlink(filename)
ecf6d6
+            _ob_link = self.obfuscate_string(_link)
ecf6d6
+            if _ob_link != _link:
ecf6d6
+                _sym_changed = True
ecf6d6
+
ecf6d6
+        if (_ob_filename != short_name) or _sym_changed:
ecf6d6
             arc_path = filename.split(short_name)[0]
ecf6d6
             _ob_path = os.path.join(arc_path, _ob_filename)
ecf6d6
-            os.rename(filename, _ob_path)
ecf6d6
+            # ensure that any plugin subdirs that contain obfuscated strings
ecf6d6
+            # get created with obfuscated counterparts
ecf6d6
+            if not os.path.islink(filename):
ecf6d6
+                os.rename(filename, _ob_path)
ecf6d6
+            else:
ecf6d6
+                # generate the obfuscated name of the link target
ecf6d6
+                _target_ob = self.obfuscate_string(os.readlink(filename))
ecf6d6
+                # remove the unobfuscated original symlink first, in case the
ecf6d6
+                # symlink name hasn't changed but the target has
ecf6d6
+                os.remove(filename)
ecf6d6
+                # create the newly obfuscated symlink, pointing to the
ecf6d6
+                # obfuscated target name, which may not exist just yet, but
ecf6d6
+                # when the actual file is obfuscated, will be created
ecf6d6
+                os.symlink(_target_ob, _ob_path)
ecf6d6
+
ecf6d6
         return subs
ecf6d6
 
ecf6d6
     def obfuscate_string(self, string_data):
ecf6d6
-- 
ecf6d6
2.26.3
ecf6d6
ecf6d6
ecf6d6
From b5d166ac9ff79bc3740c5e66f16d60762f9a0ac0 Mon Sep 17 00:00:00 2001
ecf6d6
From: Jake Hunsaker <jhunsake@redhat.com>
ecf6d6
Date: Tue, 15 Jun 2021 22:56:19 -0400
ecf6d6
Subject: [PATCH 05/10] [cleaner] Iterate over matches with most precise match
ecf6d6
 first
ecf6d6
ecf6d6
When matching strings in parsers to do obfuscation, we should be using
ecf6d6
the most precise matches found first, rather than matching in the order
ecf6d6
a match is hit. This ensures that we correctly obfuscate an entire
ecf6d6
string, rather than potentially only partial substring(s) that exist
ecf6d6
within the entire match.
ecf6d6
ecf6d6
Signed-off-by: Jake Hunsaker <jhunsake@redhat.com>
ecf6d6
---
ecf6d6
 sos/cleaner/parsers/__init__.py        | 10 +++++++---
ecf6d6
 sos/cleaner/parsers/keyword_parser.py  |  2 +-
ecf6d6
 sos/cleaner/parsers/username_parser.py |  2 +-
ecf6d6
 3 files changed, 9 insertions(+), 5 deletions(-)
ecf6d6
ecf6d6
diff --git a/sos/cleaner/parsers/__init__.py b/sos/cleaner/parsers/__init__.py
ecf6d6
index c77300aa..cfa20b95 100644
ecf6d6
--- a/sos/cleaner/parsers/__init__.py
ecf6d6
+++ b/sos/cleaner/parsers/__init__.py
ecf6d6
@@ -82,10 +82,12 @@ class SoSCleanerParser():
ecf6d6
         for pattern in self.regex_patterns:
ecf6d6
             matches = [m[0] for m in re.findall(pattern, line, re.I)]
ecf6d6
             if matches:
ecf6d6
+                matches.sort(reverse=True, key=lambda x: len(x))
ecf6d6
                 count += len(matches)
ecf6d6
                 for match in matches:
ecf6d6
-                    new_match = self.mapping.get(match.strip())
ecf6d6
-                    line = line.replace(match.strip(), new_match)
ecf6d6
+                    match = match.strip()
ecf6d6
+                    new_match = self.mapping.get(match)
ecf6d6
+                    line = line.replace(match, new_match)
ecf6d6
         return line, count
ecf6d6
 
ecf6d6
     def parse_string_for_keys(self, string_data):
ecf6d6
@@ -102,7 +104,9 @@ class SoSCleanerParser():
ecf6d6
         :returns: The obfuscated line
ecf6d6
         :rtype: ``str``
ecf6d6
         """
ecf6d6
-        for key, val in self.mapping.dataset.items():
ecf6d6
+        for pair in sorted(self.mapping.dataset.items(), reverse=True,
ecf6d6
+                           key=lambda x: len(x[0])):
ecf6d6
+            key, val = pair
ecf6d6
             if key in string_data:
ecf6d6
                 string_data = string_data.replace(key, val)
ecf6d6
         return string_data
ecf6d6
diff --git a/sos/cleaner/parsers/keyword_parser.py b/sos/cleaner/parsers/keyword_parser.py
ecf6d6
index 3dc2b7f0..9134f82d 100644
ecf6d6
--- a/sos/cleaner/parsers/keyword_parser.py
ecf6d6
+++ b/sos/cleaner/parsers/keyword_parser.py
ecf6d6
@@ -42,7 +42,7 @@ class SoSKeywordParser(SoSCleanerParser):
ecf6d6
 
ecf6d6
     def parse_line(self, line):
ecf6d6
         count = 0
ecf6d6
-        for keyword in self.user_keywords:
ecf6d6
+        for keyword in sorted(self.user_keywords, reverse=True):
ecf6d6
             if keyword in line:
ecf6d6
                 line = line.replace(keyword, self.mapping.get(keyword))
ecf6d6
                 count += 1
ecf6d6
diff --git a/sos/cleaner/parsers/username_parser.py b/sos/cleaner/parsers/username_parser.py
ecf6d6
index 2bb6c7f3..0c3bbac4 100644
ecf6d6
--- a/sos/cleaner/parsers/username_parser.py
ecf6d6
+++ b/sos/cleaner/parsers/username_parser.py
ecf6d6
@@ -51,7 +51,7 @@ class SoSUsernameParser(SoSCleanerParser):
ecf6d6
 
ecf6d6
     def parse_line(self, line):
ecf6d6
         count = 0
ecf6d6
-        for username in self.mapping.dataset.keys():
ecf6d6
+        for username in sorted(self.mapping.dataset.keys(), reverse=True):
ecf6d6
             if username in line:
ecf6d6
                 count = line.count(username)
ecf6d6
                 line = line.replace(username, self.mapping.get(username))
ecf6d6
-- 
ecf6d6
2.26.3
ecf6d6
ecf6d6
ecf6d6
From 7ed138fcd2ee6ece3e7fbd9e48293b212e0b4e41 Mon Sep 17 00:00:00 2001
ecf6d6
From: Jake Hunsaker <jhunsake@redhat.com>
ecf6d6
Date: Wed, 16 Jun 2021 01:15:45 -0400
ecf6d6
Subject: [PATCH 06/10] [cleaner] Explicitly obfuscate directory names within
ecf6d6
 archives
ecf6d6
ecf6d6
This commit adds a step to `obfuscate_report()` that explicitly walks
ecf6d6
through all directories in the archive, and obfuscates the directory
ecf6d6
names if necessary.
ecf6d6
ecf6d6
Since this uses `obfuscate_string()` for the directory names, a
ecf6d6
`skip_keys` list has been added to maps to allow parsers/maps to
ecf6d6
specify matched keys (such as short names for the hostname parser) that
ecf6d6
should not be considered when obfuscating directory names (e.g. 'www').
ecf6d6
ecf6d6
Closes: #2465
ecf6d6
ecf6d6
Signed-off-by: Jake Hunsaker <jhunsake@redhat.com>
ecf6d6
---
ecf6d6
 sos/cleaner/__init__.py              | 26 ++++++++++++++++++++++++++
ecf6d6
 sos/cleaner/mappings/__init__.py     |  4 +++-
ecf6d6
 sos/cleaner/mappings/hostname_map.py |  5 +++++
ecf6d6
 sos/cleaner/obfuscation_archive.py   | 20 ++++++++++++++++++--
ecf6d6
 sos/cleaner/parsers/__init__.py      |  2 ++
ecf6d6
 5 files changed, 54 insertions(+), 3 deletions(-)
ecf6d6
ecf6d6
diff --git a/sos/cleaner/__init__.py b/sos/cleaner/__init__.py
ecf6d6
index b38c8dfc..88d4d0ea 100644
ecf6d6
--- a/sos/cleaner/__init__.py
ecf6d6
+++ b/sos/cleaner/__init__.py
ecf6d6
@@ -562,6 +562,11 @@ third party.
ecf6d6
                 except Exception as err:
ecf6d6
                     self.log_debug("Unable to parse file %s: %s"
ecf6d6
                                    % (short_name, err))
ecf6d6
+            try:
ecf6d6
+                self.obfuscate_directory_names(archive)
ecf6d6
+            except Exception as err:
ecf6d6
+                self.log_info("Failed to obfuscate directories: %s" % err,
ecf6d6
+                              caller=archive.archive_name)
ecf6d6
 
ecf6d6
             # if the archive was already a tarball, repack it
ecf6d6
             method = archive.get_compression()
ecf6d6
@@ -663,6 +668,27 @@ third party.
ecf6d6
 
ecf6d6
         return subs
ecf6d6
 
ecf6d6
+    def obfuscate_directory_names(self, archive):
ecf6d6
+        """For all directories that exist within the archive, obfuscate the
ecf6d6
+        directory name if it contains sensitive strings found during execution
ecf6d6
+        """
ecf6d6
+        self.log_info("Obfuscating directory names in archive %s"
ecf6d6
+                      % archive.archive_name)
ecf6d6
+        for dirpath in sorted(archive.get_directory_list(), reverse=True):
ecf6d6
+            for _name in os.listdir(dirpath):
ecf6d6
+                _dirname = os.path.join(dirpath, _name)
ecf6d6
+                _arc_dir = _dirname.split(archive.extracted_path)[-1]
ecf6d6
+                if os.path.isdir(_dirname):
ecf6d6
+                    _ob_dirname = self.obfuscate_string(_name)
ecf6d6
+                    if _ob_dirname != _name:
ecf6d6
+                        _ob_arc_dir = _arc_dir.rstrip(_name)
ecf6d6
+                        _ob_arc_dir = os.path.join(
ecf6d6
+                            archive.extracted_path,
ecf6d6
+                            _ob_arc_dir.lstrip('/'),
ecf6d6
+                            _ob_dirname
ecf6d6
+                        )
ecf6d6
+                        os.rename(_dirname, _ob_arc_dir)
ecf6d6
+
ecf6d6
     def obfuscate_string(self, string_data):
ecf6d6
         for parser in self.parsers:
ecf6d6
             try:
ecf6d6
diff --git a/sos/cleaner/mappings/__init__.py b/sos/cleaner/mappings/__init__.py
ecf6d6
index dd464e5a..5cf5c8b2 100644
ecf6d6
--- a/sos/cleaner/mappings/__init__.py
ecf6d6
+++ b/sos/cleaner/mappings/__init__.py
ecf6d6
@@ -20,8 +20,10 @@ class SoSMap():
ecf6d6
     corresponding SoSMap() object, to allow for easy retrieval of obfuscated
ecf6d6
     items.
ecf6d6
     """
ecf6d6
-
ecf6d6
+    # used for regex skips in parser.parse_line()
ecf6d6
     ignore_matches = []
ecf6d6
+    # used for filename obfuscations in parser.parse_string_for_keys()
ecf6d6
+    skip_keys = []
ecf6d6
 
ecf6d6
     def __init__(self):
ecf6d6
         self.dataset = {}
ecf6d6
diff --git a/sos/cleaner/mappings/hostname_map.py b/sos/cleaner/mappings/hostname_map.py
ecf6d6
index e0b7bf1d..c9a44d8d 100644
ecf6d6
--- a/sos/cleaner/mappings/hostname_map.py
ecf6d6
+++ b/sos/cleaner/mappings/hostname_map.py
ecf6d6
@@ -35,6 +35,11 @@ class SoSHostnameMap(SoSMap):
ecf6d6
         '^com..*'
ecf6d6
     ]
ecf6d6
 
ecf6d6
+    skip_keys = [
ecf6d6
+        'www',
ecf6d6
+        'api'
ecf6d6
+    ]
ecf6d6
+
ecf6d6
     host_count = 0
ecf6d6
     domain_count = 0
ecf6d6
     _domains = {}
ecf6d6
diff --git a/sos/cleaner/obfuscation_archive.py b/sos/cleaner/obfuscation_archive.py
ecf6d6
index 88f978d9..90188358 100644
ecf6d6
--- a/sos/cleaner/obfuscation_archive.py
ecf6d6
+++ b/sos/cleaner/obfuscation_archive.py
ecf6d6
@@ -202,10 +202,22 @@ class SoSObfuscationArchive():
ecf6d6
         """Return a list of all files within the archive"""
ecf6d6
         self.file_list = []
ecf6d6
         for dirname, dirs, files in os.walk(self.extracted_path):
ecf6d6
+            for _dir in dirs:
ecf6d6
+                _dirpath = os.path.join(dirname, _dir)
ecf6d6
+                # catch dir-level symlinks
ecf6d6
+                if os.path.islink(_dirpath) and os.path.isdir(_dirpath):
ecf6d6
+                    self.file_list.append(_dirpath)
ecf6d6
             for filename in files:
ecf6d6
                 self.file_list.append(os.path.join(dirname, filename))
ecf6d6
         return self.file_list
ecf6d6
 
ecf6d6
+    def get_directory_list(self):
ecf6d6
+        """Return a list of all directories within the archive"""
ecf6d6
+        dir_list = []
ecf6d6
+        for dirname, dirs, files in os.walk(self.extracted_path):
ecf6d6
+            dir_list.append(dirname)
ecf6d6
+        return dir_list
ecf6d6
+
ecf6d6
     def update_sub_count(self, fname, count):
ecf6d6
         """Called when a file has finished being parsed and used to track
ecf6d6
         total substitutions made and number of files that had changes made
ecf6d6
@@ -230,7 +242,8 @@ class SoSObfuscationArchive():
ecf6d6
                                         archive root
ecf6d6
         """
ecf6d6
 
ecf6d6
-        if not os.path.isfile(self.get_file_path(filename)):
ecf6d6
+        if (not os.path.isfile(self.get_file_path(filename)) and not
ecf6d6
+                os.path.islink(self.get_file_path(filename))):
ecf6d6
             return True
ecf6d6
 
ecf6d6
         for _skip in self.skip_list:
ecf6d6
@@ -266,7 +279,10 @@ class SoSObfuscationArchive():
ecf6d6
             if re.match(_arc_reg, fname):
ecf6d6
                 return True
ecf6d6
 
ecf6d6
-        return self.file_is_binary(fname)
ecf6d6
+        if os.path.isfile(self.get_file_path(fname)):
ecf6d6
+            return self.file_is_binary(fname)
ecf6d6
+        # don't fail on dir-level symlinks
ecf6d6
+        return False
ecf6d6
 
ecf6d6
     def file_is_binary(self, fname):
ecf6d6
         """Determine if the file is a binary file or not.
ecf6d6
diff --git a/sos/cleaner/parsers/__init__.py b/sos/cleaner/parsers/__init__.py
ecf6d6
index cfa20b95..84874475 100644
ecf6d6
--- a/sos/cleaner/parsers/__init__.py
ecf6d6
+++ b/sos/cleaner/parsers/__init__.py
ecf6d6
@@ -107,6 +107,8 @@ class SoSCleanerParser():
ecf6d6
         for pair in sorted(self.mapping.dataset.items(), reverse=True,
ecf6d6
                            key=lambda x: len(x[0])):
ecf6d6
             key, val = pair
ecf6d6
+            if key in self.mapping.skip_keys:
ecf6d6
+                continue
ecf6d6
             if key in string_data:
ecf6d6
                 string_data = string_data.replace(key, val)
ecf6d6
         return string_data
ecf6d6
-- 
ecf6d6
2.26.3
ecf6d6
ecf6d6
ecf6d6
From f180150277b706e72f2445287f3d0b6943efa252 Mon Sep 17 00:00:00 2001
ecf6d6
From: Jake Hunsaker <jhunsake@redhat.com>
ecf6d6
Date: Wed, 16 Jun 2021 02:24:51 -0400
ecf6d6
Subject: [PATCH 07/10] [hostname parser,map] Attempt to detect strings with
ecf6d6
 FQDN substrings
ecf6d6
ecf6d6
This commit updates the hostname parser and associated map to be able to
ecf6d6
better detect and obfuscate FQDN substrings within file content and file
ecf6d6
names, particularly when the regex patterns failed to match a hostname
ecf6d6
that is formatted with '_' characters rather than '.' characters.
ecf6d6
ecf6d6
The `get()` method has been updated to also preserve characters and
ecf6d6
certain extensions that are not part of the FQDN, but are brought in by
ecf6d6
the regex pattern due to the fact that we need to use word boundary
ecf6d6
indicators within the pattern.
ecf6d6
ecf6d6
Signed-off-by: Jake Hunsaker <jhunsake@redhat.com>
ecf6d6
---
ecf6d6
 sos/cleaner/mappings/hostname_map.py   | 59 +++++++++++++++++++++++---
ecf6d6
 sos/cleaner/parsers/__init__.py        |  3 +-
ecf6d6
 sos/cleaner/parsers/hostname_parser.py | 30 ++++++++++---
ecf6d6
 3 files changed, 81 insertions(+), 11 deletions(-)
ecf6d6
ecf6d6
diff --git a/sos/cleaner/mappings/hostname_map.py b/sos/cleaner/mappings/hostname_map.py
ecf6d6
index c9a44d8d..d4b2c88e 100644
ecf6d6
--- a/sos/cleaner/mappings/hostname_map.py
ecf6d6
+++ b/sos/cleaner/mappings/hostname_map.py
ecf6d6
@@ -104,7 +104,7 @@ class SoSHostnameMap(SoSMap):
ecf6d6
         host = domain.split('.')
ecf6d6
         if len(host) == 1:
ecf6d6
             # don't block on host's shortname
ecf6d6
-            return True
ecf6d6
+            return host[0] in self.hosts.keys()
ecf6d6
         else:
ecf6d6
             domain = host[0:-1]
ecf6d6
             for known_domain in self._domains:
ecf6d6
@@ -113,12 +113,59 @@ class SoSHostnameMap(SoSMap):
ecf6d6
         return False
ecf6d6
 
ecf6d6
     def get(self, item):
ecf6d6
-        if item.startswith(('.', '_')):
ecf6d6
-            item = item.lstrip('._')
ecf6d6
-        item = item.strip()
ecf6d6
+        prefix = ''
ecf6d6
+        suffix = ''
ecf6d6
+        final = None
ecf6d6
+        # The regex pattern match may include a leading and/or trailing '_'
ecf6d6
+        # character due to the need to use word boundary matching, so we need
ecf6d6
+        # to strip these from the string during processing, but still keep them
ecf6d6
+        # in the returned string to not mangle the string replacement in the
ecf6d6
+        # context of the file or filename
ecf6d6
+        while item.startswith(('.', '_')):
ecf6d6
+            prefix += item[0]
ecf6d6
+            item = item[1:]
ecf6d6
+        while item.endswith(('.', '_')):
ecf6d6
+            suffix += item[-1]
ecf6d6
+            item = item[0:-1]
ecf6d6
         if not self.domain_name_in_loaded_domains(item.lower()):
ecf6d6
             return item
ecf6d6
-        return super(SoSHostnameMap, self).get(item)
ecf6d6
+        if item.endswith(('.yaml', '.yml', '.crt', '.key', '.pem')):
ecf6d6
+            ext = '.' + item.split('.')[-1]
ecf6d6
+            item = item.replace(ext, '')
ecf6d6
+            suffix += ext
ecf6d6
+        if item not in self.dataset.keys():
ecf6d6
+            # try to account for use of '-' in names that include hostnames
ecf6d6
+            # and don't create new mappings for each of these
ecf6d6
+            for _existing in sorted(self.dataset.keys(), reverse=True,
ecf6d6
+                                    key=lambda x: len(x)):
ecf6d6
+                _host_substr = False
ecf6d6
+                _test = item.split(_existing)
ecf6d6
+                _h = _existing.split('.')
ecf6d6
+                # avoid considering a full FQDN match as a new match off of
ecf6d6
+                # the hostname of an existing match
ecf6d6
+                if _h[0] and _h[0] in self.hosts.keys():
ecf6d6
+                    _host_substr = True
ecf6d6
+                if len(_test) == 1 or not _test[0]:
ecf6d6
+                    # does not match existing obfuscation
ecf6d6
+                    continue
ecf6d6
+                elif _test[0].endswith('.') and not _host_substr:
ecf6d6
+                    # new hostname in known domain
ecf6d6
+                    final = super(SoSHostnameMap, self).get(item)
ecf6d6
+                    break
ecf6d6
+                elif item.split(_test[0]):
ecf6d6
+                    # string that includes existing FQDN obfuscation substring
ecf6d6
+                    # so, only obfuscate the FQDN part
ecf6d6
+                    try:
ecf6d6
+                        itm = item.split(_test[0])[1]
ecf6d6
+                        final = _test[0] + super(SoSHostnameMap, self).get(itm)
ecf6d6
+                        break
ecf6d6
+                    except Exception:
ecf6d6
+                        # fallback to still obfuscating the entire item
ecf6d6
+                        pass
ecf6d6
+
ecf6d6
+        if not final:
ecf6d6
+            final = super(SoSHostnameMap, self).get(item)
ecf6d6
+        return prefix + final + suffix
ecf6d6
 
ecf6d6
     def sanitize_item(self, item):
ecf6d6
         host = item.split('.')
ecf6d6
@@ -146,6 +193,8 @@ class SoSHostnameMap(SoSMap):
ecf6d6
         """Obfuscate the short name of the host with an incremented counter
ecf6d6
         based on the total number of obfuscated host names
ecf6d6
         """
ecf6d6
+        if not hostname:
ecf6d6
+            return hostname
ecf6d6
         if hostname not in self.hosts:
ecf6d6
             ob_host = "host%s" % self.host_count
ecf6d6
             self.hosts[hostname] = ob_host
ecf6d6
diff --git a/sos/cleaner/parsers/__init__.py b/sos/cleaner/parsers/__init__.py
ecf6d6
index 84874475..57d2020a 100644
ecf6d6
--- a/sos/cleaner/parsers/__init__.py
ecf6d6
+++ b/sos/cleaner/parsers/__init__.py
ecf6d6
@@ -87,7 +87,8 @@ class SoSCleanerParser():
ecf6d6
                 for match in matches:
ecf6d6
                     match = match.strip()
ecf6d6
                     new_match = self.mapping.get(match)
ecf6d6
-                    line = line.replace(match, new_match)
ecf6d6
+                    if new_match != match:
ecf6d6
+                        line = line.replace(match, new_match)
ecf6d6
         return line, count
ecf6d6
 
ecf6d6
     def parse_string_for_keys(self, string_data):
ecf6d6
diff --git a/sos/cleaner/parsers/hostname_parser.py b/sos/cleaner/parsers/hostname_parser.py
ecf6d6
index 9982024b..3de6bb08 100644
ecf6d6
--- a/sos/cleaner/parsers/hostname_parser.py
ecf6d6
+++ b/sos/cleaner/parsers/hostname_parser.py
ecf6d6
@@ -18,7 +18,7 @@ class SoSHostnameParser(SoSCleanerParser):
ecf6d6
     map_file_key = 'hostname_map'
ecf6d6
     prep_map_file = 'sos_commands/host/hostname'
ecf6d6
     regex_patterns = [
ecf6d6
-        r'(((\b|_)[a-zA-Z0-9-\.]{1,200}\.[a-zA-Z]{1,63}\b))'
ecf6d6
+        r'(((\b|_)[a-zA-Z0-9-\.]{1,200}\.[a-zA-Z]{1,63}(\b|_)))'
ecf6d6
     ]
ecf6d6
 
ecf6d6
     def __init__(self, conf_file=None, opt_domains=None):
ecf6d6
@@ -66,10 +66,30 @@ class SoSHostnameParser(SoSCleanerParser):
ecf6d6
         """Override the default parse_line() method to also check for the
ecf6d6
         shortname of the host derived from the hostname.
ecf6d6
         """
ecf6d6
+
ecf6d6
+        def _check_line(ln, count, search, repl=None):
ecf6d6
+            """Perform a second manual check for substrings that may have been
ecf6d6
+            missed by regex matching
ecf6d6
+            """
ecf6d6
+            if search in self.mapping.skip_keys:
ecf6d6
+                return ln, count
ecf6d6
+            if search in ln:
ecf6d6
+                count += ln.count(search)
ecf6d6
+                ln = ln.replace(search, self.mapping.get(repl or search))
ecf6d6
+            return ln, count
ecf6d6
+
ecf6d6
         count = 0
ecf6d6
         line, count = super(SoSHostnameParser, self).parse_line(line)
ecf6d6
-        for short_name in self.short_names:
ecf6d6
-            if short_name in line:
ecf6d6
-                count += 1
ecf6d6
-                line = line.replace(short_name, self.mapping.get(short_name))
ecf6d6
+        # make an additional pass checking for '_' formatted substrings that
ecf6d6
+        # the regex patterns won't catch
ecf6d6
+        hosts = [h for h in self.mapping.dataset.keys() if '.' in h]
ecf6d6
+        for host in sorted(hosts, reverse=True, key=lambda x: len(x)):
ecf6d6
+            fqdn = host
ecf6d6
+            for c in '.-':
ecf6d6
+                fqdn = fqdn.replace(c, '_')
ecf6d6
+            line, count = _check_line(line, count, fqdn, host)
ecf6d6
+
ecf6d6
+        for short_name in sorted(self.short_names, reverse=True):
ecf6d6
+            line, count = _check_line(line, count, short_name)
ecf6d6
+
ecf6d6
         return line, count
ecf6d6
-- 
ecf6d6
2.26.3
ecf6d6
ecf6d6
ecf6d6
From ec46e6a8fac58ed757344be3751eb1f925eab981 Mon Sep 17 00:00:00 2001
ecf6d6
From: Jake Hunsaker <jhunsake@redhat.com>
ecf6d6
Date: Mon, 14 Jun 2021 09:31:07 -0400
ecf6d6
Subject: [PATCH 08/10] [ocp] Refine OCP node options in cluster profile
ecf6d6
ecf6d6
Adds explicit setting of primary/node sos options for the `openshift`
ecf6d6
plugin within the cluster, rather than relying on default configurations
ecf6d6
and best practices to avoid duplicate collections.
ecf6d6
ecf6d6
Signed-off-by: Jake Hunsaker <jhunsake@redhat.com>
ecf6d6
---
ecf6d6
 sos/collector/clusters/ocp.py | 65 +++++++++++++++++++++++++++++++++--
ecf6d6
 sos/collector/sosnode.py      |  4 +--
ecf6d6
 2 files changed, 65 insertions(+), 4 deletions(-)
ecf6d6
ecf6d6
diff --git a/sos/collector/clusters/ocp.py b/sos/collector/clusters/ocp.py
ecf6d6
index 283fcfd1..ddff84a4 100644
ecf6d6
--- a/sos/collector/clusters/ocp.py
ecf6d6
+++ b/sos/collector/clusters/ocp.py
ecf6d6
@@ -8,6 +8,8 @@
ecf6d6
 #
ecf6d6
 # See the LICENSE file in the source distribution for further information.
ecf6d6
 
ecf6d6
+import os
ecf6d6
+
ecf6d6
 from pipes import quote
ecf6d6
 from sos.collector.clusters import Cluster
ecf6d6
 
ecf6d6
@@ -18,10 +20,14 @@ class ocp(Cluster):
ecf6d6
     cluster_name = 'OpenShift Container Platform v4'
ecf6d6
     packages = ('openshift-hyperkube', 'openshift-clients')
ecf6d6
 
ecf6d6
+    api_collect_enabled = False
ecf6d6
+    token = None
ecf6d6
+
ecf6d6
     option_list = [
ecf6d6
         ('label', '', 'Colon delimited list of labels to select nodes with'),
ecf6d6
         ('role', '', 'Colon delimited list of roles to select nodes with'),
ecf6d6
-        ('kubeconfig', '', 'Path to the kubeconfig file')
ecf6d6
+        ('kubeconfig', '', 'Path to the kubeconfig file'),
ecf6d6
+        ('token', '', 'Service account token to use for oc authorization')
ecf6d6
     ]
ecf6d6
 
ecf6d6
     def fmt_oc_cmd(self, cmd):
ecf6d6
@@ -32,9 +38,20 @@ class ocp(Cluster):
ecf6d6
             return "oc --config %s %s" % (self.get_option('kubeconfig'), cmd)
ecf6d6
         return "oc %s" % cmd
ecf6d6
 
ecf6d6
+    def _attempt_oc_login(self):
ecf6d6
+        """Attempt to login to the API using the oc command using a provided
ecf6d6
+        token
ecf6d6
+        """
ecf6d6
+        _res = self.exec_primary_cmd("oc login --insecure-skip-tls-verify=True"
ecf6d6
+                                     " --token=%s" % self.token)
ecf6d6
+        return _res['status'] == 0
ecf6d6
+
ecf6d6
     def check_enabled(self):
ecf6d6
         if super(ocp, self).check_enabled():
ecf6d6
             return True
ecf6d6
+        self.token = self.get_option('token') or os.getenv('SOSOCPTOKEN', None)
ecf6d6
+        if self.token:
ecf6d6
+            self._attempt_oc_login()
ecf6d6
         _who = self.fmt_oc_cmd('whoami')
ecf6d6
         return self.exec_master_cmd(_who)['status'] == 0
ecf6d6
 
ecf6d6
@@ -106,4 +123,48 @@ class ocp(Cluster):
ecf6d6
         return 'master' in self.node_dict[sosnode.address]['roles']
ecf6d6
 
ecf6d6
     def set_master_options(self, node):
ecf6d6
-        node.opts.enable_plugins.append('openshift')
ecf6d6
+        node.enable_plugins.append('openshift')
ecf6d6
+        if self.api_collect_enabled:
ecf6d6
+            # a primary has already been enabled for API collection, disable
ecf6d6
+            # it among others
ecf6d6
+            node.plugin_options.append('openshift.no-oc=on')
ecf6d6
+        else:
ecf6d6
+            _oc_cmd = 'oc'
ecf6d6
+            if node.host.containerized:
ecf6d6
+                _oc_cmd = '/host/bin/oc'
ecf6d6
+                # when run from a container, the oc command does not inherit
ecf6d6
+                # the default config, so if it's present then pass it here to
ecf6d6
+                # detect a functional oc command. This is sidestepped in sos
ecf6d6
+                # report by being able to chroot the `oc` execution which we
ecf6d6
+                # cannot do remotely
ecf6d6
+                if node.file_exists('/root/.kube/config', need_root=True):
ecf6d6
+                    _oc_cmd += ' --kubeconfig /host/root/.kube/config'
ecf6d6
+            can_oc = node.run_command("%s whoami" % _oc_cmd,
ecf6d6
+                                      use_container=node.host.containerized,
ecf6d6
+                                      # container is available only to root
ecf6d6
+                                      # and if rhel, need to run sos as root
ecf6d6
+                                      # anyways which will run oc as root
ecf6d6
+                                      need_root=True)
ecf6d6
+            if can_oc['status'] == 0:
ecf6d6
+                # the primary node can already access the API
ecf6d6
+                self.api_collect_enabled = True
ecf6d6
+            elif self.token:
ecf6d6
+                node.sos_env_vars['SOSOCPTOKEN'] = self.token
ecf6d6
+                self.api_collect_enabled = True
ecf6d6
+            elif self.get_option('kubeconfig'):
ecf6d6
+                kc = self.get_option('kubeconfig')
ecf6d6
+                if node.file_exists(kc):
ecf6d6
+                    if node.host.containerized:
ecf6d6
+                        kc = "/host/%s" % kc
ecf6d6
+                    node.sos_env_vars['KUBECONFIG'] = kc
ecf6d6
+                    self.api_collect_enabled = True
ecf6d6
+            if self.api_collect_enabled:
ecf6d6
+                msg = ("API collections will be performed on %s\nNote: API "
ecf6d6
+                       "collections may extend runtime by 10s of minutes\n"
ecf6d6
+                       % node.address)
ecf6d6
+                self.soslog.info(msg)
ecf6d6
+                self.ui_log.info(msg)
ecf6d6
+
ecf6d6
+    def set_node_options(self, node):
ecf6d6
+        # don't attempt OC API collections on non-primary nodes
ecf6d6
+        node.plugin_options.append('openshift.no-oc=on')
ecf6d6
diff --git a/sos/collector/sosnode.py b/sos/collector/sosnode.py
ecf6d6
index 1c25cc34..6597d236 100644
ecf6d6
--- a/sos/collector/sosnode.py
ecf6d6
+++ b/sos/collector/sosnode.py
ecf6d6
@@ -202,11 +202,11 @@ class SosNode():
ecf6d6
                 self.opts.registry_authfile or self.host.container_authfile
ecf6d6
             )
ecf6d6
 
ecf6d6
-    def file_exists(self, fname):
ecf6d6
+    def file_exists(self, fname, need_root=False):
ecf6d6
         """Checks for the presence of fname on the remote node"""
ecf6d6
         if not self.local:
ecf6d6
             try:
ecf6d6
-                res = self.run_command("stat %s" % fname)
ecf6d6
+                res = self.run_command("stat %s" % fname, need_root=need_root)
ecf6d6
                 return res['status'] == 0
ecf6d6
             except Exception:
ecf6d6
                 return False
ecf6d6
-- 
ecf6d6
2.26.3
ecf6d6
ecf6d6
ecf6d6
From eea8e15845a8bcba91b93a5310ba693e8c20ab9c Mon Sep 17 00:00:00 2001
ecf6d6
From: Jake Hunsaker <jhunsake@redhat.com>
ecf6d6
Date: Thu, 17 Jun 2021 09:52:36 -0400
ecf6d6
Subject: [PATCH 09/10] [cleaner] Don't obfuscate default 'core' user
ecf6d6
ecf6d6
The 'core' user is a common default user on containerized hosts, and
ecf6d6
obfuscation of it is not advantageous, much like the default 'ubuntu'
ecf6d6
user for that distribution.
ecf6d6
ecf6d6
Signed-off-by: Jake Hunsaker <jhunsake@redhat.com>
ecf6d6
---
ecf6d6
 sos/cleaner/parsers/username_parser.py | 1 +
ecf6d6
 1 file changed, 1 insertion(+)
ecf6d6
ecf6d6
diff --git a/sos/cleaner/parsers/username_parser.py b/sos/cleaner/parsers/username_parser.py
ecf6d6
index 0c3bbac4..64843205 100644
ecf6d6
--- a/sos/cleaner/parsers/username_parser.py
ecf6d6
+++ b/sos/cleaner/parsers/username_parser.py
ecf6d6
@@ -28,6 +28,7 @@ class SoSUsernameParser(SoSCleanerParser):
ecf6d6
     prep_map_file = 'sos_commands/login/lastlog_-u_1000-60000'
ecf6d6
     regex_patterns = []
ecf6d6
     skip_list = [
ecf6d6
+        'core',
ecf6d6
         'nobody',
ecf6d6
         'nfsnobody',
ecf6d6
         'root'
ecf6d6
-- 
ecf6d6
2.26.3
ecf6d6
ecf6d6
ecf6d6
From 581429ca65131711c96f9d56bf2f0e18779aec2e Mon Sep 17 00:00:00 2001
ecf6d6
From: Jake Hunsaker <jhunsake@redhat.com>
ecf6d6
Date: Fri, 18 Jun 2021 14:26:55 -0400
ecf6d6
Subject: [PATCH 10/10] [cleaner] Fix checksum and archive pruning from archive
ecf6d6
 list
ecf6d6
ecf6d6
Fixes an issue where checksums may have gotten into the list of archives
ecf6d6
to be cleaned, which would cause further issues later. Additionally,
ecf6d6
prevents nested sosreports from top-level archives (such as from
ecf6d6
`collect`) from being removed for being a binary file when that
ecf6d6
top-level archive gets obfuscated.
ecf6d6
---
ecf6d6
 sos/cleaner/__init__.py            | 5 +++--
ecf6d6
 sos/cleaner/obfuscation_archive.py | 1 +
ecf6d6
 2 files changed, 4 insertions(+), 2 deletions(-)
ecf6d6
ecf6d6
diff --git a/sos/cleaner/__init__.py b/sos/cleaner/__init__.py
ecf6d6
index 88d4d0ea..8280bc50 100644
ecf6d6
--- a/sos/cleaner/__init__.py
ecf6d6
+++ b/sos/cleaner/__init__.py
ecf6d6
@@ -226,8 +226,7 @@ third party.
ecf6d6
         nested_archives = []
ecf6d6
         for _file in archive.getmembers():
ecf6d6
             if (re.match('sosreport-.*.tar', _file.name.split('/')[-1]) and not
ecf6d6
-               (_file.name.endswith('.md5') or
ecf6d6
-               _file.name.endswith('.sha256'))):
ecf6d6
+                    (_file.name.endswith(('.md5', '.sha256')))):
ecf6d6
                 nested_archives.append(_file.name.split('/')[-1])
ecf6d6
 
ecf6d6
         if nested_archives:
ecf6d6
@@ -235,6 +234,8 @@ third party.
ecf6d6
             nested_path = self.extract_archive(archive)
ecf6d6
             for arc_file in os.listdir(nested_path):
ecf6d6
                 if re.match('sosreport.*.tar.*', arc_file):
ecf6d6
+                    if arc_file.endswith(('.md5', '.sha256')):
ecf6d6
+                        continue
ecf6d6
                     self.report_paths.append(os.path.join(nested_path,
ecf6d6
                                                           arc_file))
ecf6d6
             # add the toplevel extracted archive
ecf6d6
diff --git a/sos/cleaner/obfuscation_archive.py b/sos/cleaner/obfuscation_archive.py
ecf6d6
index 90188358..e357450b 100644
ecf6d6
--- a/sos/cleaner/obfuscation_archive.py
ecf6d6
+++ b/sos/cleaner/obfuscation_archive.py
ecf6d6
@@ -58,6 +58,7 @@ class SoSObfuscationArchive():
ecf6d6
         Returns: list of files and file regexes
ecf6d6
         """
ecf6d6
         return [
ecf6d6
+            'sosreport-',
ecf6d6
             'sys/firmware',
ecf6d6
             'sys/fs',
ecf6d6
             'sys/kernel/debug',
ecf6d6
-- 
ecf6d6
2.26.3
ecf6d6