Blame SOURCES/sos-bz1973675-ocp-cluster-cleaner.patch

ba407d
From 29afda6e4ff90385d34bc61315542e7cb4baaf8d Mon Sep 17 00:00:00 2001
ba407d
From: Jake Hunsaker <jhunsake@redhat.com>
ba407d
Date: Fri, 9 Apr 2021 11:32:14 -0400
ba407d
Subject: [PATCH] [cleaner] Do not break iteration of parse_string_for_keys on
ba407d
 first match
ba407d
ba407d
Previously, `parse_string_for_keys()`, called by `obfuscate_string()`
ba407d
for non-regex based obfuscations, would return on the first match in the
ba407d
string found for each parser.
ba407d
ba407d
Instead, continue iterating over all items in each parser's dataset
ba407d
before returning the (now fully) obfuscated string.
ba407d
ba407d
Resolves: #2480
ba407d
ba407d
Signed-off-by: Jake Hunsaker <jhunsake@redhat.com>
ba407d
---
ba407d
 sos/cleaner/parsers/__init__.py | 2 +-
ba407d
 1 file changed, 1 insertion(+), 1 deletion(-)
ba407d
ba407d
diff --git a/sos/cleaner/parsers/__init__.py b/sos/cleaner/parsers/__init__.py
ba407d
index dd0451df..c77300aa 100644
ba407d
--- a/sos/cleaner/parsers/__init__.py
ba407d
+++ b/sos/cleaner/parsers/__init__.py
ba407d
@@ -104,7 +104,7 @@ class SoSCleanerParser():
ba407d
         """
ba407d
         for key, val in self.mapping.dataset.items():
ba407d
             if key in string_data:
ba407d
-                return string_data.replace(key, val)
ba407d
+                string_data = string_data.replace(key, val)
ba407d
         return string_data
ba407d
 
ba407d
     def get_map_contents(self):
ba407d
-- 
ba407d
2.26.3
ba407d
ba407d
From 52e6b2ae17e128f17a84ee83b7718c2901bcd5bd Mon Sep 17 00:00:00 2001
ba407d
From: Jake Hunsaker <jhunsake@redhat.com>
ba407d
Date: Wed, 12 May 2021 12:39:48 -0400
ba407d
Subject: [PATCH] [collect] Add options to provide registry auth for pulling
ba407d
 images
ba407d
ba407d
Adds options that allow a user to specify registry authentication,
ba407d
either via username/password or an authfile, to allow pulling an image
ba407d
that exists on a non-public registry.
ba407d
ba407d
If a username/password is provided, that will be used. If not, we will
ba407d
attempt to use an authfile - either provided by the user or by a cluster
ba407d
profile.
ba407d
ba407d
Also adds an option to forcibly pull a new(er) version of the specified
ba407d
image, to alleviate conditions where a too-old version of the image
ba407d
already exists on the host.
ba407d
ba407d
Closes: #2534
ba407d
ba407d
Signed-off-by: Jake Hunsaker <jhunsake@redhat.com>
ba407d
---
ba407d
 man/en/sos-collect.1              | 30 +++++++++++++++++++++++
ba407d
 sos/collector/__init__.py         | 17 +++++++++++++
ba407d
 sos/collector/sosnode.py          | 40 +++++++++++++++++++++++++++----
ba407d
 sos/policies/distros/__init__.py  | 16 ++++++++++++-
ba407d
 sos/policies/distros/redhat.py    | 25 ++++++++++++-------
ba407d
 sos/policies/runtimes/__init__.py | 25 +++++++++++++++++++
ba407d
 6 files changed, 140 insertions(+), 13 deletions(-)
ba407d
ba407d
diff --git a/man/en/sos-collect.1 b/man/en/sos-collect.1
ba407d
index 286bfe71..cdbc3257 100644
ba407d
--- a/man/en/sos-collect.1
ba407d
+++ b/man/en/sos-collect.1
ba407d
@@ -26,6 +26,11 @@ sos collect \- Collect sosreports from multiple (cluster) nodes
ba407d
     [\-\-no\-pkg\-check]
ba407d
     [\-\-no\-local]
ba407d
     [\-\-master MASTER]
ba407d
+    [\-\-image IMAGE]
ba407d
+    [\-\-force-pull-image]
ba407d
+    [\-\-registry-user USER]
ba407d
+    [\-\-registry-password PASSWORD]
ba407d
+    [\-\-registry-authfile FILE]
ba407d
     [\-o ONLY_PLUGINS]
ba407d
     [\-p SSH_PORT]
ba407d
     [\-\-password]
ba407d
@@ -245,6 +250,31 @@ Specify a master node for the cluster.
ba407d
 If provided, then sos collect will check the master node, not localhost, for determining
ba407d
 the type of cluster in use.
ba407d
 .TP
ba407d
+\fB\-\-image IMAGE\fR
ba407d
+Specify an image to use for the temporary container created for collections on
ba407d
+containerized host, if you do not want to use the default image specifed by the
ba407d
+host's policy. Note that this should include the registry.
ba407d
+.TP
ba407d
+\fB\-\-force-pull-image\fR
ba407d
+Use this option to force the container runtime to pull the specified image (even
ba407d
+if it is the policy default image) even if the image already exists on the host.
ba407d
+This may be useful to update an older container image on containerized hosts.
ba407d
+.TP
ba407d
+\fB\-\-registry-user USER\fR
ba407d
+Specify the username to authenticate to the registry with in order to pull the container
ba407d
+image
ba407d
+.TP
ba407d
+\fB\-\-registry-password PASSWORD\fR
ba407d
+Specify the password to authenticate to the registry with in order to pull the container
ba407d
+image. If no password is required, leave this blank.
ba407d
+.TP
ba407d
+\fB\-\-registry-authfile FILE\fR
ba407d
+Specify the filename to use for providing authentication credentials to the registry
ba407d
+to pull the container image.
ba407d
+
ba407d
+Note that this file must exist on the node(s) performing the pull operations, not the
ba407d
+node from which \fBsos collect\fR was run.
ba407d
+.TP
ba407d
 \fB\-o\fR ONLY_PLUGINS, \fB\-\-only\-plugins\fR ONLY_PLUGINS
ba407d
 Sosreport option. Run ONLY the plugins listed.
ba407d
 
ba407d
diff --git a/sos/collector/__init__.py b/sos/collector/__init__.py
ba407d
index 1c742cf5..0624caad 100644
ba407d
--- a/sos/collector/__init__.py
ba407d
+++ b/sos/collector/__init__.py
ba407d
@@ -63,6 +63,7 @@ class SoSCollector(SoSComponent):
ba407d
         'encrypt_pass': '',
ba407d
         'group': None,
ba407d
         'image': '',
ba407d
+        'force_pull_image': False,
ba407d
         'jobs': 4,
ba407d
         'keywords': [],
ba407d
         'keyword_file': None,
ba407d
@@ -84,6 +85,9 @@ class SoSCollector(SoSComponent):
ba407d
         'plugin_timeout': None,
ba407d
         'cmd_timeout': None,
ba407d
         'preset': '',
ba407d
+        'registry_user': None,
ba407d
+        'registry_password': None,
ba407d
+        'registry_authfile': None,
ba407d
         'save_group': '',
ba407d
         'since': '',
ba407d
         'skip_commands': [],
ba407d
@@ -319,6 +323,19 @@ class SoSCollector(SoSComponent):
ba407d
         collect_grp.add_argument('--image',
ba407d
                                  help=('Specify the container image to use for'
ba407d
                                        ' containerized hosts.'))
ba407d
+        collect_grp.add_argument('--force-pull-image', '--pull', default=False,
ba407d
+                                 action='store_true',
ba407d
+                                 help='Force pull the container image even if '
ba407d
+                                      'it already exists on the host')
ba407d
+        collect_grp.add_argument('--registry-user', default=None,
ba407d
+                                 help='Username to authenticate to the '
ba407d
+                                      'registry with for pulling an image')
ba407d
+        collect_grp.add_argument('--registry-password', default=None,
ba407d
+                                 help='Password to authenticate to the '
ba407d
+                                      'registry with for pulling an image')
ba407d
+        collect_grp.add_argument('--registry-authfile', default=None,
ba407d
+                                 help='Use this authfile to provide registry '
ba407d
+                                      'authentication when pulling an image')
ba407d
         collect_grp.add_argument('-i', '--ssh-key', help='Specify an ssh key')
ba407d
         collect_grp.add_argument('-j', '--jobs', default=4, type=int,
ba407d
                                  help='Number of concurrent nodes to collect')
ba407d
diff --git a/sos/collector/sosnode.py b/sos/collector/sosnode.py
ba407d
index 48693342..d1c11824 100644
ba407d
--- a/sos/collector/sosnode.py
ba407d
+++ b/sos/collector/sosnode.py
ba407d
@@ -134,9 +134,27 @@ class SosNode():
ba407d
         """If the host is containerized, create the container we'll be using
ba407d
         """
ba407d
         if self.host.containerized:
ba407d
-            res = self.run_command(self.host.create_sos_container(),
ba407d
-                                   need_root=True)
ba407d
-            if res['status'] in [0, 125]:  # 125 means container exists
ba407d
+            cmd = self.host.create_sos_container(
ba407d
+                image=self.opts.image,
ba407d
+                auth=self.get_container_auth(),
ba407d
+                force_pull=self.opts.force_pull_image
ba407d
+            )
ba407d
+            res = self.run_command(cmd, need_root=True)
ba407d
+            if res['status'] in [0, 125]:
ba407d
+                if res['status'] == 125:
ba407d
+                    if 'unable to retrieve auth token' in res['stdout']:
ba407d
+                        self.log_error(
ba407d
+                            "Could not pull image. Provide either a username "
ba407d
+                            "and password or authfile"
ba407d
+                        )
ba407d
+                        raise Exception
ba407d
+                    elif 'unknown: Not found' in res['stdout']:
ba407d
+                        self.log_error('Specified image not found on registry')
ba407d
+                        raise Exception
ba407d
+                    # 'name exists' with code 125 means the container was
ba407d
+                    # created successfully, so ignore it.
ba407d
+                # initial creations leads to an exited container, restarting it
ba407d
+                # here will keep it alive for us to exec through
ba407d
                 ret = self.run_command(self.host.restart_sos_container(),
ba407d
                                        need_root=True)
ba407d
                 if ret['status'] == 0:
ba407d
@@ -152,6 +170,20 @@ class SosNode():
ba407d
                                % res['stdout'])
ba407d
                 raise Exception
ba407d
 
ba407d
+    def get_container_auth(self):
ba407d
+        """Determine what the auth string should be to pull the image used to
ba407d
+        deploy our temporary container
ba407d
+        """
ba407d
+        if self.opts.registry_user:
ba407d
+            return self.host.runtimes['default'].fmt_registry_credentials(
ba407d
+                self.opts.registry_user,
ba407d
+                self.opts.registry_password
ba407d
+            )
ba407d
+        else:
ba407d
+            return self.host.runtimes['default'].fmt_registry_authfile(
ba407d
+                self.opts.registry_authfile or self.host.container_authfile
ba407d
+            )
ba407d
+
ba407d
     def file_exists(self, fname):
ba407d
         """Checks for the presence of fname on the remote node"""
ba407d
         if not self.local:
ba407d
@@ -343,7 +375,7 @@ class SosNode():
ba407d
                           % self.commons['policy'].distro)
ba407d
             return self.commons['policy']
ba407d
         host = load(cache={}, sysroot=self.opts.sysroot, init=InitSystem(),
ba407d
-                    probe_runtime=False, remote_exec=self.ssh_cmd,
ba407d
+                    probe_runtime=True, remote_exec=self.ssh_cmd,
ba407d
                     remote_check=self.read_file('/etc/os-release'))
ba407d
         if host:
ba407d
             self.log_info("loaded policy %s for host" % host.distro)
ba407d
diff --git a/sos/policies/distros/__init__.py b/sos/policies/distros/__init__.py
ba407d
index 9fe31513..f5b9fd5b 100644
ba407d
--- a/sos/policies/distros/__init__.py
ba407d
+++ b/sos/policies/distros/__init__.py
ba407d
@@ -62,6 +62,7 @@ class LinuxPolicy(Policy):
ba407d
     sos_bin_path = '/usr/bin'
ba407d
     sos_container_name = 'sos-collector-tmp'
ba407d
     container_version_command = None
ba407d
+    container_authfile = None
ba407d
 
ba407d
     def __init__(self, sysroot=None, init=None, probe_runtime=True):
ba407d
         super(LinuxPolicy, self).__init__(sysroot=sysroot,
ba407d
@@ -626,13 +627,26 @@ class LinuxPolicy(Policy):
ba407d
         """
ba407d
         return ''
ba407d
 
ba407d
-    def create_sos_container(self):
ba407d
+    def create_sos_container(self, image=None, auth=None, force_pull=False):
ba407d
         """Returns the command that will create the container that will be
ba407d
         used for running commands inside a container on hosts that require it.
ba407d
 
ba407d
         This will use the container runtime defined for the host type to
ba407d
         launch a container. From there, we use the defined runtime to exec into
ba407d
         the container's namespace.
ba407d
+
ba407d
+        :param image:   The name of the image if not using the policy default
ba407d
+        :type image:    ``str`` or ``None``
ba407d
+
ba407d
+        :param auth:    The auth string required by the runtime to pull an
ba407d
+                        image from the registry
ba407d
+        :type auth:     ``str`` or ``None``
ba407d
+
ba407d
+        :param force_pull:  Should the runtime forcibly pull the image
ba407d
+        :type force_pull:   ``bool``
ba407d
+
ba407d
+        :returns:   The command to execute to launch the temp container
ba407d
+        :rtype:     ``str``
ba407d
         """
ba407d
         return ''
ba407d
 
ba407d
diff --git a/sos/policies/distros/redhat.py b/sos/policies/distros/redhat.py
ba407d
index 241d3f13..20afbcc4 100644
ba407d
--- a/sos/policies/distros/redhat.py
ba407d
+++ b/sos/policies/distros/redhat.py
ba407d
@@ -452,15 +452,19 @@ support representative.
ba407d
 
ba407d
         return self.find_preset(ATOMIC)
ba407d
 
ba407d
-    def create_sos_container(self):
ba407d
+    def create_sos_container(self, image=None, auth=None, force_pull=False):
ba407d
         _cmd = ("{runtime} run -di --name {name} --privileged --ipc=host"
ba407d
                 " --net=host --pid=host -e HOST=/host -e NAME={name} -e "
ba407d
-                "IMAGE={image} -v /run:/run -v /var/log:/var/log -v "
ba407d
+                "IMAGE={image} {pull} -v /run:/run -v /var/log:/var/log -v "
ba407d
                 "/etc/machine-id:/etc/machine-id -v "
ba407d
-                "/etc/localtime:/etc/localtime -v /:/host {image}")
ba407d
+                "/etc/localtime:/etc/localtime -v /:/host {auth} {image}")
ba407d
+        _image = image or self.container_image
ba407d
+        _pull = '--pull=always' if force_pull else ''
ba407d
         return _cmd.format(runtime=self.container_runtime,
ba407d
                            name=self.sos_container_name,
ba407d
-                           image=self.container_image)
ba407d
+                           image=_image,
ba407d
+                           pull=_pull,
ba407d
+                           auth=auth or '')
ba407d
 
ba407d
     def set_cleanup_cmd(self):
ba407d
         return 'docker rm --force sos-collector-tmp'
ba407d
@@ -482,6 +486,7 @@ support representative.
ba407d
     container_image = 'registry.redhat.io/rhel8/support-tools'
ba407d
     sos_path_strip = '/host'
ba407d
     container_version_command = 'rpm -q sos'
ba407d
+    container_authfile = '/var/lib/kubelet/config.json'
ba407d
 
ba407d
     def __init__(self, sysroot=None, init=None, probe_runtime=True,
ba407d
                  remote_exec=None):
ba407d
@@ -511,15 +516,19 @@ support representative.
ba407d
         # RH OCP environments.
ba407d
         return self.find_preset(RHOCP)
ba407d
 
ba407d
-    def create_sos_container(self):
ba407d
+    def create_sos_container(self, image=None, auth=None, force_pull=False):
ba407d
         _cmd = ("{runtime} run -di --name {name} --privileged --ipc=host"
ba407d
                 " --net=host --pid=host -e HOST=/host -e NAME={name} -e "
ba407d
-                "IMAGE={image} -v /run:/run -v /var/log:/var/log -v "
ba407d
+                "IMAGE={image} {pull} -v /run:/run -v /var/log:/var/log -v "
ba407d
                 "/etc/machine-id:/etc/machine-id -v "
ba407d
-                "/etc/localtime:/etc/localtime -v /:/host {image}")
ba407d
+                "/etc/localtime:/etc/localtime -v /:/host {auth} {image}")
ba407d
+        _image = image or self.container_image
ba407d
+        _pull = '--pull=always' if force_pull else ''
ba407d
         return _cmd.format(runtime=self.container_runtime,
ba407d
                            name=self.sos_container_name,
ba407d
-                           image=self.container_image)
ba407d
+                           image=_image,
ba407d
+                           pull=_pull,
ba407d
+                           auth=auth or '')
ba407d
 
ba407d
     def set_cleanup_cmd(self):
ba407d
         return 'podman rm --force %s' % self.sos_container_name
ba407d
diff --git a/sos/policies/runtimes/__init__.py b/sos/policies/runtimes/__init__.py
ba407d
index 1a61b644..f28d6a1d 100644
ba407d
--- a/sos/policies/runtimes/__init__.py
ba407d
+++ b/sos/policies/runtimes/__init__.py
ba407d
@@ -157,6 +157,31 @@ class ContainerRuntime():
ba407d
             quoted_cmd = cmd
ba407d
         return "%s %s %s" % (self.run_cmd, container, quoted_cmd)
ba407d
 
ba407d
+    def fmt_registry_credentials(self, username, password):
ba407d
+        """Format a string to pass to the 'run' command of the runtime to
ba407d
+        enable authorization for pulling the image during `sos collect`, if
ba407d
+        needed using username and optional password creds
ba407d
+
ba407d
+        :param username:    The name of the registry user
ba407d
+        :type username:     ``str``
ba407d
+
ba407d
+        :param password:    The password of the registry user
ba407d
+        :type password:     ``str`` or ``None``
ba407d
+
ba407d
+        :returns:  The string to use to enable a run command to pull the image
ba407d
+        :rtype:    ``str``
ba407d
+        """
ba407d
+        return "--creds=%s%s" % (username, ':' + password if password else '')
ba407d
+
ba407d
+    def fmt_registry_authfile(self, authfile):
ba407d
+        """Format a string to pass to the 'run' command of the runtime to
ba407d
+        enable authorization for pulling the image during `sos collect`, if
ba407d
+        needed using an authfile.
ba407d
+        """
ba407d
+        if authfile:
ba407d
+            return "--authfile %s" % authfile
ba407d
+        return ''
ba407d
+
ba407d
     def get_logs_command(self, container):
ba407d
         """Get the command string used to dump container logs from the
ba407d
         runtime
ba407d
-- 
ba407d
2.26.3
ba407d
ba407d
From 3cbbd7df6f0700609eeef3210d7388298b9e0c21 Mon Sep 17 00:00:00 2001
ba407d
From: Jake Hunsaker <jhunsake@redhat.com>
ba407d
Date: Wed, 12 May 2021 13:26:45 -0400
ba407d
Subject: [PATCH] [sosnode] Allow clusters to set options only for master nodes
ba407d
ba407d
Adds a method to `Cluster` that allows a profile to set sos options
ba407d
specifically for master nodes.
ba407d
ba407d
Signed-off-by: Jake Hunsaker <jhunsake@redhat.com>
ba407d
---
ba407d
 sos/collector/clusters/__init__.py | 21 +++++++++++++++++++++
ba407d
 sos/collector/sosnode.py           |  6 ++++++
ba407d
 2 files changed, 27 insertions(+)
ba407d
ba407d
diff --git a/sos/collector/clusters/__init__.py b/sos/collector/clusters/__init__.py
ba407d
index 5c002bae..bfa3aad3 100644
ba407d
--- a/sos/collector/clusters/__init__.py
ba407d
+++ b/sos/collector/clusters/__init__.py
ba407d
@@ -137,6 +137,27 @@ class Cluster():
ba407d
         """
ba407d
         self.cluster_ssh_key = key
ba407d
 
ba407d
+    def set_master_options(self, node):
ba407d
+        """If there is a need to set specific options in the sos command being
ba407d
+        run on the cluster's master nodes, override this method in the cluster
ba407d
+        profile and do that here.
ba407d
+
ba407d
+        :param node:       The master node
ba407d
+        :type node:        ``SoSNode``
ba407d
+        """
ba407d
+        pass
ba407d
+
ba407d
+    def check_node_is_master(self, node):
ba407d
+        """In the event there are multiple masters, or if the collect command
ba407d
+        is being run from a system that is technically capable of enumerating
ba407d
+        nodes but the cluster profiles needs to specify master-specific options
ba407d
+        for other nodes, override this method in the cluster profile
ba407d
+
ba407d
+        :param node:        The node for the cluster to check
ba407d
+        :type node:         ``SoSNode``
ba407d
+        """
ba407d
+        return node.address == self.master.address
ba407d
+
ba407d
     def exec_master_cmd(self, cmd, need_root=False):
ba407d
         """Used to retrieve command output from a (master) node in a cluster
ba407d
 
ba407d
diff --git a/sos/collector/sosnode.py b/sos/collector/sosnode.py
ba407d
index d1c11824..62666635 100644
ba407d
--- a/sos/collector/sosnode.py
ba407d
+++ b/sos/collector/sosnode.py
ba407d
@@ -647,6 +647,10 @@ class SosNode():
ba407d
                                         self.cluster.sos_plugin_options[opt])
ba407d
                     self.opts.plugin_options.append(option)
ba407d
 
ba407d
+        # set master-only options
ba407d
+        if self.cluster.check_node_is_master(self):
ba407d
+            self.cluster.set_master_options(self)
ba407d
+
ba407d
     def finalize_sos_cmd(self):
ba407d
         """Use host facts and compare to the cluster type to modify the sos
ba407d
         command if needed"""
ba407d
@@ -707,6 +711,8 @@ class SosNode():
ba407d
             os.path.join(self.host.sos_bin_path, self.sos_bin)
ba407d
         )
ba407d
 
ba407d
+        self.update_cmd_from_cluster()
ba407d
+
ba407d
         if self.opts.only_plugins:
ba407d
             plugs = [o for o in self.opts.only_plugins
ba407d
                      if self._plugin_exists(o)]
ba407d
-- 
ba407d
2.26.3
ba407d
ba407d
From cae9dd79a59107aa92db5f90aed356e093985bd9 Mon Sep 17 00:00:00 2001
ba407d
From: Jake Hunsaker <jhunsake@redhat.com>
ba407d
Date: Wed, 12 May 2021 16:06:29 -0400
ba407d
Subject: [PATCH] [sosnode] Don't fail on sos-less bastion nodes used for node
ba407d
 lists
ba407d
ba407d
If the master node is determined to not have sos installed, that is not
ba407d
necessarily a fatal error for scenarios where the 'master' node is only
ba407d
being used to enumerate node lists and is not actually part of the
ba407d
cluster. This can happen when a user is using a bastion node to
ba407d
enumerate and connect to the cluster environment, or if the local host
ba407d
is being used to enumerate nodes via cluster client tooling.
ba407d
ba407d
Signed-off-by: Jake Hunsaker <jhunsake@redhat.com>
ba407d
---
ba407d
 sos/collector/sosnode.py | 17 ++++++++++++-----
ba407d
 1 file changed, 12 insertions(+), 5 deletions(-)
ba407d
ba407d
diff --git a/sos/collector/sosnode.py b/sos/collector/sosnode.py
ba407d
index 62666635..7e56483d 100644
ba407d
--- a/sos/collector/sosnode.py
ba407d
+++ b/sos/collector/sosnode.py
ba407d
@@ -287,13 +287,20 @@ class SosNode():
ba407d
             # use the containerized policy's command
ba407d
             pkgs = self.run_command(self.host.container_version_command,
ba407d
                                     use_container=True, need_root=True)
ba407d
-            ver = pkgs['stdout'].strip().split('-')[1]
ba407d
-            if ver:
ba407d
-                self.sos_info['version'] = ver
ba407d
-        if 'version' in self.sos_info:
ba407d
+            if pkgs['status'] == 0:
ba407d
+                ver = pkgs['stdout'].strip().split('-')[1]
ba407d
+                if ver:
ba407d
+                    self.sos_info['version'] = ver
ba407d
+            else:
ba407d
+                self.sos_info['version'] = None
ba407d
+        if self.sos_info['version']:
ba407d
             self.log_info('sos version is %s' % self.sos_info['version'])
ba407d
         else:
ba407d
-            self.log_error('sos is not installed on this node')
ba407d
+            if not self.address == self.opts.master:
ba407d
+                # in the case where the 'master' enumerates nodes but is not
ba407d
+                # intended for collection (bastions), don't worry about sos not
ba407d
+                # being present
ba407d
+                self.log_error('sos is not installed on this node')
ba407d
             self.connected = False
ba407d
             return False
ba407d
         cmd = 'sosreport -l'
ba407d
-- 
ba407d
2.26.3
ba407d
ba407d
From cc5abe563d855dea9ac25f56de2e493228b48bf7 Mon Sep 17 00:00:00 2001
ba407d
From: Jake Hunsaker <jhunsake@redhat.com>
ba407d
Date: Wed, 12 May 2021 18:26:09 -0400
ba407d
Subject: [PATCH] [sosnode] Mark sos commands as explicitly needing root for
ba407d
 containers
ba407d
ba407d
Fixes an issue where the sos inspection commands were not properly
ba407d
marked as needing to be run as root (either directly or via sudo) for
ba407d
containerized hosts, which would lead to incorrect sos command
ba407d
formatting.
ba407d
ba407d
Mark those commands, and the final container removal command, as
ba407d
explicitly needing root permissions.
ba407d
ba407d
Signed-off-by: Jake Hunsaker <jhunsake@redhat.com>
ba407d
---
ba407d
 sos/collector/sosnode.py | 6 +++---
ba407d
 1 file changed, 3 insertions(+), 3 deletions(-)
ba407d
ba407d
diff --git a/sos/collector/sosnode.py b/sos/collector/sosnode.py
ba407d
index 7e56483d..1fc03076 100644
ba407d
--- a/sos/collector/sosnode.py
ba407d
+++ b/sos/collector/sosnode.py
ba407d
@@ -304,7 +304,7 @@ class SosNode():
ba407d
             self.connected = False
ba407d
             return False
ba407d
         cmd = 'sosreport -l'
ba407d
-        sosinfo = self.run_command(cmd, use_container=True)
ba407d
+        sosinfo = self.run_command(cmd, use_container=True, need_root=True)
ba407d
         if sosinfo['status'] == 0:
ba407d
             self._load_sos_plugins(sosinfo['stdout'])
ba407d
         if self.check_sos_version('3.6'):
ba407d
@@ -312,7 +312,7 @@ class SosNode():
ba407d
 
ba407d
     def _load_sos_presets(self):
ba407d
         cmd = 'sosreport --list-presets'
ba407d
-        res = self.run_command(cmd, use_container=True)
ba407d
+        res = self.run_command(cmd, use_container=True, need_root=True)
ba407d
         if res['status'] == 0:
ba407d
             for line in res['stdout'].splitlines():
ba407d
                 if line.strip().startswith('name:'):
ba407d
@@ -996,7 +996,7 @@ class SosNode():
ba407d
             self.remove_file(self.sos_path + '.md5')
ba407d
         cleanup = self.host.set_cleanup_cmd()
ba407d
         if cleanup:
ba407d
-            self.run_command(cleanup)
ba407d
+            self.run_command(cleanup, need_root=True)
ba407d
 
ba407d
     def collect_extra_cmd(self, filenames):
ba407d
         """Collect the file created by a cluster outside of sos"""
ba407d
-- 
ba407d
2.26.3
ba407d
ba407d
From 55e77ad4c7e90ba14b10c5fdf18b65aa5d6b9cf8 Mon Sep 17 00:00:00 2001
ba407d
From: Jake Hunsaker <jhunsake@redhat.com>
ba407d
Date: Wed, 12 May 2021 18:55:31 -0400
ba407d
Subject: [PATCH] [ocp] Add cluster profile for OCP4
ba407d
ba407d
Removes the previous OCP cluster profile and replaces it with an updated
ba407d
one for OCP4 which is entirely separated from the kubernetes profile.
ba407d
ba407d
Resolves: #2544
ba407d
ba407d
Signed-off-by: Jake Hunsaker <jhunsake@redhat.com>
ba407d
---
ba407d
 sos/collector/clusters/kubernetes.py |   8 --
ba407d
 sos/collector/clusters/ocp.py        | 109 +++++++++++++++++++++++++++
ba407d
 2 files changed, 109 insertions(+), 8 deletions(-)
ba407d
 create mode 100644 sos/collector/clusters/ocp.py
ba407d
ba407d
diff --git a/sos/collector/clusters/kubernetes.py b/sos/collector/clusters/kubernetes.py
ba407d
index 6a867e31..08fd9554 100644
ba407d
--- a/sos/collector/clusters/kubernetes.py
ba407d
+++ b/sos/collector/clusters/kubernetes.py
ba407d
@@ -44,11 +44,3 @@ class kubernetes(Cluster):
ba407d
             return nodes
ba407d
         else:
ba407d
             raise Exception('Node enumeration did not return usable output')
ba407d
-
ba407d
-
ba407d
-class openshift(kubernetes):
ba407d
-
ba407d
-    cluster_name = 'OpenShift Container Platform'
ba407d
-    packages = ('atomic-openshift',)
ba407d
-    sos_preset = 'ocp'
ba407d
-    cmd = 'oc'
ba407d
diff --git a/sos/collector/clusters/ocp.py b/sos/collector/clusters/ocp.py
ba407d
new file mode 100644
ba407d
index 00000000..283fcfd1
ba407d
--- /dev/null
ba407d
+++ b/sos/collector/clusters/ocp.py
ba407d
@@ -0,0 +1,109 @@
ba407d
+# Copyright Red Hat 2021, Jake Hunsaker <jhunsake@redhat.com>
ba407d
+
ba407d
+# This file is part of the sos project: https://github.com/sosreport/sos
ba407d
+#
ba407d
+# This copyrighted material is made available to anyone wishing to use,
ba407d
+# modify, copy, or redistribute it subject to the terms and conditions of
ba407d
+# version 2 of the GNU General Public License.
ba407d
+#
ba407d
+# See the LICENSE file in the source distribution for further information.
ba407d
+
ba407d
+from pipes import quote
ba407d
+from sos.collector.clusters import Cluster
ba407d
+
ba407d
+
ba407d
+class ocp(Cluster):
ba407d
+    """OpenShift Container Platform v4"""
ba407d
+
ba407d
+    cluster_name = 'OpenShift Container Platform v4'
ba407d
+    packages = ('openshift-hyperkube', 'openshift-clients')
ba407d
+
ba407d
+    option_list = [
ba407d
+        ('label', '', 'Colon delimited list of labels to select nodes with'),
ba407d
+        ('role', '', 'Colon delimited list of roles to select nodes with'),
ba407d
+        ('kubeconfig', '', 'Path to the kubeconfig file')
ba407d
+    ]
ba407d
+
ba407d
+    def fmt_oc_cmd(self, cmd):
ba407d
+        """Format the oc command to optionall include the kubeconfig file if
ba407d
+        one is specified
ba407d
+        """
ba407d
+        if self.get_option('kubeconfig'):
ba407d
+            return "oc --config %s %s" % (self.get_option('kubeconfig'), cmd)
ba407d
+        return "oc %s" % cmd
ba407d
+
ba407d
+    def check_enabled(self):
ba407d
+        if super(ocp, self).check_enabled():
ba407d
+            return True
ba407d
+        _who = self.fmt_oc_cmd('whoami')
ba407d
+        return self.exec_master_cmd(_who)['status'] == 0
ba407d
+
ba407d
+    def _build_dict(self, nodelist):
ba407d
+        """From the output of get_nodes(), construct an easier-to-reference
ba407d
+        dict of nodes that will be used in determining labels, master status,
ba407d
+        etc...
ba407d
+
ba407d
+        :param nodelist:        The split output of `oc get nodes`
ba407d
+        :type nodelist:         ``list``
ba407d
+
ba407d
+        :returns:           A dict of nodes with `get nodes` columns as keys
ba407d
+        :rtype:             ``dict``
ba407d
+        """
ba407d
+        nodes = {}
ba407d
+        if 'NAME' in nodelist[0]:
ba407d
+            # get the index of the fields
ba407d
+            statline = nodelist.pop(0).split()
ba407d
+            idx = {}
ba407d
+            for state in ['status', 'roles', 'version', 'os-image']:
ba407d
+                try:
ba407d
+                    idx[state] = statline.index(state.upper())
ba407d
+                except Exception:
ba407d
+                    pass
ba407d
+            for node in nodelist:
ba407d
+                _node = node.split()
ba407d
+                nodes[_node[0]] = {}
ba407d
+                for column in idx:
ba407d
+                    nodes[_node[0]][column] = _node[idx[column]]
ba407d
+        return nodes
ba407d
+
ba407d
+    def get_nodes(self):
ba407d
+        nodes = []
ba407d
+        self.node_dict = {}
ba407d
+        cmd = 'get nodes -o wide'
ba407d
+        if self.get_option('label'):
ba407d
+            labels = ','.join(self.get_option('label').split(':'))
ba407d
+            cmd += " -l %s" % quote(labels)
ba407d
+        res = self.exec_master_cmd(self.fmt_oc_cmd(cmd))
ba407d
+        if res['status'] == 0:
ba407d
+            roles = [r for r in self.get_option('role').split(':')]
ba407d
+            self.node_dict = self._build_dict(res['stdout'].splitlines())
ba407d
+            for node in self.node_dict:
ba407d
+                if roles:
ba407d
+                    for role in roles:
ba407d
+                        if role in node:
ba407d
+                            nodes.append(node)
ba407d
+                else:
ba407d
+                    nodes.append(node)
ba407d
+        else:
ba407d
+            msg = "'oc' command failed"
ba407d
+            if 'Missing or incomplete' in res['stdout']:
ba407d
+                msg = ("'oc' failed due to missing kubeconfig on master node."
ba407d
+                       " Specify one via '-c ocp.kubeconfig=<path>'")
ba407d
+            raise Exception(msg)
ba407d
+        return nodes
ba407d
+
ba407d
+    def set_node_label(self, node):
ba407d
+        if node.address not in self.node_dict:
ba407d
+            return ''
ba407d
+        for label in ['master', 'worker']:
ba407d
+            if label in self.node_dict[node.address]['roles']:
ba407d
+                return label
ba407d
+        return ''
ba407d
+
ba407d
+    def check_node_is_master(self, sosnode):
ba407d
+        if sosnode.address not in self.node_dict:
ba407d
+            return False
ba407d
+        return 'master' in self.node_dict[sosnode.address]['roles']
ba407d
+
ba407d
+    def set_master_options(self, node):
ba407d
+        node.opts.enable_plugins.append('openshift')
ba407d
-- 
ba407d
2.26.3
ba407d
ba407d
From a3c1caad21160545eda87ea1fde93e972a6fbf88 Mon Sep 17 00:00:00 2001
ba407d
From: Jake Hunsaker <jhunsake@redhat.com>
ba407d
Date: Wed, 26 May 2021 11:55:24 -0400
ba407d
Subject: [PATCH] [cleaner] Don't strip empty lines from substituted files
ba407d
ba407d
Fixes an issue where empty lines would be stripped from files that have
ba407d
other obfuscations in them. Those empty lines may be important for file
ba407d
structure and/or readability, so we should instead simply not pass empty
ba407d
lines to the parsers rather than skipping them wholesale in the flow of
ba407d
writing obfuscations to a temp file before replacing the source file
ba407d
with a potentially changed temp file.
ba407d
ba407d
Resolves: #2562
ba407d
ba407d
Signed-off-by: Jake Hunsaker <jhunsake@redhat.com>
ba407d
---
ba407d
 sos/cleaner/__init__.py | 6 ++++--
ba407d
 1 file changed, 4 insertions(+), 2 deletions(-)
ba407d
ba407d
diff --git a/sos/cleaner/__init__.py b/sos/cleaner/__init__.py
ba407d
index bdd24f95..55465b85 100644
ba407d
--- a/sos/cleaner/__init__.py
ba407d
+++ b/sos/cleaner/__init__.py
ba407d
@@ -603,8 +603,6 @@ third party.
ba407d
         tfile = tempfile.NamedTemporaryFile(mode='w', dir=self.tmpdir)
ba407d
         with open(filename, 'r') as fname:
ba407d
             for line in fname:
ba407d
-                if not line.strip():
ba407d
-                    continue
ba407d
                 try:
ba407d
                     line, count = self.obfuscate_line(line)
ba407d
                     subs += count
ba407d
@@ -642,7 +640,11 @@ third party.
ba407d
 
ba407d
         Returns the fully obfuscated line and the number of substitutions made
ba407d
         """
ba407d
+        # don't iterate over blank lines, but still write them to the tempfile
ba407d
+        # to maintain the same structure when we write a scrubbed file back
ba407d
         count = 0
ba407d
+        if not line.strip():
ba407d
+            return line, count
ba407d
         for parser in self.parsers:
ba407d
             try:
ba407d
                 line, _count = parser.parse_line(line)
ba407d
-- 
ba407d
2.26.3
ba407d
ba407d
From 892bbd8114703f5a4d23aa77ba5829b7ba59446f Mon Sep 17 00:00:00 2001
ba407d
From: Jake Hunsaker <jhunsake@redhat.com>
ba407d
Date: Wed, 5 May 2021 17:02:04 -0400
ba407d
Subject: [PATCH] [cleaner] Remove binary files by default
ba407d
ba407d
Binary files generally speaking cannot be obfuscated, and as such we
ba407d
should remove them from archives being obfuscated by default so that
ba407d
sensitive data is not mistakenly included in an obfuscated archive.
ba407d
ba407d
This commit adds a new `--keep-binary-files` option that if used will
ba407d
keep any encountered binary files in the final archive. The default
ba407d
option of `false` will ensure that encountered binary files are removed.
ba407d
ba407d
The number of removed binary files per archive is reported when
ba407d
obfuscation is completed for that archive.
ba407d
ba407d
Closes: #2478
ba407d
Resolves: #2524
ba407d
ba407d
Signed-off-by: Jake Hunsaker <jhunsake@redhat.com>
ba407d
---
ba407d
 man/en/sos-clean.1                          |  12 ++++
ba407d
 sos/cleaner/__init__.py                     |  21 +++++-
ba407d
 sos/cleaner/obfuscation_archive.py          |  67 ++++++++++++++++++--
ba407d
 sos/collector/__init__.py                   |   5 ++
ba407d
 sos/report/__init__.py                      |   6 ++
ba407d
 8 files changed, 167 insertions(+), 7 deletions(-)
ba407d
ba407d
diff --git a/man/en/sos-clean.1 b/man/en/sos-clean.1
ba407d
index 4856b43b..b77bc63c 100644
ba407d
--- a/man/en/sos-clean.1
ba407d
+++ b/man/en/sos-clean.1
ba407d
@@ -9,6 +9,7 @@ sos clean - Obfuscate sensitive data from one or more sosreports
ba407d
     [\-\-map-file]
ba407d
     [\-\-jobs]
ba407d
     [\-\-no-update]
ba407d
+    [\-\-keep-binary-files]
ba407d
 
ba407d
 .SH DESCRIPTION
ba407d
 \fBsos clean\fR or \fBsos mask\fR is an sos subcommand used to obfuscate sensitive information from
ba407d
@@ -77,6 +78,17 @@ Default: 4
ba407d
 .TP
ba407d
 .B \-\-no-update
ba407d
 Do not write the mapping file contents to /etc/sos/cleaner/default_mapping
ba407d
+.TP
ba407d
+.B \-\-keep-binary-files
ba407d
+Keep unprocessable binary files in the archive, rather than removing them.
ba407d
+
ba407d
+Note that binary files cannot be obfuscated, and thus keeping them in the archive
ba407d
+may result in otherwise sensitive information being included in the final archive.
ba407d
+Users should review any archive that keeps binary files in place before sending to
ba407d
+a third party.
ba407d
+
ba407d
+Default: False (remove encountered binary files)
ba407d
+
ba407d
 .SH SEE ALSO
ba407d
 .BR sos (1)
ba407d
 .BR sos-report (1)
ba407d
diff --git a/sos/cleaner/__init__.py b/sos/cleaner/__init__.py
ba407d
index 55465b85..f88ff8a0 100644
ba407d
--- a/sos/cleaner/__init__.py
ba407d
+++ b/sos/cleaner/__init__.py
ba407d
@@ -47,6 +47,7 @@ class SoSCleaner(SoSComponent):
ba407d
         'keyword_file': None,
ba407d
         'map_file': '/etc/sos/cleaner/default_mapping',
ba407d
         'no_update': False,
ba407d
+        'keep_binary_files': False,
ba407d
         'target': '',
ba407d
         'usernames': []
ba407d
     }
ba407d
@@ -183,6 +184,11 @@ third party.
ba407d
                                action='store_true',
ba407d
                                help='Do not update the --map file with new '
ba407d
                                     'mappings from this run')
ba407d
+        clean_grp.add_argument('--keep-binary-files', default=False,
ba407d
+                               action='store_true',
ba407d
+                               dest='keep_binary_files',
ba407d
+                               help='Keep unprocessable binary files in the '
ba407d
+                                    'archive instead of removing them')
ba407d
         clean_grp.add_argument('--usernames', dest='usernames', default=[],
ba407d
                                action='extend',
ba407d
                                help='List of usernames to obfuscate')
ba407d
@@ -467,6 +473,11 @@ third party.
ba407d
                        "%s concurrently\n"
ba407d
                        % (len(self.report_paths), self.opts.jobs))
ba407d
                 self.ui_log.info(msg)
ba407d
+            if self.opts.keep_binary_files:
ba407d
+                self.ui_log.warning(
ba407d
+                    "WARNING: binary files that potentially contain sensitive "
ba407d
+                    "information will NOT be removed from the final archive\n"
ba407d
+                )
ba407d
             pool = ThreadPoolExecutor(self.opts.jobs)
ba407d
             pool.map(self.obfuscate_report, self.report_paths, chunksize=1)
ba407d
             pool.shutdown(wait=True)
ba407d
@@ -539,6 +550,10 @@ third party.
ba407d
                 short_name = fname.split(archive.archive_name + '/')[1]
ba407d
                 if archive.should_skip_file(short_name):
ba407d
                     continue
ba407d
+                if (not self.opts.keep_binary_files and
ba407d
+                        archive.should_remove_file(short_name)):
ba407d
+                    archive.remove_file(short_name)
ba407d
+                    continue
ba407d
                 try:
ba407d
                     count = self.obfuscate_file(fname, short_name,
ba407d
                                                 archive.archive_name)
ba407d
@@ -574,7 +589,11 @@ third party.
ba407d
             arc_md.add_field('files_obfuscated', len(archive.file_sub_list))
ba407d
             arc_md.add_field('total_substitutions', archive.total_sub_count)
ba407d
             self.completed_reports.append(archive)
ba407d
-            archive.report_msg("Obfuscation completed")
ba407d
+            rmsg = ''
ba407d
+            if archive.removed_file_count:
ba407d
+                rmsg = " [removed %s unprocessable files]"
ba407d
+                rmsg = rmsg % archive.removed_file_count
ba407d
+            archive.report_msg("Obfuscation completed%s" % rmsg)
ba407d
 
ba407d
         except Exception as err:
ba407d
             self.ui_log.info("Exception while processing %s: %s"
ba407d
diff --git a/sos/cleaner/obfuscation_archive.py b/sos/cleaner/obfuscation_archive.py
ba407d
index c64ab13b..76841b51 100644
ba407d
--- a/sos/cleaner/obfuscation_archive.py
ba407d
+++ b/sos/cleaner/obfuscation_archive.py
ba407d
@@ -28,6 +28,7 @@ class SoSObfuscationArchive():
ba407d
 
ba407d
     file_sub_list = []
ba407d
     total_sub_count = 0
ba407d
+    removed_file_count = 0
ba407d
 
ba407d
     def __init__(self, archive_path, tmpdir):
ba407d
         self.archive_path = archive_path
ba407d
@@ -62,11 +63,7 @@ class SoSObfuscationArchive():
ba407d
             'sys/firmware',
ba407d
             'sys/fs',
ba407d
             'sys/kernel/debug',
ba407d
-            'sys/module',
ba407d
-            r'.*\.tar$',  # TODO: support archive unpacking
ba407d
-            # Be explicit with these tar matches to avoid matching commands
ba407d
-            r'.*\.tar\.xz',
ba407d
-            '.*.gz'
ba407d
+            'sys/module'
ba407d
         ]
ba407d
 
ba407d
     @property
ba407d
@@ -76,6 +73,17 @@ class SoSObfuscationArchive():
ba407d
         except Exception:
ba407d
             return False
ba407d
 
ba407d
+    def remove_file(self, fname):
ba407d
+        """Remove a file from the archive. This is used when cleaner encounters
ba407d
+        a binary file, which we cannot reliably obfuscate.
ba407d
+        """
ba407d
+        full_fname = self.get_file_path(fname)
ba407d
+        # don't call a blank remove() here
ba407d
+        if full_fname:
ba407d
+            self.log_info("Removing binary file '%s' from archive" % fname)
ba407d
+            os.remove(full_fname)
ba407d
+            self.removed_file_count += 1
ba407d
+
ba407d
     def extract(self):
ba407d
         if self.is_tarfile:
ba407d
             self.report_msg("Extracting...")
ba407d
@@ -227,3 +235,52 @@ class SoSObfuscationArchive():
ba407d
             if filename.startswith(_skip) or re.match(_skip, filename):
ba407d
                 return True
ba407d
         return False
ba407d
+
ba407d
+    def should_remove_file(self, fname):
ba407d
+        """Determine if the file should be removed or not, due to an inability
ba407d
+        to reliably obfuscate that file based on the filename.
ba407d
+
ba407d
+        :param fname:       Filename relative to the extracted archive root
ba407d
+        :type fname:        ``str``
ba407d
+
ba407d
+        :returns:   ``True`` if the file cannot be reliably obfuscated
ba407d
+        :rtype:     ``bool``
ba407d
+        """
ba407d
+        obvious_removes = [
ba407d
+            r'.*\.gz',  # TODO: support flat gz/xz extraction
ba407d
+            r'.*\.xz',
ba407d
+            r'.*\.bzip2',
ba407d
+            r'.*\.tar\..*',  # TODO: support archive unpacking
ba407d
+            r'.*\.txz$',
ba407d
+            r'.*\.tgz$',
ba407d
+            r'.*\.bin',
ba407d
+            r'.*\.journal',
ba407d
+            r'.*\~$'
ba407d
+        ]
ba407d
+
ba407d
+        # if the filename matches, it is obvious we can remove them without
ba407d
+        # doing the read test
ba407d
+        for _arc_reg in obvious_removes:
ba407d
+            if re.match(_arc_reg, fname):
ba407d
+                return True
ba407d
+
ba407d
+        return self.file_is_binary(fname)
ba407d
+
ba407d
+    def file_is_binary(self, fname):
ba407d
+        """Determine if the file is a binary file or not.
ba407d
+
ba407d
+
ba407d
+        :param fname:          Filename relative to the extracted archive root
ba407d
+        :type fname:           ``str``
ba407d
+
ba407d
+        :returns:   ``True`` if file is binary, else ``False``
ba407d
+        :rtype:     ``bool``
ba407d
+        """
ba407d
+        with open(self.get_file_path(fname), 'tr') as tfile:
ba407d
+            try:
ba407d
+                # when opened as above (tr), reading binary content will raise
ba407d
+                # an exception
ba407d
+                tfile.read(1)
ba407d
+                return False
ba407d
+            except UnicodeDecodeError:
ba407d
+                return True
ba407d
diff --git a/sos/collector/__init__.py b/sos/collector/__init__.py
ba407d
index 9884836c..469db60d 100644
ba407d
--- a/sos/collector/__init__.py
ba407d
+++ b/sos/collector/__init__.py
ba407d
@@ -67,6 +67,7 @@ class SoSCollector(SoSComponent):
ba407d
         'jobs': 4,
ba407d
         'keywords': [],
ba407d
         'keyword_file': None,
ba407d
+        'keep_binary_files': False,
ba407d
         'label': '',
ba407d
         'list_options': False,
ba407d
         'log_size': 0,
ba407d
@@ -410,6 +411,10 @@ class SoSCollector(SoSComponent):
ba407d
                                  dest='clean',
ba407d
                                  default=False, action='store_true',
ba407d
                                  help='Obfuscate sensistive information')
ba407d
+        cleaner_grp.add_argument('--keep-binary-files', default=False,
ba407d
+                                 action='store_true', dest='keep_binary_files',
ba407d
+                                 help='Keep unprocessable binary files in the '
ba407d
+                                      'archive instead of removing them')
ba407d
         cleaner_grp.add_argument('--domains', dest='domains', default=[],
ba407d
                                  action='extend',
ba407d
                                  help='Additional domain names to obfuscate')
ba407d
diff --git a/sos/report/__init__.py b/sos/report/__init__.py
ba407d
index d4345409..2cedc76e 100644
ba407d
--- a/sos/report/__init__.py
ba407d
+++ b/sos/report/__init__.py
ba407d
@@ -82,6 +82,7 @@ class SoSReport(SoSComponent):
ba407d
         'case_id': '',
ba407d
         'chroot': 'auto',
ba407d
         'clean': False,
ba407d
+        'keep_binary_files': False,
ba407d
         'desc': '',
ba407d
         'domains': [],
ba407d
         'dry_run': False,
ba407d
@@ -344,6 +345,11 @@ class SoSReport(SoSComponent):
ba407d
                                  default='/etc/sos/cleaner/default_mapping',
ba407d
                                  help=('Provide a previously generated mapping'
ba407d
                                        ' file for obfuscation'))
ba407d
+        cleaner_grp.add_argument('--keep-binary-files', default=False,
ba407d
+                                 action='store_true',
ba407d
+                                 dest='keep_binary_files',
ba407d
+                                 help='Keep unprocessable binary files in the '
ba407d
+                                      'archive instead of removing them')
ba407d
         cleaner_grp.add_argument('--usernames', dest='usernames', default=[],
ba407d
                                  action='extend',
ba407d
                                  help='List of usernames to obfuscate')
ba407d
ba407d
From aed0102a1d6ef9a030c9e5349f092b51b9d1f22d Mon Sep 17 00:00:00 2001
ba407d
From: Jake Hunsaker <jhunsake@redhat.com>
ba407d
Date: Fri, 11 Jun 2021 23:20:59 -0400
ba407d
Subject: [PATCH 01/10] [SoSNode] Allow individually setting node options
ba407d
ba407d
Like we now do for primary nodes, add the ability to individually set
ba407d
node options via a new `set_node_options()` method for when blanket
ba407d
setting options across all nodes via the options class attrs is not
ba407d
sufficient.
ba407d
ba407d
Signed-off-by: Jake Hunsaker <jhunsake@redhat.com>
ba407d
---
ba407d
 sos/collector/clusters/__init__.py | 10 ++++++++++
ba407d
 sos/collector/sosnode.py           |  6 ++++--
ba407d
 2 files changed, 14 insertions(+), 2 deletions(-)
ba407d
ba407d
diff --git a/sos/collector/clusters/__init__.py b/sos/collector/clusters/__init__.py
ba407d
index 90e62d79..c4da1ab8 100644
ba407d
--- a/sos/collector/clusters/__init__.py
ba407d
+++ b/sos/collector/clusters/__init__.py
ba407d
@@ -137,6 +137,16 @@ class Cluster():
ba407d
         """
ba407d
         self.cluster_ssh_key = key
ba407d
 
ba407d
+    def set_node_options(self, node):
ba407d
+        """If there is a need to set specific options on ONLY the non-primary
ba407d
+        nodes in a collection, override this method in the cluster profile
ba407d
+        and do that here.
ba407d
+
ba407d
+        :param node:        The non-primary node
ba407d
+        :type node:         ``SoSNode``
ba407d
+        """
ba407d
+        pass
ba407d
+
ba407d
     def set_master_options(self, node):
ba407d
         """If there is a need to set specific options in the sos command being
ba407d
         run on the cluster's master nodes, override this method in the cluster
ba407d
diff --git a/sos/collector/sosnode.py b/sos/collector/sosnode.py
ba407d
index 1fc03076..7e784aa1 100644
ba407d
--- a/sos/collector/sosnode.py
ba407d
+++ b/sos/collector/sosnode.py
ba407d
@@ -657,6 +657,8 @@ class SosNode():
ba407d
         # set master-only options
ba407d
         if self.cluster.check_node_is_master(self):
ba407d
             self.cluster.set_master_options(self)
ba407d
+        else:
ba407d
+            self.cluster.set_node_options(self)
ba407d
 
ba407d
     def finalize_sos_cmd(self):
ba407d
         """Use host facts and compare to the cluster type to modify the sos
ba407d
@@ -713,13 +715,13 @@ class SosNode():
ba407d
                 sos_opts.append('--cmd-timeout=%s'
ba407d
                                 % quote(str(self.opts.cmd_timeout)))
ba407d
 
ba407d
+        self.update_cmd_from_cluster()
ba407d
+
ba407d
         sos_cmd = sos_cmd.replace(
ba407d
             'sosreport',
ba407d
             os.path.join(self.host.sos_bin_path, self.sos_bin)
ba407d
         )
ba407d
 
ba407d
-        self.update_cmd_from_cluster()
ba407d
-
ba407d
         if self.opts.only_plugins:
ba407d
             plugs = [o for o in self.opts.only_plugins
ba407d
                      if self._plugin_exists(o)]
ba407d
-- 
ba407d
2.26.3
ba407d
ba407d
ba407d
From 96f166699d12704cc7cf73cb8b13278675f68730 Mon Sep 17 00:00:00 2001
ba407d
From: Jake Hunsaker <jhunsake@redhat.com>
ba407d
Date: Sat, 12 Jun 2021 00:02:36 -0400
ba407d
Subject: [PATCH 02/10] [sosnode] Support passing env vars to `run_command()`
ba407d
ba407d
Updates `run_command()` to support passing new environment variables to
ba407d
the command being run, for that command alone. This parameter takes a
ba407d
dict, and if set we will first copy the existing set of env vars on the
ba407d
node and then update that set of variables using the passed dict.
ba407d
ba407d
Additionally, `execute_sos_command()` will now try to pass a new
ba407d
`sos_env_vars` dict (default empty) so that clusters may set environment
ba407d
variables specifically for the sos command being run, without having to
ba407d
modify the actual sos command being executed.
ba407d
ba407d
Signed-off-by: Jake Hunsaker <jhunsake@redhat.com>
ba407d
---
ba407d
 sos/collector/sosnode.py | 27 ++++++++++++++++++++++++---
ba407d
 1 file changed, 24 insertions(+), 3 deletions(-)
ba407d
ba407d
diff --git a/sos/collector/sosnode.py b/sos/collector/sosnode.py
ba407d
index 7e784aa1..40472a4e 100644
ba407d
--- a/sos/collector/sosnode.py
ba407d
+++ b/sos/collector/sosnode.py
ba407d
@@ -45,6 +45,8 @@ class SosNode():
ba407d
         self.host = None
ba407d
         self.cluster = None
ba407d
         self.hostname = None
ba407d
+        self.sos_env_vars = {}
ba407d
+        self._env_vars = {}
ba407d
         self._password = password or self.opts.password
ba407d
         if not self.opts.nopasswd_sudo and not self.opts.sudo_pw:
ba407d
             self.opts.sudo_pw = self._password
ba407d
@@ -109,6 +111,21 @@ class SosNode():
ba407d
     def _fmt_msg(self, msg):
ba407d
         return '{:<{}} : {}'.format(self._hostname, self.hostlen + 1, msg)
ba407d
 
ba407d
+    @property
ba407d
+    def env_vars(self):
ba407d
+        if not self._env_vars:
ba407d
+            if self.local:
ba407d
+                self._env_vars = os.environ.copy()
ba407d
+            else:
ba407d
+                ret = self.run_command("env --null")
ba407d
+                if ret['status'] == 0:
ba407d
+                    for ln in ret['output'].split('\x00'):
ba407d
+                        if not ln:
ba407d
+                            continue
ba407d
+                        _val = ln.split('=')
ba407d
+                        self._env_vars[_val[0]] = _val[1]
ba407d
+        return self._env_vars
ba407d
+
ba407d
     def set_node_manifest(self, manifest):
ba407d
         """Set the manifest section that this node will write to
ba407d
         """
ba407d
@@ -404,7 +421,7 @@ class SosNode():
ba407d
         return self.host.package_manager.pkg_by_name(pkg) is not None
ba407d
 
ba407d
     def run_command(self, cmd, timeout=180, get_pty=False, need_root=False,
ba407d
-                    force_local=False, use_container=False):
ba407d
+                    force_local=False, use_container=False, env=None):
ba407d
         """Runs a given cmd, either via the SSH session or locally
ba407d
 
ba407d
         Arguments:
ba407d
@@ -446,7 +463,10 @@ class SosNode():
ba407d
         else:
ba407d
             if get_pty:
ba407d
                 cmd = "/bin/bash -c %s" % quote(cmd)
ba407d
-        res = pexpect.spawn(cmd, encoding='utf-8')
ba407d
+        if env:
ba407d
+            _cmd_env = self.env_vars
ba407d
+            _cmd_env.update(env)
ba407d
+        res = pexpect.spawn(cmd, encoding='utf-8', env=_cmd_env)
ba407d
         if need_root:
ba407d
             if self.need_sudo:
ba407d
                 res.sendline(self.opts.sudo_pw)
ba407d
@@ -830,7 +850,8 @@ class SosNode():
ba407d
             res = self.run_command(self.sos_cmd,
ba407d
                                    timeout=self.opts.timeout,
ba407d
                                    get_pty=True, need_root=True,
ba407d
-                                   use_container=True)
ba407d
+                                   use_container=True,
ba407d
+                                   env=self.sos_env_vars)
ba407d
             if res['status'] == 0:
ba407d
                 for line in res['stdout'].splitlines():
ba407d
                     if fnmatch.fnmatch(line, '*sosreport-*tar*'):
ba407d
-- 
ba407d
2.26.3
ba407d
ba407d
ba407d
From a9e1632113406a646bdd7525982b699cf790aedb Mon Sep 17 00:00:00 2001
ba407d
From: Jake Hunsaker <jhunsake@redhat.com>
ba407d
Date: Tue, 15 Jun 2021 12:43:27 -0400
ba407d
Subject: [PATCH 03/10] [collect|sosnode] Avoid clobbering sos options
ba407d
 between nodes
ba407d
ba407d
This commit overhauls the function of `finalize_sos_cmd()` in several
ba407d
ways.
ba407d
ba407d
First, assign the sos report plugin related options directly to private
ba407d
copies of those values for each node, so that the shared cluster profile
ba407d
does not clobber options between nodes.
ba407d
ba407d
Second, provide a default Lock mechanism for clusters that need to
ba407d
perform some node-comparison logic when assigning options based on node
ba407d
role.
ba407d
ba407d
Finally, finalize the sos command for each node _prior_ to the call to
ba407d
`SoSNode.sosreport()` so that we can be sure that clusters are able to
ba407d
appropriately compare and assign sos options across nodes before some
ba407d
nodes have already started and/or finished their own sos report
ba407d
collections.
ba407d
ba407d
Signed-off-by: Jake Hunsaker <jhunsake@redhat.com>
ba407d
---
ba407d
 sos/collector/__init__.py          | 14 +++++
ba407d
 sos/collector/clusters/__init__.py |  2 +
ba407d
 sos/collector/sosnode.py           | 89 +++++++++++++++++-------------
ba407d
 3 files changed, 67 insertions(+), 38 deletions(-)
ba407d
ba407d
diff --git a/sos/collector/__init__.py b/sos/collector/__init__.py
ba407d
index 469db60d..7b8cfcf7 100644
ba407d
--- a/sos/collector/__init__.py
ba407d
+++ b/sos/collector/__init__.py
ba407d
@@ -1186,6 +1186,10 @@ this utility or remote systems that it connects to.
ba407d
                              "concurrently\n"
ba407d
                              % (self.report_num, self.opts.jobs))
ba407d
 
ba407d
+            npool = ThreadPoolExecutor(self.opts.jobs)
ba407d
+            npool.map(self._finalize_sos_cmd, self.client_list, chunksize=1)
ba407d
+            npool.shutdown(wait=True)
ba407d
+
ba407d
             pool = ThreadPoolExecutor(self.opts.jobs)
ba407d
             pool.map(self._collect, self.client_list, chunksize=1)
ba407d
             pool.shutdown(wait=True)
ba407d
@@ -1217,6 +1221,16 @@ this utility or remote systems that it connects to.
ba407d
             except Exception as err:
ba407d
                 self.ui_log.error("Upload attempt failed: %s" % err)
ba407d
 
ba407d
+    def _finalize_sos_cmd(self, client):
ba407d
+        """Calls finalize_sos_cmd() on each node so that we have the final
ba407d
+        command before we thread out the actual execution of sos
ba407d
+        """
ba407d
+        try:
ba407d
+            client.finalize_sos_cmd()
ba407d
+        except Exception as err:
ba407d
+            self.log_error("Could not finalize sos command for %s: %s"
ba407d
+                           % (client.address, err))
ba407d
+
ba407d
     def _collect(self, client):
ba407d
         """Runs sosreport on each node"""
ba407d
         try:
ba407d
diff --git a/sos/collector/clusters/__init__.py b/sos/collector/clusters/__init__.py
ba407d
index c4da1ab8..bb728bc0 100644
ba407d
--- a/sos/collector/clusters/__init__.py
ba407d
+++ b/sos/collector/clusters/__init__.py
ba407d
@@ -11,6 +11,7 @@
ba407d
 import logging
ba407d
 
ba407d
 from sos.options import ClusterOption
ba407d
+from threading import Lock
ba407d
 
ba407d
 
ba407d
 class Cluster():
ba407d
@@ -66,6 +67,7 @@ class Cluster():
ba407d
             if cls.__name__ != 'Cluster':
ba407d
                 self.cluster_type.append(cls.__name__)
ba407d
         self.node_list = None
ba407d
+        self.lock = Lock()
ba407d
         self.soslog = logging.getLogger('sos')
ba407d
         self.ui_log = logging.getLogger('sos_ui')
ba407d
         self.options = []
ba407d
diff --git a/sos/collector/sosnode.py b/sos/collector/sosnode.py
ba407d
index 40472a4e..1c25cc34 100644
ba407d
--- a/sos/collector/sosnode.py
ba407d
+++ b/sos/collector/sosnode.py
ba407d
@@ -38,6 +38,7 @@ class SosNode():
ba407d
         self.address = address.strip()
ba407d
         self.commons = commons
ba407d
         self.opts = commons['cmdlineopts']
ba407d
+        self._assign_config_opts()
ba407d
         self.tmpdir = commons['tmpdir']
ba407d
         self.hostlen = commons['hostlen']
ba407d
         self.need_sudo = commons['need_sudo']
ba407d
@@ -465,8 +466,8 @@ class SosNode():
ba407d
                 cmd = "/bin/bash -c %s" % quote(cmd)
ba407d
         if env:
ba407d
             _cmd_env = self.env_vars
ba407d
-            _cmd_env.update(env)
ba407d
-        res = pexpect.spawn(cmd, encoding='utf-8', env=_cmd_env)
ba407d
+            env = _cmd_env.update(env)
ba407d
+        res = pexpect.spawn(cmd, encoding='utf-8', env=env)
ba407d
         if need_root:
ba407d
             if self.need_sudo:
ba407d
                 res.sendline(self.opts.sudo_pw)
ba407d
@@ -484,9 +485,6 @@ class SosNode():
ba407d
 
ba407d
     def sosreport(self):
ba407d
         """Run a sosreport on the node, then collect it"""
ba407d
-        self.sos_cmd = self.finalize_sos_cmd()
ba407d
-        self.log_info('Final sos command set to %s' % self.sos_cmd)
ba407d
-        self.manifest.add_field('final_sos_command', self.sos_cmd)
ba407d
         try:
ba407d
             path = self.execute_sos_command()
ba407d
             if path:
ba407d
@@ -656,29 +654,42 @@ class SosNode():
ba407d
         This will NOT override user supplied options.
ba407d
         """
ba407d
         if self.cluster.sos_preset:
ba407d
-            if not self.opts.preset:
ba407d
-                self.opts.preset = self.cluster.sos_preset
ba407d
+            if not self.preset:
ba407d
+                self.preset = self.cluster.sos_preset
ba407d
             else:
ba407d
                 self.log_info('Cluster specified preset %s but user has also '
ba407d
                               'defined a preset. Using user specification.'
ba407d
                               % self.cluster.sos_preset)
ba407d
         if self.cluster.sos_plugins:
ba407d
             for plug in self.cluster.sos_plugins:
ba407d
-                if plug not in self.opts.enable_plugins:
ba407d
-                    self.opts.enable_plugins.append(plug)
ba407d
+                if plug not in self.enable_plugins:
ba407d
+                    self.enable_plugins.append(plug)
ba407d
 
ba407d
         if self.cluster.sos_plugin_options:
ba407d
             for opt in self.cluster.sos_plugin_options:
ba407d
-                if not any(opt in o for o in self.opts.plugin_options):
ba407d
+                if not any(opt in o for o in self.plugin_options):
ba407d
                     option = '%s=%s' % (opt,
ba407d
                                         self.cluster.sos_plugin_options[opt])
ba407d
-                    self.opts.plugin_options.append(option)
ba407d
+                    self.plugin_options.append(option)
ba407d
 
ba407d
         # set master-only options
ba407d
         if self.cluster.check_node_is_master(self):
ba407d
-            self.cluster.set_master_options(self)
ba407d
+            with self.cluster.lock:
ba407d
+                self.cluster.set_master_options(self)
ba407d
         else:
ba407d
-            self.cluster.set_node_options(self)
ba407d
+            with self.cluster.lock:
ba407d
+                self.cluster.set_node_options(self)
ba407d
+
ba407d
+    def _assign_config_opts(self):
ba407d
+        """From the global opts configuration, assign those values locally
ba407d
+        to this node so that they may be acted on individually.
ba407d
+        """
ba407d
+        # assign these to new, private copies
ba407d
+        self.only_plugins = list(self.opts.only_plugins)
ba407d
+        self.skip_plugins = list(self.opts.skip_plugins)
ba407d
+        self.enable_plugins = list(self.opts.enable_plugins)
ba407d
+        self.plugin_options = list(self.opts.plugin_options)
ba407d
+        self.preset = list(self.opts.preset)
ba407d
 
ba407d
     def finalize_sos_cmd(self):
ba407d
         """Use host facts and compare to the cluster type to modify the sos
ba407d
@@ -742,59 +753,61 @@ class SosNode():
ba407d
             os.path.join(self.host.sos_bin_path, self.sos_bin)
ba407d
         )
ba407d
 
ba407d
-        if self.opts.only_plugins:
ba407d
-            plugs = [o for o in self.opts.only_plugins
ba407d
-                     if self._plugin_exists(o)]
ba407d
-            if len(plugs) != len(self.opts.only_plugins):
ba407d
-                not_only = list(set(self.opts.only_plugins) - set(plugs))
ba407d
+        if self.only_plugins:
ba407d
+            plugs = [o for o in self.only_plugins if self._plugin_exists(o)]
ba407d
+            if len(plugs) != len(self.only_plugins):
ba407d
+                not_only = list(set(self.only_plugins) - set(plugs))
ba407d
                 self.log_debug('Requested plugins %s were requested to be '
ba407d
                                'enabled but do not exist' % not_only)
ba407d
-            only = self._fmt_sos_opt_list(self.opts.only_plugins)
ba407d
+            only = self._fmt_sos_opt_list(self.only_plugins)
ba407d
             if only:
ba407d
                 sos_opts.append('--only-plugins=%s' % quote(only))
ba407d
-            return "%s %s" % (sos_cmd, ' '.join(sos_opts))
ba407d
+            self.sos_cmd = "%s %s" % (sos_cmd, ' '.join(sos_opts))
ba407d
+            self.log_info('Final sos command set to %s' % self.sos_cmd)
ba407d
+            self.manifest.add_field('final_sos_command', self.sos_cmd)
ba407d
+            return
ba407d
 
ba407d
-        if self.opts.skip_plugins:
ba407d
+        if self.skip_plugins:
ba407d
             # only run skip-plugins for plugins that are enabled
ba407d
-            skip = [o for o in self.opts.skip_plugins
ba407d
-                    if self._check_enabled(o)]
ba407d
-            if len(skip) != len(self.opts.skip_plugins):
ba407d
-                not_skip = list(set(self.opts.skip_plugins) - set(skip))
ba407d
+            skip = [o for o in self.skip_plugins if self._check_enabled(o)]
ba407d
+            if len(skip) != len(self.skip_plugins):
ba407d
+                not_skip = list(set(self.skip_plugins) - set(skip))
ba407d
                 self.log_debug('Requested to skip plugins %s, but plugins are '
ba407d
                                'already not enabled' % not_skip)
ba407d
             skipln = self._fmt_sos_opt_list(skip)
ba407d
             if skipln:
ba407d
                 sos_opts.append('--skip-plugins=%s' % quote(skipln))
ba407d
 
ba407d
-        if self.opts.enable_plugins:
ba407d
+        if self.enable_plugins:
ba407d
             # only run enable for plugins that are disabled
ba407d
-            opts = [o for o in self.opts.enable_plugins
ba407d
-                    if o not in self.opts.skip_plugins
ba407d
+            opts = [o for o in self.enable_plugins
ba407d
+                    if o not in self.skip_plugins
ba407d
                     and self._check_disabled(o) and self._plugin_exists(o)]
ba407d
-            if len(opts) != len(self.opts.enable_plugins):
ba407d
-                not_on = list(set(self.opts.enable_plugins) - set(opts))
ba407d
+            if len(opts) != len(self.enable_plugins):
ba407d
+                not_on = list(set(self.enable_plugins) - set(opts))
ba407d
                 self.log_debug('Requested to enable plugins %s, but plugins '
ba407d
                                'are already enabled or do not exist' % not_on)
ba407d
             enable = self._fmt_sos_opt_list(opts)
ba407d
             if enable:
ba407d
                 sos_opts.append('--enable-plugins=%s' % quote(enable))
ba407d
 
ba407d
-        if self.opts.plugin_options:
ba407d
-            opts = [o for o in self.opts.plugin_options
ba407d
+        if self.plugin_options:
ba407d
+            opts = [o for o in self.plugin_options
ba407d
                     if self._plugin_exists(o.split('.')[0])
ba407d
                     and self._plugin_option_exists(o.split('=')[0])]
ba407d
             if opts:
ba407d
                 sos_opts.append('-k %s' % quote(','.join(o for o in opts)))
ba407d
 
ba407d
-        if self.opts.preset:
ba407d
-            if self._preset_exists(self.opts.preset):
ba407d
-                sos_opts.append('--preset=%s' % quote(self.opts.preset))
ba407d
+        if self.preset:
ba407d
+            if self._preset_exists(self.preset):
ba407d
+                sos_opts.append('--preset=%s' % quote(self.preset))
ba407d
             else:
ba407d
                 self.log_debug('Requested to enable preset %s but preset does '
ba407d
-                               'not exist on node' % self.opts.preset)
ba407d
+                               'not exist on node' % self.preset)
ba407d
 
ba407d
-        _sos_cmd = "%s %s" % (sos_cmd, ' '.join(sos_opts))
ba407d
-        return _sos_cmd
ba407d
+        self.sos_cmd = "%s %s" % (sos_cmd, ' '.join(sos_opts))
ba407d
+        self.log_info('Final sos command set to %s' % self.sos_cmd)
ba407d
+        self.manifest.add_field('final_sos_command', self.sos_cmd)
ba407d
 
ba407d
     def determine_sos_label(self):
ba407d
         """Determine what, if any, label should be added to the sosreport"""
ba407d
-- 
ba407d
2.26.3
ba407d
ba407d
ba407d
From 7e6c078e51143f7064190b316a251ddd8d431495 Mon Sep 17 00:00:00 2001
ba407d
From: Jake Hunsaker <jhunsake@redhat.com>
ba407d
Date: Tue, 15 Jun 2021 18:38:34 -0400
ba407d
Subject: [PATCH 04/10] [cleaner] Improve handling of symlink obfuscation
ba407d
ba407d
Improves handling of symlink obfuscation by only performing the
ba407d
obfuscation on the ultimate target of any symlinks encountered. Now,
ba407d
when a symlink is encountered, clean will obfuscate the link name and
ba407d
re-write it in the archive, pointing to the (potentially obfuscated)
ba407d
target name.
ba407d
ba407d
Signed-off-by: Jake Hunsaker <jhunsake@redhat.com>
ba407d
---
ba407d
 sos/cleaner/__init__.py | 65 +++++++++++++++++++++++++++++------------
ba407d
 1 file changed, 46 insertions(+), 19 deletions(-)
ba407d
ba407d
diff --git a/sos/cleaner/__init__.py b/sos/cleaner/__init__.py
ba407d
index abfb684b..b38c8dfc 100644
ba407d
--- a/sos/cleaner/__init__.py
ba407d
+++ b/sos/cleaner/__init__.py
ba407d
@@ -612,28 +612,55 @@ third party.
ba407d
         if not filename:
ba407d
             # the requested file doesn't exist in the archive
ba407d
             return
ba407d
-        self.log_debug("Obfuscating %s" % short_name or filename,
ba407d
-                       caller=arc_name)
ba407d
         subs = 0
ba407d
-        tfile = tempfile.NamedTemporaryFile(mode='w', dir=self.tmpdir)
ba407d
-        with open(filename, 'r') as fname:
ba407d
-            for line in fname:
ba407d
-                try:
ba407d
-                    line, count = self.obfuscate_line(line)
ba407d
-                    subs += count
ba407d
-                    tfile.write(line)
ba407d
-                except Exception as err:
ba407d
-                    self.log_debug("Unable to obfuscate %s: %s"
ba407d
-                                   % (short_name, err), caller=arc_name)
ba407d
-        tfile.seek(0)
ba407d
-        if subs:
ba407d
-            shutil.copy(tfile.name, filename)
ba407d
-        tfile.close()
ba407d
-        _ob_filename = self.obfuscate_string(short_name)
ba407d
-        if _ob_filename != short_name:
ba407d
+        if not os.path.islink(filename):
ba407d
+            # don't run the obfuscation on the link, but on the actual file
ba407d
+            # at some other point.
ba407d
+            self.log_debug("Obfuscating %s" % short_name or filename,
ba407d
+                           caller=arc_name)
ba407d
+            tfile = tempfile.NamedTemporaryFile(mode='w', dir=self.tmpdir)
ba407d
+            with open(filename, 'r') as fname:
ba407d
+                for line in fname:
ba407d
+                    try:
ba407d
+                        line, count = self.obfuscate_line(line)
ba407d
+                        subs += count
ba407d
+                        tfile.write(line)
ba407d
+                    except Exception as err:
ba407d
+                        self.log_debug("Unable to obfuscate %s: %s"
ba407d
+                                       % (short_name, err), caller=arc_name)
ba407d
+            tfile.seek(0)
ba407d
+            if subs:
ba407d
+                shutil.copy(tfile.name, filename)
ba407d
+            tfile.close()
ba407d
+
ba407d
+        _ob_short_name = self.obfuscate_string(short_name.split('/')[-1])
ba407d
+        _ob_filename = short_name.replace(short_name.split('/')[-1],
ba407d
+                                          _ob_short_name)
ba407d
+        _sym_changed = False
ba407d
+        if os.path.islink(filename):
ba407d
+            _link = os.readlink(filename)
ba407d
+            _ob_link = self.obfuscate_string(_link)
ba407d
+            if _ob_link != _link:
ba407d
+                _sym_changed = True
ba407d
+
ba407d
+        if (_ob_filename != short_name) or _sym_changed:
ba407d
             arc_path = filename.split(short_name)[0]
ba407d
             _ob_path = os.path.join(arc_path, _ob_filename)
ba407d
-            os.rename(filename, _ob_path)
ba407d
+            # ensure that any plugin subdirs that contain obfuscated strings
ba407d
+            # get created with obfuscated counterparts
ba407d
+            if not os.path.islink(filename):
ba407d
+                os.rename(filename, _ob_path)
ba407d
+            else:
ba407d
+                # generate the obfuscated name of the link target
ba407d
+                _target_ob = self.obfuscate_string(os.readlink(filename))
ba407d
+                # remove the unobfuscated original symlink first, in case the
ba407d
+                # symlink name hasn't changed but the target has
ba407d
+                os.remove(filename)
ba407d
+                # create the newly obfuscated symlink, pointing to the
ba407d
+                # obfuscated target name, which may not exist just yet, but
ba407d
+                # when the actual file is obfuscated, will be created
ba407d
+                os.symlink(_target_ob, _ob_path)
ba407d
+
ba407d
         return subs
ba407d
 
ba407d
     def obfuscate_string(self, string_data):
ba407d
-- 
ba407d
2.26.3
ba407d
ba407d
ba407d
From b5d166ac9ff79bc3740c5e66f16d60762f9a0ac0 Mon Sep 17 00:00:00 2001
ba407d
From: Jake Hunsaker <jhunsake@redhat.com>
ba407d
Date: Tue, 15 Jun 2021 22:56:19 -0400
ba407d
Subject: [PATCH 05/10] [cleaner] Iterate over matches with most precise match
ba407d
 first
ba407d
ba407d
When matching strings in parsers to do obfuscation, we should be using
ba407d
the most precise matches found first, rather than matching in the order
ba407d
a match is hit. This ensures that we correctly obfuscate an entire
ba407d
string, rather than potentially only partial substring(s) that exist
ba407d
within the entire match.
ba407d
ba407d
Signed-off-by: Jake Hunsaker <jhunsake@redhat.com>
ba407d
---
ba407d
 sos/cleaner/parsers/__init__.py        | 10 +++++++---
ba407d
 sos/cleaner/parsers/keyword_parser.py  |  2 +-
ba407d
 sos/cleaner/parsers/username_parser.py |  2 +-
ba407d
 3 files changed, 9 insertions(+), 5 deletions(-)
ba407d
ba407d
diff --git a/sos/cleaner/parsers/__init__.py b/sos/cleaner/parsers/__init__.py
ba407d
index c77300aa..cfa20b95 100644
ba407d
--- a/sos/cleaner/parsers/__init__.py
ba407d
+++ b/sos/cleaner/parsers/__init__.py
ba407d
@@ -82,10 +82,12 @@ class SoSCleanerParser():
ba407d
         for pattern in self.regex_patterns:
ba407d
             matches = [m[0] for m in re.findall(pattern, line, re.I)]
ba407d
             if matches:
ba407d
+                matches.sort(reverse=True, key=lambda x: len(x))
ba407d
                 count += len(matches)
ba407d
                 for match in matches:
ba407d
-                    new_match = self.mapping.get(match.strip())
ba407d
-                    line = line.replace(match.strip(), new_match)
ba407d
+                    match = match.strip()
ba407d
+                    new_match = self.mapping.get(match)
ba407d
+                    line = line.replace(match, new_match)
ba407d
         return line, count
ba407d
 
ba407d
     def parse_string_for_keys(self, string_data):
ba407d
@@ -102,7 +104,9 @@ class SoSCleanerParser():
ba407d
         :returns: The obfuscated line
ba407d
         :rtype: ``str``
ba407d
         """
ba407d
-        for key, val in self.mapping.dataset.items():
ba407d
+        for pair in sorted(self.mapping.dataset.items(), reverse=True,
ba407d
+                           key=lambda x: len(x[0])):
ba407d
+            key, val = pair
ba407d
             if key in string_data:
ba407d
                 string_data = string_data.replace(key, val)
ba407d
         return string_data
ba407d
diff --git a/sos/cleaner/parsers/keyword_parser.py b/sos/cleaner/parsers/keyword_parser.py
ba407d
index 3dc2b7f0..9134f82d 100644
ba407d
--- a/sos/cleaner/parsers/keyword_parser.py
ba407d
+++ b/sos/cleaner/parsers/keyword_parser.py
ba407d
@@ -42,7 +42,7 @@ class SoSKeywordParser(SoSCleanerParser):
ba407d
 
ba407d
     def parse_line(self, line):
ba407d
         count = 0
ba407d
-        for keyword in self.user_keywords:
ba407d
+        for keyword in sorted(self.user_keywords, reverse=True):
ba407d
             if keyword in line:
ba407d
                 line = line.replace(keyword, self.mapping.get(keyword))
ba407d
                 count += 1
ba407d
diff --git a/sos/cleaner/parsers/username_parser.py b/sos/cleaner/parsers/username_parser.py
ba407d
index 2bb6c7f3..0c3bbac4 100644
ba407d
--- a/sos/cleaner/parsers/username_parser.py
ba407d
+++ b/sos/cleaner/parsers/username_parser.py
ba407d
@@ -51,7 +51,7 @@ class SoSUsernameParser(SoSCleanerParser):
ba407d
 
ba407d
     def parse_line(self, line):
ba407d
         count = 0
ba407d
-        for username in self.mapping.dataset.keys():
ba407d
+        for username in sorted(self.mapping.dataset.keys(), reverse=True):
ba407d
             if username in line:
ba407d
                 count = line.count(username)
ba407d
                 line = line.replace(username, self.mapping.get(username))
ba407d
-- 
ba407d
2.26.3
ba407d
ba407d
ba407d
From 7ed138fcd2ee6ece3e7fbd9e48293b212e0b4e41 Mon Sep 17 00:00:00 2001
ba407d
From: Jake Hunsaker <jhunsake@redhat.com>
ba407d
Date: Wed, 16 Jun 2021 01:15:45 -0400
ba407d
Subject: [PATCH 06/10] [cleaner] Explicitly obfuscate directory names within
ba407d
 archives
ba407d
ba407d
This commit adds a step to `obfuscate_report()` that explicitly walks
ba407d
through all directories in the archive, and obfuscates the directory
ba407d
names if necessary.
ba407d
ba407d
Since this uses `obfuscate_string()` for the directory names, a
ba407d
`skip_keys` list has been added to maps to allow parsers/maps to
ba407d
specify matched keys (such as short names for the hostname parser) that
ba407d
should not be considered when obfuscating directory names (e.g. 'www').
ba407d
ba407d
Closes: #2465
ba407d
ba407d
Signed-off-by: Jake Hunsaker <jhunsake@redhat.com>
ba407d
---
ba407d
 sos/cleaner/__init__.py              | 26 ++++++++++++++++++++++++++
ba407d
 sos/cleaner/mappings/__init__.py     |  4 +++-
ba407d
 sos/cleaner/mappings/hostname_map.py |  5 +++++
ba407d
 sos/cleaner/obfuscation_archive.py   | 20 ++++++++++++++++++--
ba407d
 sos/cleaner/parsers/__init__.py      |  2 ++
ba407d
 5 files changed, 54 insertions(+), 3 deletions(-)
ba407d
ba407d
diff --git a/sos/cleaner/__init__.py b/sos/cleaner/__init__.py
ba407d
index b38c8dfc..88d4d0ea 100644
ba407d
--- a/sos/cleaner/__init__.py
ba407d
+++ b/sos/cleaner/__init__.py
ba407d
@@ -562,6 +562,11 @@ third party.
ba407d
                 except Exception as err:
ba407d
                     self.log_debug("Unable to parse file %s: %s"
ba407d
                                    % (short_name, err))
ba407d
+            try:
ba407d
+                self.obfuscate_directory_names(archive)
ba407d
+            except Exception as err:
ba407d
+                self.log_info("Failed to obfuscate directories: %s" % err,
ba407d
+                              caller=archive.archive_name)
ba407d
 
ba407d
             # if the archive was already a tarball, repack it
ba407d
             method = archive.get_compression()
ba407d
@@ -663,6 +668,27 @@ third party.
ba407d
 
ba407d
         return subs
ba407d
 
ba407d
+    def obfuscate_directory_names(self, archive):
ba407d
+        """For all directories that exist within the archive, obfuscate the
ba407d
+        directory name if it contains sensitive strings found during execution
ba407d
+        """
ba407d
+        self.log_info("Obfuscating directory names in archive %s"
ba407d
+                      % archive.archive_name)
ba407d
+        for dirpath in sorted(archive.get_directory_list(), reverse=True):
ba407d
+            for _name in os.listdir(dirpath):
ba407d
+                _dirname = os.path.join(dirpath, _name)
ba407d
+                _arc_dir = _dirname.split(archive.extracted_path)[-1]
ba407d
+                if os.path.isdir(_dirname):
ba407d
+                    _ob_dirname = self.obfuscate_string(_name)
ba407d
+                    if _ob_dirname != _name:
ba407d
+                        _ob_arc_dir = _arc_dir.rstrip(_name)
ba407d
+                        _ob_arc_dir = os.path.join(
ba407d
+                            archive.extracted_path,
ba407d
+                            _ob_arc_dir.lstrip('/'),
ba407d
+                            _ob_dirname
ba407d
+                        )
ba407d
+                        os.rename(_dirname, _ob_arc_dir)
ba407d
+
ba407d
     def obfuscate_string(self, string_data):
ba407d
         for parser in self.parsers:
ba407d
             try:
ba407d
diff --git a/sos/cleaner/mappings/__init__.py b/sos/cleaner/mappings/__init__.py
ba407d
index dd464e5a..5cf5c8b2 100644
ba407d
--- a/sos/cleaner/mappings/__init__.py
ba407d
+++ b/sos/cleaner/mappings/__init__.py
ba407d
@@ -20,8 +20,10 @@ class SoSMap():
ba407d
     corresponding SoSMap() object, to allow for easy retrieval of obfuscated
ba407d
     items.
ba407d
     """
ba407d
-
ba407d
+    # used for regex skips in parser.parse_line()
ba407d
     ignore_matches = []
ba407d
+    # used for filename obfuscations in parser.parse_string_for_keys()
ba407d
+    skip_keys = []
ba407d
 
ba407d
     def __init__(self):
ba407d
         self.dataset = {}
ba407d
diff --git a/sos/cleaner/mappings/hostname_map.py b/sos/cleaner/mappings/hostname_map.py
ba407d
index e0b7bf1d..c9a44d8d 100644
ba407d
--- a/sos/cleaner/mappings/hostname_map.py
ba407d
+++ b/sos/cleaner/mappings/hostname_map.py
ba407d
@@ -35,6 +35,11 @@ class SoSHostnameMap(SoSMap):
ba407d
         '^com..*'
ba407d
     ]
ba407d
 
ba407d
+    skip_keys = [
ba407d
+        'www',
ba407d
+        'api'
ba407d
+    ]
ba407d
+
ba407d
     host_count = 0
ba407d
     domain_count = 0
ba407d
     _domains = {}
ba407d
diff --git a/sos/cleaner/obfuscation_archive.py b/sos/cleaner/obfuscation_archive.py
ba407d
index 88f978d9..90188358 100644
ba407d
--- a/sos/cleaner/obfuscation_archive.py
ba407d
+++ b/sos/cleaner/obfuscation_archive.py
ba407d
@@ -202,10 +202,22 @@ class SoSObfuscationArchive():
ba407d
         """Return a list of all files within the archive"""
ba407d
         self.file_list = []
ba407d
         for dirname, dirs, files in os.walk(self.extracted_path):
ba407d
+            for _dir in dirs:
ba407d
+                _dirpath = os.path.join(dirname, _dir)
ba407d
+                # catch dir-level symlinks
ba407d
+                if os.path.islink(_dirpath) and os.path.isdir(_dirpath):
ba407d
+                    self.file_list.append(_dirpath)
ba407d
             for filename in files:
ba407d
                 self.file_list.append(os.path.join(dirname, filename))
ba407d
         return self.file_list
ba407d
 
ba407d
+    def get_directory_list(self):
ba407d
+        """Return a list of all directories within the archive"""
ba407d
+        dir_list = []
ba407d
+        for dirname, dirs, files in os.walk(self.extracted_path):
ba407d
+            dir_list.append(dirname)
ba407d
+        return dir_list
ba407d
+
ba407d
     def update_sub_count(self, fname, count):
ba407d
         """Called when a file has finished being parsed and used to track
ba407d
         total substitutions made and number of files that had changes made
ba407d
@@ -230,7 +242,8 @@ class SoSObfuscationArchive():
ba407d
                                         archive root
ba407d
         """
ba407d
 
ba407d
-        if not os.path.isfile(self.get_file_path(filename)):
ba407d
+        if (not os.path.isfile(self.get_file_path(filename)) and not
ba407d
+                os.path.islink(self.get_file_path(filename))):
ba407d
             return True
ba407d
 
ba407d
         for _skip in self.skip_list:
ba407d
@@ -266,7 +279,10 @@ class SoSObfuscationArchive():
ba407d
             if re.match(_arc_reg, fname):
ba407d
                 return True
ba407d
 
ba407d
-        return self.file_is_binary(fname)
ba407d
+        if os.path.isfile(self.get_file_path(fname)):
ba407d
+            return self.file_is_binary(fname)
ba407d
+        # don't fail on dir-level symlinks
ba407d
+        return False
ba407d
 
ba407d
     def file_is_binary(self, fname):
ba407d
         """Determine if the file is a binary file or not.
ba407d
diff --git a/sos/cleaner/parsers/__init__.py b/sos/cleaner/parsers/__init__.py
ba407d
index cfa20b95..84874475 100644
ba407d
--- a/sos/cleaner/parsers/__init__.py
ba407d
+++ b/sos/cleaner/parsers/__init__.py
ba407d
@@ -107,6 +107,8 @@ class SoSCleanerParser():
ba407d
         for pair in sorted(self.mapping.dataset.items(), reverse=True,
ba407d
                            key=lambda x: len(x[0])):
ba407d
             key, val = pair
ba407d
+            if key in self.mapping.skip_keys:
ba407d
+                continue
ba407d
             if key in string_data:
ba407d
                 string_data = string_data.replace(key, val)
ba407d
         return string_data
ba407d
-- 
ba407d
2.26.3
ba407d
ba407d
ba407d
From f180150277b706e72f2445287f3d0b6943efa252 Mon Sep 17 00:00:00 2001
ba407d
From: Jake Hunsaker <jhunsake@redhat.com>
ba407d
Date: Wed, 16 Jun 2021 02:24:51 -0400
ba407d
Subject: [PATCH 07/10] [hostname parser,map] Attempt to detect strings with
ba407d
 FQDN substrings
ba407d
ba407d
This commit updates the hostname parser and associated map to be able to
ba407d
better detect and obfuscate FQDN substrings within file content and file
ba407d
names, particularly when the regex patterns failed to match a hostname
ba407d
that is formatted with '_' characters rather than '.' characters.
ba407d
ba407d
The `get()` method has been updated to also preserve characters and
ba407d
certain extensions that are not part of the FQDN, but are brought in by
ba407d
the regex pattern due to the fact that we need to use word boundary
ba407d
indicators within the pattern.
ba407d
ba407d
Signed-off-by: Jake Hunsaker <jhunsake@redhat.com>
ba407d
---
ba407d
 sos/cleaner/mappings/hostname_map.py   | 59 +++++++++++++++++++++++---
ba407d
 sos/cleaner/parsers/__init__.py        |  3 +-
ba407d
 sos/cleaner/parsers/hostname_parser.py | 30 ++++++++++---
ba407d
 3 files changed, 81 insertions(+), 11 deletions(-)
ba407d
ba407d
diff --git a/sos/cleaner/mappings/hostname_map.py b/sos/cleaner/mappings/hostname_map.py
ba407d
index c9a44d8d..d4b2c88e 100644
ba407d
--- a/sos/cleaner/mappings/hostname_map.py
ba407d
+++ b/sos/cleaner/mappings/hostname_map.py
ba407d
@@ -104,7 +104,7 @@ class SoSHostnameMap(SoSMap):
ba407d
         host = domain.split('.')
ba407d
         if len(host) == 1:
ba407d
             # don't block on host's shortname
ba407d
-            return True
ba407d
+            return host[0] in self.hosts.keys()
ba407d
         else:
ba407d
             domain = host[0:-1]
ba407d
             for known_domain in self._domains:
ba407d
@@ -113,12 +113,59 @@ class SoSHostnameMap(SoSMap):
ba407d
         return False
ba407d
 
ba407d
     def get(self, item):
ba407d
-        if item.startswith(('.', '_')):
ba407d
-            item = item.lstrip('._')
ba407d
-        item = item.strip()
ba407d
+        prefix = ''
ba407d
+        suffix = ''
ba407d
+        final = None
ba407d
+        # The regex pattern match may include a leading and/or trailing '_'
ba407d
+        # character due to the need to use word boundary matching, so we need
ba407d
+        # to strip these from the string during processing, but still keep them
ba407d
+        # in the returned string to not mangle the string replacement in the
ba407d
+        # context of the file or filename
ba407d
+        while item.startswith(('.', '_')):
ba407d
+            prefix += item[0]
ba407d
+            item = item[1:]
ba407d
+        while item.endswith(('.', '_')):
ba407d
+            suffix += item[-1]
ba407d
+            item = item[0:-1]
ba407d
         if not self.domain_name_in_loaded_domains(item.lower()):
ba407d
             return item
ba407d
-        return super(SoSHostnameMap, self).get(item)
ba407d
+        if item.endswith(('.yaml', '.yml', '.crt', '.key', '.pem')):
ba407d
+            ext = '.' + item.split('.')[-1]
ba407d
+            item = item.replace(ext, '')
ba407d
+            suffix += ext
ba407d
+        if item not in self.dataset.keys():
ba407d
+            # try to account for use of '-' in names that include hostnames
ba407d
+            # and don't create new mappings for each of these
ba407d
+            for _existing in sorted(self.dataset.keys(), reverse=True,
ba407d
+                                    key=lambda x: len(x)):
ba407d
+                _host_substr = False
ba407d
+                _test = item.split(_existing)
ba407d
+                _h = _existing.split('.')
ba407d
+                # avoid considering a full FQDN match as a new match off of
ba407d
+                # the hostname of an existing match
ba407d
+                if _h[0] and _h[0] in self.hosts.keys():
ba407d
+                    _host_substr = True
ba407d
+                if len(_test) == 1 or not _test[0]:
ba407d
+                    # does not match existing obfuscation
ba407d
+                    continue
ba407d
+                elif _test[0].endswith('.') and not _host_substr:
ba407d
+                    # new hostname in known domain
ba407d
+                    final = super(SoSHostnameMap, self).get(item)
ba407d
+                    break
ba407d
+                elif item.split(_test[0]):
ba407d
+                    # string that includes existing FQDN obfuscation substring
ba407d
+                    # so, only obfuscate the FQDN part
ba407d
+                    try:
ba407d
+                        itm = item.split(_test[0])[1]
ba407d
+                        final = _test[0] + super(SoSHostnameMap, self).get(itm)
ba407d
+                        break
ba407d
+                    except Exception:
ba407d
+                        # fallback to still obfuscating the entire item
ba407d
+                        pass
ba407d
+
ba407d
+        if not final:
ba407d
+            final = super(SoSHostnameMap, self).get(item)
ba407d
+        return prefix + final + suffix
ba407d
 
ba407d
     def sanitize_item(self, item):
ba407d
         host = item.split('.')
ba407d
@@ -146,6 +193,8 @@ class SoSHostnameMap(SoSMap):
ba407d
         """Obfuscate the short name of the host with an incremented counter
ba407d
         based on the total number of obfuscated host names
ba407d
         """
ba407d
+        if not hostname:
ba407d
+            return hostname
ba407d
         if hostname not in self.hosts:
ba407d
             ob_host = "host%s" % self.host_count
ba407d
             self.hosts[hostname] = ob_host
ba407d
diff --git a/sos/cleaner/parsers/__init__.py b/sos/cleaner/parsers/__init__.py
ba407d
index 84874475..57d2020a 100644
ba407d
--- a/sos/cleaner/parsers/__init__.py
ba407d
+++ b/sos/cleaner/parsers/__init__.py
ba407d
@@ -87,7 +87,8 @@ class SoSCleanerParser():
ba407d
                 for match in matches:
ba407d
                     match = match.strip()
ba407d
                     new_match = self.mapping.get(match)
ba407d
-                    line = line.replace(match, new_match)
ba407d
+                    if new_match != match:
ba407d
+                        line = line.replace(match, new_match)
ba407d
         return line, count
ba407d
 
ba407d
     def parse_string_for_keys(self, string_data):
ba407d
diff --git a/sos/cleaner/parsers/hostname_parser.py b/sos/cleaner/parsers/hostname_parser.py
ba407d
index 9982024b..3de6bb08 100644
ba407d
--- a/sos/cleaner/parsers/hostname_parser.py
ba407d
+++ b/sos/cleaner/parsers/hostname_parser.py
ba407d
@@ -18,7 +18,7 @@ class SoSHostnameParser(SoSCleanerParser):
ba407d
     map_file_key = 'hostname_map'
ba407d
     prep_map_file = 'sos_commands/host/hostname'
ba407d
     regex_patterns = [
ba407d
-        r'(((\b|_)[a-zA-Z0-9-\.]{1,200}\.[a-zA-Z]{1,63}\b))'
ba407d
+        r'(((\b|_)[a-zA-Z0-9-\.]{1,200}\.[a-zA-Z]{1,63}(\b|_)))'
ba407d
     ]
ba407d
 
ba407d
     def __init__(self, conf_file=None, opt_domains=None):
ba407d
@@ -66,10 +66,30 @@ class SoSHostnameParser(SoSCleanerParser):
ba407d
         """Override the default parse_line() method to also check for the
ba407d
         shortname of the host derived from the hostname.
ba407d
         """
ba407d
+
ba407d
+        def _check_line(ln, count, search, repl=None):
ba407d
+            """Perform a second manual check for substrings that may have been
ba407d
+            missed by regex matching
ba407d
+            """
ba407d
+            if search in self.mapping.skip_keys:
ba407d
+                return ln, count
ba407d
+            if search in ln:
ba407d
+                count += ln.count(search)
ba407d
+                ln = ln.replace(search, self.mapping.get(repl or search))
ba407d
+            return ln, count
ba407d
+
ba407d
         count = 0
ba407d
         line, count = super(SoSHostnameParser, self).parse_line(line)
ba407d
-        for short_name in self.short_names:
ba407d
-            if short_name in line:
ba407d
-                count += 1
ba407d
-                line = line.replace(short_name, self.mapping.get(short_name))
ba407d
+        # make an additional pass checking for '_' formatted substrings that
ba407d
+        # the regex patterns won't catch
ba407d
+        hosts = [h for h in self.mapping.dataset.keys() if '.' in h]
ba407d
+        for host in sorted(hosts, reverse=True, key=lambda x: len(x)):
ba407d
+            fqdn = host
ba407d
+            for c in '.-':
ba407d
+                fqdn = fqdn.replace(c, '_')
ba407d
+            line, count = _check_line(line, count, fqdn, host)
ba407d
+
ba407d
+        for short_name in sorted(self.short_names, reverse=True):
ba407d
+            line, count = _check_line(line, count, short_name)
ba407d
+
ba407d
         return line, count
ba407d
-- 
ba407d
2.26.3
ba407d
ba407d
ba407d
From ec46e6a8fac58ed757344be3751eb1f925eab981 Mon Sep 17 00:00:00 2001
ba407d
From: Jake Hunsaker <jhunsake@redhat.com>
ba407d
Date: Mon, 14 Jun 2021 09:31:07 -0400
ba407d
Subject: [PATCH 08/10] [ocp] Refine OCP node options in cluster profile
ba407d
ba407d
Adds explicit setting of primary/node sos options for the `openshift`
ba407d
plugin within the cluster, rather than relying on default configurations
ba407d
and best practices to avoid duplicate collections.
ba407d
ba407d
Signed-off-by: Jake Hunsaker <jhunsake@redhat.com>
ba407d
---
ba407d
 sos/collector/clusters/ocp.py | 65 +++++++++++++++++++++++++++++++++--
ba407d
 sos/collector/sosnode.py      |  4 +--
ba407d
 2 files changed, 65 insertions(+), 4 deletions(-)
ba407d
ba407d
diff --git a/sos/collector/clusters/ocp.py b/sos/collector/clusters/ocp.py
ba407d
index 283fcfd1..ddff84a4 100644
ba407d
--- a/sos/collector/clusters/ocp.py
ba407d
+++ b/sos/collector/clusters/ocp.py
ba407d
@@ -8,6 +8,8 @@
ba407d
 #
ba407d
 # See the LICENSE file in the source distribution for further information.
ba407d
 
ba407d
+import os
ba407d
+
ba407d
 from pipes import quote
ba407d
 from sos.collector.clusters import Cluster
ba407d
 
ba407d
@@ -18,10 +20,14 @@ class ocp(Cluster):
ba407d
     cluster_name = 'OpenShift Container Platform v4'
ba407d
     packages = ('openshift-hyperkube', 'openshift-clients')
ba407d
 
ba407d
+    api_collect_enabled = False
ba407d
+    token = None
ba407d
+
ba407d
     option_list = [
ba407d
         ('label', '', 'Colon delimited list of labels to select nodes with'),
ba407d
         ('role', '', 'Colon delimited list of roles to select nodes with'),
ba407d
-        ('kubeconfig', '', 'Path to the kubeconfig file')
ba407d
+        ('kubeconfig', '', 'Path to the kubeconfig file'),
ba407d
+        ('token', '', 'Service account token to use for oc authorization')
ba407d
     ]
ba407d
 
ba407d
     def fmt_oc_cmd(self, cmd):
ba407d
@@ -32,9 +38,20 @@ class ocp(Cluster):
ba407d
             return "oc --config %s %s" % (self.get_option('kubeconfig'), cmd)
ba407d
         return "oc %s" % cmd
ba407d
 
ba407d
+    def _attempt_oc_login(self):
ba407d
+        """Attempt to log in to the API via the oc command using a provided
ba407d
+        token
ba407d
+        """
ba407d
+        _res = self.exec_primary_cmd("oc login --insecure-skip-tls-verify=True"
ba407d
+                                     " --token=%s" % self.token)
ba407d
+        return _res['status'] == 0
ba407d
+
ba407d
     def check_enabled(self):
ba407d
         if super(ocp, self).check_enabled():
ba407d
             return True
ba407d
+        self.token = self.get_option('token') or os.getenv('SOSOCPTOKEN', None)
ba407d
+        if self.token:
ba407d
+            self._attempt_oc_login()
ba407d
         _who = self.fmt_oc_cmd('whoami')
ba407d
         return self.exec_master_cmd(_who)['status'] == 0
ba407d
 
ba407d
@@ -106,4 +123,48 @@ class ocp(Cluster):
ba407d
         return 'master' in self.node_dict[sosnode.address]['roles']
ba407d
 
ba407d
     def set_master_options(self, node):
ba407d
-        node.opts.enable_plugins.append('openshift')
ba407d
+        node.enable_plugins.append('openshift')
ba407d
+        if self.api_collect_enabled:
ba407d
+            # a primary has already been enabled for API collection, so
ba407d
+            # disable oc-based collection on this node
ba407d
+            node.plugin_options.append('openshift.no-oc=on')
ba407d
+        else:
ba407d
+            _oc_cmd = 'oc'
ba407d
+            if node.host.containerized:
ba407d
+                _oc_cmd = '/host/bin/oc'
ba407d
+                # when run from a container, the oc command does not inherit
ba407d
+                # the default config, so if it's present then pass it here to
ba407d
+                # detect a functional oc command. This is sidestepped in sos
ba407d
+                # report by being able to chroot the `oc` execution which we
ba407d
+                # cannot do remotely
ba407d
+                if node.file_exists('/root/.kube/config', need_root=True):
ba407d
+                    _oc_cmd += ' --kubeconfig /host/root/.kube/config'
ba407d
+            can_oc = node.run_command("%s whoami" % _oc_cmd,
ba407d
+                                      use_container=node.host.containerized,
ba407d
+                                      # container is available only to root
ba407d
+                                      # and if rhel, need to run sos as root
ba407d
+                                      # anyways which will run oc as root
ba407d
+                                      need_root=True)
ba407d
+            if can_oc['status'] == 0:
ba407d
+                # the primary node can already access the API
ba407d
+                self.api_collect_enabled = True
ba407d
+            elif self.token:
ba407d
+                node.sos_env_vars['SOSOCPTOKEN'] = self.token
ba407d
+                self.api_collect_enabled = True
ba407d
+            elif self.get_option('kubeconfig'):
ba407d
+                kc = self.get_option('kubeconfig')
ba407d
+                if node.file_exists(kc):
ba407d
+                    if node.host.containerized:
ba407d
+                        kc = "/host/%s" % kc
ba407d
+                    node.sos_env_vars['KUBECONFIG'] = kc
ba407d
+                    self.api_collect_enabled = True
ba407d
+            if self.api_collect_enabled:
ba407d
+                msg = ("API collections will be performed on %s\nNote: API "
ba407d
+                       "collections may extend runtime by 10s of minutes\n"
ba407d
+                       % node.address)
ba407d
+                self.soslog.info(msg)
ba407d
+                self.ui_log.info(msg)
ba407d
+
ba407d
+    def set_node_options(self, node):
ba407d
+        # don't attempt OC API collections on non-primary nodes
ba407d
+        node.plugin_options.append('openshift.no-oc=on')
ba407d
diff --git a/sos/collector/sosnode.py b/sos/collector/sosnode.py
ba407d
index 1c25cc34..6597d236 100644
ba407d
--- a/sos/collector/sosnode.py
ba407d
+++ b/sos/collector/sosnode.py
ba407d
@@ -202,11 +202,11 @@ class SosNode():
ba407d
                 self.opts.registry_authfile or self.host.container_authfile
ba407d
             )
ba407d
 
ba407d
-    def file_exists(self, fname):
ba407d
+    def file_exists(self, fname, need_root=False):
ba407d
         """Checks for the presence of fname on the remote node"""
ba407d
         if not self.local:
ba407d
             try:
ba407d
-                res = self.run_command("stat %s" % fname)
ba407d
+                res = self.run_command("stat %s" % fname, need_root=need_root)
ba407d
                 return res['status'] == 0
ba407d
             except Exception:
ba407d
                 return False
ba407d
-- 
ba407d
2.26.3
ba407d
ba407d
ba407d
From eea8e15845a8bcba91b93a5310ba693e8c20ab9c Mon Sep 17 00:00:00 2001
ba407d
From: Jake Hunsaker <jhunsake@redhat.com>
ba407d
Date: Thu, 17 Jun 2021 09:52:36 -0400
ba407d
Subject: [PATCH 09/10] [cleaner] Don't obfuscate default 'core' user
ba407d
ba407d
The 'core' user is a common default user on containerized hosts, and
ba407d
obfuscation of it is not advantageous, much like the default 'ubuntu'
ba407d
user for that distribution.
ba407d
ba407d
Signed-off-by: Jake Hunsaker <jhunsake@redhat.com>
ba407d
---
ba407d
 sos/cleaner/parsers/username_parser.py | 1 +
ba407d
 1 file changed, 1 insertion(+)
ba407d
ba407d
diff --git a/sos/cleaner/parsers/username_parser.py b/sos/cleaner/parsers/username_parser.py
ba407d
index 0c3bbac4..64843205 100644
ba407d
--- a/sos/cleaner/parsers/username_parser.py
ba407d
+++ b/sos/cleaner/parsers/username_parser.py
ba407d
@@ -28,6 +28,7 @@ class SoSUsernameParser(SoSCleanerParser):
ba407d
     prep_map_file = 'sos_commands/login/lastlog_-u_1000-60000'
ba407d
     regex_patterns = []
ba407d
     skip_list = [
ba407d
+        'core',
ba407d
         'nobody',
ba407d
         'nfsnobody',
ba407d
         'root'
ba407d
-- 
ba407d
2.26.3
ba407d
ba407d
ba407d
From 581429ca65131711c96f9d56bf2f0e18779aec2e Mon Sep 17 00:00:00 2001
ba407d
From: Jake Hunsaker <jhunsake@redhat.com>
ba407d
Date: Fri, 18 Jun 2021 14:26:55 -0400
ba407d
Subject: [PATCH 10/10] [cleaner] Fix checksum and archive pruning from archive
ba407d
 list
ba407d
ba407d
Fixes an issue where checksums may have gotten into the list of archives
ba407d
to be cleaned, which would cause further issues later. Additionally,
ba407d
prevents nested sosreports from top-level archives (such as from
ba407d
`collect`) from being removed for being a binary file when that
ba407d
top-level archive gets obfuscated.
ba407d
---
ba407d
 sos/cleaner/__init__.py            | 5 +++--
ba407d
 sos/cleaner/obfuscation_archive.py | 1 +
ba407d
 2 files changed, 4 insertions(+), 2 deletions(-)
ba407d
ba407d
diff --git a/sos/cleaner/__init__.py b/sos/cleaner/__init__.py
ba407d
index 88d4d0ea..8280bc50 100644
ba407d
--- a/sos/cleaner/__init__.py
ba407d
+++ b/sos/cleaner/__init__.py
ba407d
@@ -226,8 +226,7 @@ third party.
ba407d
         nested_archives = []
ba407d
         for _file in archive.getmembers():
ba407d
             if (re.match('sosreport-.*.tar', _file.name.split('/')[-1]) and not
ba407d
-               (_file.name.endswith('.md5') or
ba407d
-               _file.name.endswith('.sha256'))):
ba407d
+                    (_file.name.endswith(('.md5', '.sha256')))):
ba407d
                 nested_archives.append(_file.name.split('/')[-1])
ba407d
 
ba407d
         if nested_archives:
ba407d
@@ -235,6 +234,8 @@ third party.
ba407d
             nested_path = self.extract_archive(archive)
ba407d
             for arc_file in os.listdir(nested_path):
ba407d
                 if re.match('sosreport.*.tar.*', arc_file):
ba407d
+                    if arc_file.endswith(('.md5', '.sha256')):
ba407d
+                        continue
ba407d
                     self.report_paths.append(os.path.join(nested_path,
ba407d
                                                           arc_file))
ba407d
             # add the toplevel extracted archive
ba407d
diff --git a/sos/cleaner/obfuscation_archive.py b/sos/cleaner/obfuscation_archive.py
ba407d
index 90188358..e357450b 100644
ba407d
--- a/sos/cleaner/obfuscation_archive.py
ba407d
+++ b/sos/cleaner/obfuscation_archive.py
ba407d
@@ -58,6 +58,7 @@ class SoSObfuscationArchive():
ba407d
         Returns: list of files and file regexes
ba407d
         """
ba407d
         return [
ba407d
+            'sosreport-',
ba407d
             'sys/firmware',
ba407d
             'sys/fs',
ba407d
             'sys/kernel/debug',
ba407d
-- 
ba407d
2.26.3
ba407d