Blame SOURCES/sos-bz1985983-ocp-cluster-cleaner.patch

47940b
From 29afda6e4ff90385d34bc61315542e7cb4baaf8d Mon Sep 17 00:00:00 2001
47940b
From: Jake Hunsaker <jhunsake@redhat.com>
47940b
Date: Fri, 9 Apr 2021 11:32:14 -0400
47940b
Subject: [PATCH] [cleaner] Do not break iteration of parse_string_for_keys on
47940b
 first match
47940b
47940b
Previously, `parse_string_for_keys()`, called by `obfuscate_string()`
47940b
for non-regex based obfuscations, would return on the first match in the
47940b
string found for each parser.
47940b
47940b
Instead, continue iterating over all items in each parser's dataset
47940b
before returning the (now fully) obfuscated string.
47940b
47940b
Resolves: #2480
47940b
47940b
Signed-off-by: Jake Hunsaker <jhunsake@redhat.com>
47940b
---
47940b
 sos/cleaner/parsers/__init__.py | 2 +-
47940b
 1 file changed, 1 insertion(+), 1 deletion(-)
47940b
47940b
diff --git a/sos/cleaner/parsers/__init__.py b/sos/cleaner/parsers/__init__.py
47940b
index dd0451df..c77300aa 100644
47940b
--- a/sos/cleaner/parsers/__init__.py
47940b
+++ b/sos/cleaner/parsers/__init__.py
47940b
@@ -104,7 +104,7 @@ class SoSCleanerParser():
47940b
         """
47940b
         for key, val in self.mapping.dataset.items():
47940b
             if key in string_data:
47940b
-                return string_data.replace(key, val)
47940b
+                string_data = string_data.replace(key, val)
47940b
         return string_data
47940b
 
47940b
     def get_map_contents(self):
47940b
-- 
47940b
2.26.3
47940b
47940b
From 52e6b2ae17e128f17a84ee83b7718c2901bcd5bd Mon Sep 17 00:00:00 2001
47940b
From: Jake Hunsaker <jhunsake@redhat.com>
47940b
Date: Wed, 12 May 2021 12:39:48 -0400
47940b
Subject: [PATCH] [collect] Add options to provide registry auth for pulling
47940b
 images
47940b
47940b
Adds options that allow a user to specify registry authentication,
47940b
either via username/password or an authfile, to allow pulling an image
47940b
that exists on a non-public registry.
47940b
47940b
If a username/password is provided, that will be used. If not, we will
47940b
attempt to use an authfile - either provided by the user or by a cluster
47940b
profile.
47940b
47940b
Also adds an option to forcibly pull a new(er) version of the specified
47940b
image, to alleviate conditions where a too-old version of the image
47940b
already exists on the host.
47940b
47940b
Closes: #2534
47940b
47940b
Signed-off-by: Jake Hunsaker <jhunsake@redhat.com>
47940b
---
47940b
 man/en/sos-collect.1              | 30 +++++++++++++++++++++++
47940b
 sos/collector/__init__.py         | 17 +++++++++++++
47940b
 sos/collector/sosnode.py          | 40 +++++++++++++++++++++++++++----
47940b
 sos/policies/distros/__init__.py  | 16 ++++++++++++-
47940b
 sos/policies/distros/redhat.py    | 25 ++++++++++++-------
47940b
 sos/policies/runtimes/__init__.py | 25 +++++++++++++++++++
47940b
 6 files changed, 140 insertions(+), 13 deletions(-)
47940b
47940b
diff --git a/man/en/sos-collect.1 b/man/en/sos-collect.1
47940b
index 286bfe71..cdbc3257 100644
47940b
--- a/man/en/sos-collect.1
47940b
+++ b/man/en/sos-collect.1
47940b
@@ -26,6 +26,11 @@ sos collect \- Collect sosreports from multiple (cluster) nodes
47940b
     [\-\-no\-pkg\-check]
47940b
     [\-\-no\-local]
47940b
     [\-\-master MASTER]
47940b
+    [\-\-image IMAGE]
47940b
+    [\-\-force-pull-image]
47940b
+    [\-\-registry-user USER]
47940b
+    [\-\-registry-password PASSWORD]
47940b
+    [\-\-registry-authfile FILE]
47940b
     [\-o ONLY_PLUGINS]
47940b
     [\-p SSH_PORT]
47940b
     [\-\-password]
47940b
@@ -245,6 +250,31 @@ Specify a master node for the cluster.
47940b
 If provided, then sos collect will check the master node, not localhost, for determining
47940b
 the type of cluster in use.
47940b
 .TP
47940b
+\fB\-\-image IMAGE\fR
47940b
+Specify an image to use for the temporary container created for collections on
47940b
+containerized host, if you do not want to use the default image specifed by the
47940b
+host's policy. Note that this should include the registry.
47940b
+.TP
47940b
+\fB\-\-force-pull-image\fR
47940b
+Use this option to force the container runtime to pull the specified image (even
47940b
+if it is the policy default image) even if the image already exists on the host.
47940b
+This may be useful to update an older container image on containerized hosts.
47940b
+.TP
47940b
+\fB\-\-registry-user USER\fR
47940b
+Specify the username to authenticate to the registry with in order to pull the container
47940b
+image
47940b
+.TP
47940b
+\fB\-\-registry-password PASSWORD\fR
47940b
+Specify the password to authenticate to the registry with in order to pull the container
47940b
+image. If no password is required, leave this blank.
47940b
+.TP
47940b
+\fB\-\-registry-authfile FILE\fR
47940b
+Specify the filename to use for providing authentication credentials to the registry
47940b
+to pull the container image.
47940b
+
47940b
+Note that this file must exist on the node(s) performing the pull operations, not the
47940b
+node from which \fBsos collect\fR was run.
47940b
+.TP
47940b
 \fB\-o\fR ONLY_PLUGINS, \fB\-\-only\-plugins\fR ONLY_PLUGINS
47940b
 Sosreport option. Run ONLY the plugins listed.
47940b
 
47940b
diff --git a/sos/collector/__init__.py b/sos/collector/__init__.py
47940b
index 1c742cf5..0624caad 100644
47940b
--- a/sos/collector/__init__.py
47940b
+++ b/sos/collector/__init__.py
47940b
@@ -63,6 +63,7 @@ class SoSCollector(SoSComponent):
47940b
         'encrypt_pass': '',
47940b
         'group': None,
47940b
         'image': '',
47940b
+        'force_pull_image': False,
47940b
         'jobs': 4,
47940b
         'keywords': [],
47940b
         'keyword_file': None,
47940b
@@ -84,6 +85,9 @@ class SoSCollector(SoSComponent):
47940b
         'plugin_timeout': None,
47940b
         'cmd_timeout': None,
47940b
         'preset': '',
47940b
+        'registry_user': None,
47940b
+        'registry_password': None,
47940b
+        'registry_authfile': None,
47940b
         'save_group': '',
47940b
         'since': '',
47940b
         'skip_commands': [],
47940b
@@ -319,6 +323,19 @@ class SoSCollector(SoSComponent):
47940b
         collect_grp.add_argument('--image',
47940b
                                  help=('Specify the container image to use for'
47940b
                                        ' containerized hosts.'))
47940b
+        collect_grp.add_argument('--force-pull-image', '--pull', default=False,
47940b
+                                 action='store_true',
47940b
+                                 help='Force pull the container image even if '
47940b
+                                      'it already exists on the host')
47940b
+        collect_grp.add_argument('--registry-user', default=None,
47940b
+                                 help='Username to authenticate to the '
47940b
+                                      'registry with for pulling an image')
47940b
+        collect_grp.add_argument('--registry-password', default=None,
47940b
+                                 help='Password to authenticate to the '
47940b
+                                      'registry with for pulling an image')
47940b
+        collect_grp.add_argument('--registry-authfile', default=None,
47940b
+                                 help='Use this authfile to provide registry '
47940b
+                                      'authentication when pulling an image')
47940b
         collect_grp.add_argument('-i', '--ssh-key', help='Specify an ssh key')
47940b
         collect_grp.add_argument('-j', '--jobs', default=4, type=int,
47940b
                                  help='Number of concurrent nodes to collect')
47940b
diff --git a/sos/collector/sosnode.py b/sos/collector/sosnode.py
47940b
index 48693342..d1c11824 100644
47940b
--- a/sos/collector/sosnode.py
47940b
+++ b/sos/collector/sosnode.py
47940b
@@ -134,9 +134,27 @@ class SosNode():
47940b
         """If the host is containerized, create the container we'll be using
47940b
         """
47940b
         if self.host.containerized:
47940b
-            res = self.run_command(self.host.create_sos_container(),
47940b
-                                   need_root=True)
47940b
-            if res['status'] in [0, 125]:  # 125 means container exists
47940b
+            cmd = self.host.create_sos_container(
47940b
+                image=self.opts.image,
47940b
+                auth=self.get_container_auth(),
47940b
+                force_pull=self.opts.force_pull_image
47940b
+            )
47940b
+            res = self.run_command(cmd, need_root=True)
47940b
+            if res['status'] in [0, 125]:
47940b
+                if res['status'] == 125:
47940b
+                    if 'unable to retrieve auth token' in res['stdout']:
47940b
+                        self.log_error(
47940b
+                            "Could not pull image. Provide either a username "
47940b
+                            "and password or authfile"
47940b
+                        )
47940b
+                        raise Exception
47940b
+                    elif 'unknown: Not found' in res['stdout']:
47940b
+                        self.log_error('Specified image not found on registry')
47940b
+                        raise Exception
47940b
+                    # 'name exists' with code 125 means the container was
47940b
+                    # created successfully, so ignore it.
47940b
+                # initial creations leads to an exited container, restarting it
47940b
+                # here will keep it alive for us to exec through
47940b
                 ret = self.run_command(self.host.restart_sos_container(),
47940b
                                        need_root=True)
47940b
                 if ret['status'] == 0:
47940b
@@ -152,6 +170,20 @@ class SosNode():
47940b
                                % res['stdout'])
47940b
                 raise Exception
47940b
 
47940b
+    def get_container_auth(self):
47940b
+        """Determine what the auth string should be to pull the image used to
47940b
+        deploy our temporary container
47940b
+        """
47940b
+        if self.opts.registry_user:
47940b
+            return self.host.runtimes['default'].fmt_registry_credentials(
47940b
+                self.opts.registry_user,
47940b
+                self.opts.registry_password
47940b
+            )
47940b
+        else:
47940b
+            return self.host.runtimes['default'].fmt_registry_authfile(
47940b
+                self.opts.registry_authfile or self.host.container_authfile
47940b
+            )
47940b
+
47940b
     def file_exists(self, fname):
47940b
         """Checks for the presence of fname on the remote node"""
47940b
         if not self.local:
47940b
@@ -343,7 +375,7 @@ class SosNode():
47940b
                           % self.commons['policy'].distro)
47940b
             return self.commons['policy']
47940b
         host = load(cache={}, sysroot=self.opts.sysroot, init=InitSystem(),
47940b
-                    probe_runtime=False, remote_exec=self.ssh_cmd,
47940b
+                    probe_runtime=True, remote_exec=self.ssh_cmd,
47940b
                     remote_check=self.read_file('/etc/os-release'))
47940b
         if host:
47940b
             self.log_info("loaded policy %s for host" % host.distro)
47940b
diff --git a/sos/policies/distros/__init__.py b/sos/policies/distros/__init__.py
47940b
index 9fe31513..f5b9fd5b 100644
47940b
--- a/sos/policies/distros/__init__.py
47940b
+++ b/sos/policies/distros/__init__.py
47940b
@@ -62,6 +62,7 @@ class LinuxPolicy(Policy):
47940b
     sos_bin_path = '/usr/bin'
47940b
     sos_container_name = 'sos-collector-tmp'
47940b
     container_version_command = None
47940b
+    container_authfile = None
47940b
 
47940b
     def __init__(self, sysroot=None, init=None, probe_runtime=True):
47940b
         super(LinuxPolicy, self).__init__(sysroot=sysroot,
47940b
@@ -626,13 +627,26 @@ class LinuxPolicy(Policy):
47940b
         """
47940b
         return ''
47940b
 
47940b
-    def create_sos_container(self):
47940b
+    def create_sos_container(self, image=None, auth=None, force_pull=False):
47940b
         """Returns the command that will create the container that will be
47940b
         used for running commands inside a container on hosts that require it.
47940b
 
47940b
         This will use the container runtime defined for the host type to
47940b
         launch a container. From there, we use the defined runtime to exec into
47940b
         the container's namespace.
47940b
+
47940b
+        :param image:   The name of the image if not using the policy default
47940b
+        :type image:    ``str`` or ``None``
47940b
+
47940b
+        :param auth:    The auth string required by the runtime to pull an
47940b
+                        image from the registry
47940b
+        :type auth:     ``str`` or ``None``
47940b
+
47940b
+        :param force_pull:  Should the runtime forcibly pull the image
47940b
+        :type force_pull:   ``bool``
47940b
+
47940b
+        :returns:   The command to execute to launch the temp container
47940b
+        :rtype:     ``str``
47940b
         """
47940b
         return ''
47940b
 
47940b
diff --git a/sos/policies/distros/redhat.py b/sos/policies/distros/redhat.py
47940b
index 241d3f13..20afbcc4 100644
47940b
--- a/sos/policies/distros/redhat.py
47940b
+++ b/sos/policies/distros/redhat.py
47940b
@@ -452,15 +452,19 @@ support representative.
47940b
 
47940b
         return self.find_preset(ATOMIC)
47940b
 
47940b
-    def create_sos_container(self):
47940b
+    def create_sos_container(self, image=None, auth=None, force_pull=False):
47940b
         _cmd = ("{runtime} run -di --name {name} --privileged --ipc=host"
47940b
                 " --net=host --pid=host -e HOST=/host -e NAME={name} -e "
47940b
-                "IMAGE={image} -v /run:/run -v /var/log:/var/log -v "
47940b
+                "IMAGE={image} {pull} -v /run:/run -v /var/log:/var/log -v "
47940b
                 "/etc/machine-id:/etc/machine-id -v "
47940b
-                "/etc/localtime:/etc/localtime -v /:/host {image}")
47940b
+                "/etc/localtime:/etc/localtime -v /:/host {auth} {image}")
47940b
+        _image = image or self.container_image
47940b
+        _pull = '--pull=always' if force_pull else ''
47940b
         return _cmd.format(runtime=self.container_runtime,
47940b
                            name=self.sos_container_name,
47940b
-                           image=self.container_image)
47940b
+                           image=_image,
47940b
+                           pull=_pull,
47940b
+                           auth=auth or '')
47940b
 
47940b
     def set_cleanup_cmd(self):
47940b
         return 'docker rm --force sos-collector-tmp'
47940b
@@ -482,6 +486,7 @@ support representative.
47940b
     container_image = 'registry.redhat.io/rhel8/support-tools'
47940b
     sos_path_strip = '/host'
47940b
     container_version_command = 'rpm -q sos'
47940b
+    container_authfile = '/var/lib/kubelet/config.json'
47940b
 
47940b
     def __init__(self, sysroot=None, init=None, probe_runtime=True,
47940b
                  remote_exec=None):
47940b
@@ -511,15 +516,19 @@ support representative.
47940b
         # RH OCP environments.
47940b
         return self.find_preset(RHOCP)
47940b
 
47940b
-    def create_sos_container(self):
47940b
+    def create_sos_container(self, image=None, auth=None, force_pull=False):
47940b
         _cmd = ("{runtime} run -di --name {name} --privileged --ipc=host"
47940b
                 " --net=host --pid=host -e HOST=/host -e NAME={name} -e "
47940b
-                "IMAGE={image} -v /run:/run -v /var/log:/var/log -v "
47940b
+                "IMAGE={image} {pull} -v /run:/run -v /var/log:/var/log -v "
47940b
                 "/etc/machine-id:/etc/machine-id -v "
47940b
-                "/etc/localtime:/etc/localtime -v /:/host {image}")
47940b
+                "/etc/localtime:/etc/localtime -v /:/host {auth} {image}")
47940b
+        _image = image or self.container_image
47940b
+        _pull = '--pull=always' if force_pull else ''
47940b
         return _cmd.format(runtime=self.container_runtime,
47940b
                            name=self.sos_container_name,
47940b
-                           image=self.container_image)
47940b
+                           image=_image,
47940b
+                           pull=_pull,
47940b
+                           auth=auth or '')
47940b
 
47940b
     def set_cleanup_cmd(self):
47940b
         return 'podman rm --force %s' % self.sos_container_name
47940b
diff --git a/sos/policies/runtimes/__init__.py b/sos/policies/runtimes/__init__.py
47940b
index 1a61b644..f28d6a1d 100644
47940b
--- a/sos/policies/runtimes/__init__.py
47940b
+++ b/sos/policies/runtimes/__init__.py
47940b
@@ -157,6 +157,31 @@ class ContainerRuntime():
47940b
             quoted_cmd = cmd
47940b
         return "%s %s %s" % (self.run_cmd, container, quoted_cmd)
47940b
 
47940b
+    def fmt_registry_credentials(self, username, password):
47940b
+        """Format a string to pass to the 'run' command of the runtime to
47940b
+        enable authorization for pulling the image during `sos collect`, if
47940b
+        needed using username and optional password creds
47940b
+
47940b
+        :param username:    The name of the registry user
47940b
+        :type username:     ``str``
47940b
+
47940b
+        :param password:    The password of the registry user
47940b
+        :type password:     ``str`` or ``None``
47940b
+
47940b
+        :returns:  The string to use to enable a run command to pull the image
47940b
+        :rtype:    ``str``
47940b
+        """
47940b
+        return "--creds=%s%s" % (username, ':' + password if password else '')
47940b
+
47940b
+    def fmt_registry_authfile(self, authfile):
47940b
+        """Format a string to pass to the 'run' command of the runtime to
47940b
+        enable authorization for pulling the image during `sos collect`, if
47940b
+        needed using an authfile.
47940b
+        """
47940b
+        if authfile:
47940b
+            return "--authfile %s" % authfile
47940b
+        return ''
47940b
+
47940b
     def get_logs_command(self, container):
47940b
         """Get the command string used to dump container logs from the
47940b
         runtime
47940b
-- 
47940b
2.26.3
47940b
47940b
From 3cbbd7df6f0700609eeef3210d7388298b9e0c21 Mon Sep 17 00:00:00 2001
47940b
From: Jake Hunsaker <jhunsake@redhat.com>
47940b
Date: Wed, 12 May 2021 13:26:45 -0400
47940b
Subject: [PATCH] [sosnode] Allow clusters to set options only for master nodes
47940b
47940b
Adds a method to the `Cluster` class that allows a profile to set sos options
47940b
specifically for master nodes.
47940b
47940b
Signed-off-by: Jake Hunsaker <jhunsake@redhat.com>
47940b
---
47940b
 sos/collector/clusters/__init__.py | 21 +++++++++++++++++++++
47940b
 sos/collector/sosnode.py           |  6 ++++++
47940b
 2 files changed, 27 insertions(+)
47940b
47940b
diff --git a/sos/collector/clusters/__init__.py b/sos/collector/clusters/__init__.py
47940b
index 5c002bae..bfa3aad3 100644
47940b
--- a/sos/collector/clusters/__init__.py
47940b
+++ b/sos/collector/clusters/__init__.py
47940b
@@ -137,6 +137,27 @@ class Cluster():
47940b
         """
47940b
         self.cluster_ssh_key = key
47940b
 
47940b
+    def set_master_options(self, node):
47940b
+        """If there is a need to set specific options in the sos command being
47940b
+        run on the cluster's master nodes, override this method in the cluster
47940b
+        profile and do that here.
47940b
+
47940b
+        :param node:       The master node
47940b
+        :type node:        ``SoSNode``
47940b
+        """
47940b
+        pass
47940b
+
47940b
+    def check_node_is_master(self, node):
47940b
+        """In the event there are multiple masters, or if the collect command
47940b
+        is being run from a system that is technically capable of enumerating
47940b
+        nodes but the cluster profiles needs to specify master-specific options
47940b
+        for other nodes, override this method in the cluster profile
47940b
+
47940b
+        :param node:        The node for the cluster to check
47940b
+        :type node:         ``SoSNode``
47940b
+        """
47940b
+        return node.address == self.master.address
47940b
+
47940b
     def exec_master_cmd(self, cmd, need_root=False):
47940b
         """Used to retrieve command output from a (master) node in a cluster
47940b
 
47940b
diff --git a/sos/collector/sosnode.py b/sos/collector/sosnode.py
47940b
index d1c11824..62666635 100644
47940b
--- a/sos/collector/sosnode.py
47940b
+++ b/sos/collector/sosnode.py
47940b
@@ -647,6 +647,10 @@ class SosNode():
47940b
                                         self.cluster.sos_plugin_options[opt])
47940b
                     self.opts.plugin_options.append(option)
47940b
 
47940b
+        # set master-only options
47940b
+        if self.cluster.check_node_is_master(self):
47940b
+            self.cluster.set_master_options(self)
47940b
+
47940b
     def finalize_sos_cmd(self):
47940b
         """Use host facts and compare to the cluster type to modify the sos
47940b
         command if needed"""
47940b
@@ -707,6 +711,8 @@ class SosNode():
47940b
             os.path.join(self.host.sos_bin_path, self.sos_bin)
47940b
         )
47940b
 
47940b
+        self.update_cmd_from_cluster()
47940b
+
47940b
         if self.opts.only_plugins:
47940b
             plugs = [o for o in self.opts.only_plugins
47940b
                      if self._plugin_exists(o)]
47940b
-- 
47940b
2.26.3
47940b
47940b
From cae9dd79a59107aa92db5f90aed356e093985bd9 Mon Sep 17 00:00:00 2001
47940b
From: Jake Hunsaker <jhunsake@redhat.com>
47940b
Date: Wed, 12 May 2021 16:06:29 -0400
47940b
Subject: [PATCH] [sosnode] Don't fail on sos-less bastion nodes used for node
47940b
 lists
47940b
47940b
If the master node is determined to not have sos installed, that is not
47940b
necessarily a fatal error for scenarios where the 'master' node is only
47940b
being used to enumerate node lists and is not actually part of the
47940b
cluster. This can happen when a user is using a bastion node to
47940b
enumerate and connect to the cluster environment, or if the local host
47940b
is being used to enumerate nodes via cluster client tooling.
47940b
47940b
Signed-off-by: Jake Hunsaker <jhunsake@redhat.com>
47940b
---
47940b
 sos/collector/sosnode.py | 17 ++++++++++++-----
47940b
 1 file changed, 12 insertions(+), 5 deletions(-)
47940b
47940b
diff --git a/sos/collector/sosnode.py b/sos/collector/sosnode.py
47940b
index 62666635..7e56483d 100644
47940b
--- a/sos/collector/sosnode.py
47940b
+++ b/sos/collector/sosnode.py
47940b
@@ -287,13 +287,20 @@ class SosNode():
47940b
             # use the containerized policy's command
47940b
             pkgs = self.run_command(self.host.container_version_command,
47940b
                                     use_container=True, need_root=True)
47940b
-            ver = pkgs['stdout'].strip().split('-')[1]
47940b
-            if ver:
47940b
-                self.sos_info['version'] = ver
47940b
-        if 'version' in self.sos_info:
47940b
+            if pkgs['status'] == 0:
47940b
+                ver = pkgs['stdout'].strip().split('-')[1]
47940b
+                if ver:
47940b
+                    self.sos_info['version'] = ver
47940b
+            else:
47940b
+                self.sos_info['version'] = None
47940b
+        if self.sos_info['version']:
47940b
             self.log_info('sos version is %s' % self.sos_info['version'])
47940b
         else:
47940b
-            self.log_error('sos is not installed on this node')
47940b
+            if not self.address == self.opts.master:
47940b
+                # in the case where the 'master' enumerates nodes but is not
47940b
+                # intended for collection (bastions), don't worry about sos not
47940b
+                # being present
47940b
+                self.log_error('sos is not installed on this node')
47940b
             self.connected = False
47940b
             return False
47940b
         cmd = 'sosreport -l'
47940b
-- 
47940b
2.26.3
47940b
47940b
From cc5abe563d855dea9ac25f56de2e493228b48bf7 Mon Sep 17 00:00:00 2001
47940b
From: Jake Hunsaker <jhunsake@redhat.com>
47940b
Date: Wed, 12 May 2021 18:26:09 -0400
47940b
Subject: [PATCH] [sosnode] Mark sos commands as explicitly needing root for
47940b
 containers
47940b
47940b
Fixes an issue where the sos inspection commands were not properly
47940b
marked as needing to be run as root (either directly or via sudo) for
47940b
containerized hosts, which would lead to incorrect sos command
47940b
formatting.
47940b
47940b
Mark those commands, and the final container removal command, as
47940b
explicitly needing root permissions.
47940b
47940b
Signed-off-by: Jake Hunsaker <jhunsake@redhat.com>
47940b
---
47940b
 sos/collector/sosnode.py | 6 +++---
47940b
 1 file changed, 3 insertions(+), 3 deletions(-)
47940b
47940b
diff --git a/sos/collector/sosnode.py b/sos/collector/sosnode.py
47940b
index 7e56483d..1fc03076 100644
47940b
--- a/sos/collector/sosnode.py
47940b
+++ b/sos/collector/sosnode.py
47940b
@@ -304,7 +304,7 @@ class SosNode():
47940b
             self.connected = False
47940b
             return False
47940b
         cmd = 'sosreport -l'
47940b
-        sosinfo = self.run_command(cmd, use_container=True)
47940b
+        sosinfo = self.run_command(cmd, use_container=True, need_root=True)
47940b
         if sosinfo['status'] == 0:
47940b
             self._load_sos_plugins(sosinfo['stdout'])
47940b
         if self.check_sos_version('3.6'):
47940b
@@ -312,7 +312,7 @@ class SosNode():
47940b
 
47940b
     def _load_sos_presets(self):
47940b
         cmd = 'sosreport --list-presets'
47940b
-        res = self.run_command(cmd, use_container=True)
47940b
+        res = self.run_command(cmd, use_container=True, need_root=True)
47940b
         if res['status'] == 0:
47940b
             for line in res['stdout'].splitlines():
47940b
                 if line.strip().startswith('name:'):
47940b
@@ -996,7 +996,7 @@ class SosNode():
47940b
             self.remove_file(self.sos_path + '.md5')
47940b
         cleanup = self.host.set_cleanup_cmd()
47940b
         if cleanup:
47940b
-            self.run_command(cleanup)
47940b
+            self.run_command(cleanup, need_root=True)
47940b
 
47940b
     def collect_extra_cmd(self, filenames):
47940b
         """Collect the file created by a cluster outside of sos"""
47940b
-- 
47940b
2.26.3
47940b
47940b
From 55e77ad4c7e90ba14b10c5fdf18b65aa5d6b9cf8 Mon Sep 17 00:00:00 2001
47940b
From: Jake Hunsaker <jhunsake@redhat.com>
47940b
Date: Wed, 12 May 2021 18:55:31 -0400
47940b
Subject: [PATCH] [ocp] Add cluster profile for OCP4
47940b
47940b
Removes the previous OCP cluster profile and replaces it with an updated
47940b
one for OCP4 which is entirely separated from the kubernetes profile.
47940b
47940b
Resolves: #2544
47940b
47940b
Signed-off-by: Jake Hunsaker <jhunsake@redhat.com>
47940b
---
47940b
 sos/collector/clusters/kubernetes.py |   8 --
47940b
 sos/collector/clusters/ocp.py        | 109 +++++++++++++++++++++++++++
47940b
 2 files changed, 109 insertions(+), 8 deletions(-)
47940b
 create mode 100644 sos/collector/clusters/ocp.py
47940b
47940b
diff --git a/sos/collector/clusters/kubernetes.py b/sos/collector/clusters/kubernetes.py
47940b
index 6a867e31..08fd9554 100644
47940b
--- a/sos/collector/clusters/kubernetes.py
47940b
+++ b/sos/collector/clusters/kubernetes.py
47940b
@@ -44,11 +44,3 @@ class kubernetes(Cluster):
47940b
             return nodes
47940b
         else:
47940b
             raise Exception('Node enumeration did not return usable output')
47940b
-
47940b
-
47940b
-class openshift(kubernetes):
47940b
-
47940b
-    cluster_name = 'OpenShift Container Platform'
47940b
-    packages = ('atomic-openshift',)
47940b
-    sos_preset = 'ocp'
47940b
-    cmd = 'oc'
47940b
diff --git a/sos/collector/clusters/ocp.py b/sos/collector/clusters/ocp.py
47940b
new file mode 100644
47940b
index 00000000..283fcfd1
47940b
--- /dev/null
47940b
+++ b/sos/collector/clusters/ocp.py
47940b
@@ -0,0 +1,109 @@
47940b
+# Copyright Red Hat 2021, Jake Hunsaker <jhunsake@redhat.com>
47940b
+
47940b
+# This file is part of the sos project: https://github.com/sosreport/sos
47940b
+#
47940b
+# This copyrighted material is made available to anyone wishing to use,
47940b
+# modify, copy, or redistribute it subject to the terms and conditions of
47940b
+# version 2 of the GNU General Public License.
47940b
+#
47940b
+# See the LICENSE file in the source distribution for further information.
47940b
+
47940b
+from pipes import quote
47940b
+from sos.collector.clusters import Cluster
47940b
+
47940b
+
47940b
+class ocp(Cluster):
47940b
+    """OpenShift Container Platform v4"""
47940b
+
47940b
+    cluster_name = 'OpenShift Container Platform v4'
47940b
+    packages = ('openshift-hyperkube', 'openshift-clients')
47940b
+
47940b
+    option_list = [
47940b
+        ('label', '', 'Colon delimited list of labels to select nodes with'),
47940b
+        ('role', '', 'Colon delimited list of roles to select nodes with'),
47940b
+        ('kubeconfig', '', 'Path to the kubeconfig file')
47940b
+    ]
47940b
+
47940b
+    def fmt_oc_cmd(self, cmd):
47940b
+        """Format the oc command to optionall include the kubeconfig file if
47940b
+        one is specified
47940b
+        """
47940b
+        if self.get_option('kubeconfig'):
47940b
+            return "oc --config %s %s" % (self.get_option('kubeconfig'), cmd)
47940b
+        return "oc %s" % cmd
47940b
+
47940b
+    def check_enabled(self):
47940b
+        if super(ocp, self).check_enabled():
47940b
+            return True
47940b
+        _who = self.fmt_oc_cmd('whoami')
47940b
+        return self.exec_master_cmd(_who)['status'] == 0
47940b
+
47940b
+    def _build_dict(self, nodelist):
47940b
+        """From the output of get_nodes(), construct an easier-to-reference
47940b
+        dict of nodes that will be used in determining labels, master status,
47940b
+        etc...
47940b
+
47940b
+        :param nodelist:        The split output of `oc get nodes`
47940b
+        :type nodelist:         ``list``
47940b
+
47940b
+        :returns:           A dict of nodes with `get nodes` columns as keys
47940b
+        :rtype:             ``dict``
47940b
+        """
47940b
+        nodes = {}
47940b
+        if 'NAME' in nodelist[0]:
47940b
+            # get the index of the fields
47940b
+            statline = nodelist.pop(0).split()
47940b
+            idx = {}
47940b
+            for state in ['status', 'roles', 'version', 'os-image']:
47940b
+                try:
47940b
+                    idx[state] = statline.index(state.upper())
47940b
+                except Exception:
47940b
+                    pass
47940b
+            for node in nodelist:
47940b
+                _node = node.split()
47940b
+                nodes[_node[0]] = {}
47940b
+                for column in idx:
47940b
+                    nodes[_node[0]][column] = _node[idx[column]]
47940b
+        return nodes
47940b
+
47940b
+    def get_nodes(self):
47940b
+        nodes = []
47940b
+        self.node_dict = {}
47940b
+        cmd = 'get nodes -o wide'
47940b
+        if self.get_option('label'):
47940b
+            labels = ','.join(self.get_option('label').split(':'))
47940b
+            cmd += " -l %s" % quote(labels)
47940b
+        res = self.exec_master_cmd(self.fmt_oc_cmd(cmd))
47940b
+        if res['status'] == 0:
47940b
+            roles = [r for r in self.get_option('role').split(':')]
47940b
+            self.node_dict = self._build_dict(res['stdout'].splitlines())
47940b
+            for node in self.node_dict:
47940b
+                if roles:
47940b
+                    for role in roles:
47940b
+                        if role in node:
47940b
+                            nodes.append(node)
47940b
+                else:
47940b
+                    nodes.append(node)
47940b
+        else:
47940b
+            msg = "'oc' command failed"
47940b
+            if 'Missing or incomplete' in res['stdout']:
47940b
+                msg = ("'oc' failed due to missing kubeconfig on master node."
47940b
+                       " Specify one via '-c ocp.kubeconfig=<path>'")
47940b
+            raise Exception(msg)
47940b
+        return nodes
47940b
+
47940b
+    def set_node_label(self, node):
47940b
+        if node.address not in self.node_dict:
47940b
+            return ''
47940b
+        for label in ['master', 'worker']:
47940b
+            if label in self.node_dict[node.address]['roles']:
47940b
+                return label
47940b
+        return ''
47940b
+
47940b
+    def check_node_is_master(self, sosnode):
47940b
+        if sosnode.address not in self.node_dict:
47940b
+            return False
47940b
+        return 'master' in self.node_dict[sosnode.address]['roles']
47940b
+
47940b
+    def set_master_options(self, node):
47940b
+        node.opts.enable_plugins.append('openshift')
47940b
-- 
47940b
2.26.3
47940b
47940b
From a3c1caad21160545eda87ea1fde93e972a6fbf88 Mon Sep 17 00:00:00 2001
47940b
From: Jake Hunsaker <jhunsake@redhat.com>
47940b
Date: Wed, 26 May 2021 11:55:24 -0400
47940b
Subject: [PATCH] [cleaner] Don't strip empty lines from substituted files
47940b
47940b
Fixes an issue where empty lines would be stripped from files that have
47940b
other obfuscations in them. Those empty lines may be important for file
47940b
structure and/or readability, so we should instead simply not pass empty
47940b
lines to the parsers rather than skipping them wholesale in the flow of
47940b
writing obfuscations to a temp file before replacing the source file
47940b
with a potentially changed temp file.
47940b
47940b
Resolves: #2562
47940b
47940b
Signed-off-by: Jake Hunsaker <jhunsake@redhat.com>
47940b
---
47940b
 sos/cleaner/__init__.py | 6 ++++--
47940b
 1 file changed, 4 insertions(+), 2 deletions(-)
47940b
47940b
diff --git a/sos/cleaner/__init__.py b/sos/cleaner/__init__.py
47940b
index bdd24f95..55465b85 100644
47940b
--- a/sos/cleaner/__init__.py
47940b
+++ b/sos/cleaner/__init__.py
47940b
@@ -603,8 +603,6 @@ third party.
47940b
         tfile = tempfile.NamedTemporaryFile(mode='w', dir=self.tmpdir)
47940b
         with open(filename, 'r') as fname:
47940b
             for line in fname:
47940b
-                if not line.strip():
47940b
-                    continue
47940b
                 try:
47940b
                     line, count = self.obfuscate_line(line)
47940b
                     subs += count
47940b
@@ -642,7 +640,11 @@ third party.
47940b
 
47940b
         Returns the fully obfuscated line and the number of substitutions made
47940b
         """
47940b
+        # don't iterate over blank lines, but still write them to the tempfile
47940b
+        # to maintain the same structure when we write a scrubbed file back
47940b
         count = 0
47940b
+        if not line.strip():
47940b
+            return line, count
47940b
         for parser in self.parsers:
47940b
             try:
47940b
                 line, _count = parser.parse_line(line)
47940b
-- 
47940b
2.26.3
47940b
47940b
From 892bbd8114703f5a4d23aa77ba5829b7ba59446f Mon Sep 17 00:00:00 2001
47940b
From: Jake Hunsaker <jhunsake@redhat.com>
47940b
Date: Wed, 5 May 2021 17:02:04 -0400
47940b
Subject: [PATCH] [cleaner] Remove binary files by default
47940b
47940b
Binary files, generally speaking, cannot be obfuscated, and as such we
47940b
should remove them from archives being obfuscated by default so that
47940b
sensitive data is not mistakenly included in an obfuscated archive.
47940b
47940b
This commit adds a new `--keep-binary-files` option that, if used, will
47940b
keep any encountered binary files in the final archive. The default
47940b
option of `false` will ensure that encountered binary files are removed.
47940b
47940b
The number of removed binary files per archive is reported when
47940b
obfuscation is completed for that archive.
47940b
47940b
Closes: #2478
47940b
Resolves: #2524
47940b
47940b
Signed-off-by: Jake Hunsaker <jhunsake@redhat.com>
47940b
---
47940b
 man/en/sos-clean.1                          |  12 ++++
47940b
 sos/cleaner/__init__.py                     |  21 +++++-
47940b
 sos/cleaner/obfuscation_archive.py          |  67 ++++++++++++++++++--
47940b
 sos/collector/__init__.py                   |   5 ++
47940b
 sos/report/__init__.py                      |   6 ++
47940b
 8 files changed, 167 insertions(+), 7 deletions(-)
47940b
47940b
diff --git a/man/en/sos-clean.1 b/man/en/sos-clean.1
47940b
index 4856b43b..b77bc63c 100644
47940b
--- a/man/en/sos-clean.1
47940b
+++ b/man/en/sos-clean.1
47940b
@@ -9,6 +9,7 @@ sos clean - Obfuscate sensitive data from one or more sosreports
47940b
     [\-\-map-file]
47940b
     [\-\-jobs]
47940b
     [\-\-no-update]
47940b
+    [\-\-keep-binary-files]
47940b
 
47940b
 .SH DESCRIPTION
47940b
 \fBsos clean\fR or \fBsos mask\fR is an sos subcommand used to obfuscate sensitive information from
47940b
@@ -77,6 +78,17 @@ Default: 4
47940b
 .TP
47940b
 .B \-\-no-update
47940b
 Do not write the mapping file contents to /etc/sos/cleaner/default_mapping
47940b
+.TP
47940b
+.B \-\-keep-binary-files
47940b
+Keep unprocessable binary files in the archive, rather than removing them.
47940b
+
47940b
+Note that binary files cannot be obfuscated, and thus keeping them in the archive
47940b
+may result in otherwise sensitive information being included in the final archive.
47940b
+Users should review any archive that keeps binary files in place before sending to
47940b
+a third party.
47940b
+
47940b
+Default: False (remove encountered binary files)
47940b
+
47940b
 .SH SEE ALSO
47940b
 .BR sos (1)
47940b
 .BR sos-report (1)
47940b
diff --git a/sos/cleaner/__init__.py b/sos/cleaner/__init__.py
47940b
index 55465b85..f88ff8a0 100644
47940b
--- a/sos/cleaner/__init__.py
47940b
+++ b/sos/cleaner/__init__.py
47940b
@@ -47,6 +47,7 @@ class SoSCleaner(SoSComponent):
47940b
         'keyword_file': None,
47940b
         'map_file': '/etc/sos/cleaner/default_mapping',
47940b
         'no_update': False,
47940b
+        'keep_binary_files': False,
47940b
         'target': '',
47940b
         'usernames': []
47940b
     }
47940b
@@ -183,6 +184,11 @@ third party.
47940b
                                action='store_true',
47940b
                                help='Do not update the --map file with new '
47940b
                                     'mappings from this run')
47940b
+        clean_grp.add_argument('--keep-binary-files', default=False,
47940b
+                               action='store_true',
47940b
+                               dest='keep_binary_files',
47940b
+                               help='Keep unprocessable binary files in the '
47940b
+                                    'archive instead of removing them')
47940b
         clean_grp.add_argument('--usernames', dest='usernames', default=[],
47940b
                                action='extend',
47940b
                                help='List of usernames to obfuscate')
47940b
@@ -467,6 +473,11 @@ third party.
47940b
                        "%s concurrently\n"
47940b
                        % (len(self.report_paths), self.opts.jobs))
47940b
                 self.ui_log.info(msg)
47940b
+            if self.opts.keep_binary_files:
47940b
+                self.ui_log.warning(
47940b
+                    "WARNING: binary files that potentially contain sensitive "
47940b
+                    "information will NOT be removed from the final archive\n"
47940b
+                )
47940b
             pool = ThreadPoolExecutor(self.opts.jobs)
47940b
             pool.map(self.obfuscate_report, self.report_paths, chunksize=1)
47940b
             pool.shutdown(wait=True)
47940b
@@ -539,6 +550,10 @@ third party.
47940b
                 short_name = fname.split(archive.archive_name + '/')[1]
47940b
                 if archive.should_skip_file(short_name):
47940b
                     continue
47940b
+                if (not self.opts.keep_binary_files and
47940b
+                        archive.should_remove_file(short_name)):
47940b
+                    archive.remove_file(short_name)
47940b
+                    continue
47940b
                 try:
47940b
                     count = self.obfuscate_file(fname, short_name,
47940b
                                                 archive.archive_name)
47940b
@@ -574,7 +589,11 @@ third party.
47940b
             arc_md.add_field('files_obfuscated', len(archive.file_sub_list))
47940b
             arc_md.add_field('total_substitutions', archive.total_sub_count)
47940b
             self.completed_reports.append(archive)
47940b
-            archive.report_msg("Obfuscation completed")
47940b
+            rmsg = ''
47940b
+            if archive.removed_file_count:
47940b
+                rmsg = " [removed %s unprocessable files]"
47940b
+                rmsg = rmsg % archive.removed_file_count
47940b
+            archive.report_msg("Obfuscation completed%s" % rmsg)
47940b
 
47940b
         except Exception as err:
47940b
             self.ui_log.info("Exception while processing %s: %s"
47940b
diff --git a/sos/cleaner/obfuscation_archive.py b/sos/cleaner/obfuscation_archive.py
47940b
index c64ab13b..76841b51 100644
47940b
--- a/sos/cleaner/obfuscation_archive.py
47940b
+++ b/sos/cleaner/obfuscation_archive.py
47940b
@@ -28,6 +28,7 @@ class SoSObfuscationArchive():
47940b
 
47940b
     file_sub_list = []
47940b
     total_sub_count = 0
47940b
+    removed_file_count = 0
47940b
 
47940b
     def __init__(self, archive_path, tmpdir):
47940b
         self.archive_path = archive_path
47940b
@@ -62,11 +63,7 @@ class SoSObfuscationArchive():
47940b
             'sys/firmware',
47940b
             'sys/fs',
47940b
             'sys/kernel/debug',
47940b
-            'sys/module',
47940b
-            r'.*\.tar$',  # TODO: support archive unpacking
47940b
-            # Be explicit with these tar matches to avoid matching commands
47940b
-            r'.*\.tar\.xz',
47940b
-            '.*.gz'
47940b
+            'sys/module'
47940b
         ]
47940b
 
47940b
     @property
47940b
@@ -76,6 +73,17 @@ class SoSObfuscationArchive():
47940b
         except Exception:
47940b
             return False
47940b
 
47940b
+    def remove_file(self, fname):
47940b
+        """Remove a file from the archive. This is used when cleaner encounters
47940b
+        a binary file, which we cannot reliably obfuscate.
47940b
+        """
47940b
+        full_fname = self.get_file_path(fname)
47940b
+        # don't call a blank remove() here
47940b
+        if full_fname:
47940b
+            self.log_info("Removing binary file '%s' from archive" % fname)
47940b
+            os.remove(full_fname)
47940b
+            self.removed_file_count += 1
47940b
+
47940b
     def extract(self):
47940b
         if self.is_tarfile:
47940b
             self.report_msg("Extracting...")
47940b
@@ -227,3 +235,52 @@ class SoSObfuscationArchive():
47940b
             if filename.startswith(_skip) or re.match(_skip, filename):
47940b
                 return True
47940b
         return False
47940b
+
47940b
+    def should_remove_file(self, fname):
47940b
+        """Determine if the file should be removed or not, due to an inability
47940b
+        to reliably obfuscate that file based on the filename.
47940b
+
47940b
+        :param fname:       Filename relative to the extracted archive root
47940b
+        :type fname:        ``str``
47940b
+
47940b
+        :returns:   ``True`` if the file cannot be reliably obfuscated
47940b
+        :rtype:     ``bool``
47940b
+        """
47940b
+        obvious_removes = [
47940b
+            r'.*\.gz',  # TODO: support flat gz/xz extraction
47940b
+            r'.*\.xz',
47940b
+            r'.*\.bzip2',
47940b
+            r'.*\.tar\..*',  # TODO: support archive unpacking
47940b
+            r'.*\.txz$',
47940b
+            r'.*\.tgz$',
47940b
+            r'.*\.bin',
47940b
+            r'.*\.journal',
47940b
+            r'.*\~$'
47940b
+        ]
47940b
+
47940b
+        # if the filename matches, it is obvious we can remove them without
47940b
+        # doing the read test
47940b
+        for _arc_reg in obvious_removes:
47940b
+            if re.match(_arc_reg, fname):
47940b
+                return True
47940b
+
47940b
+        return self.file_is_binary(fname)
47940b
+
47940b
+    def file_is_binary(self, fname):
47940b
+        """Determine if the file is a binary file or not.
47940b
+
47940b
+
47940b
+        :param fname:          Filename relative to the extracted archive root
47940b
+        :type fname:           ``str``
47940b
+
47940b
+        :returns:   ``True`` if file is binary, else ``False``
47940b
+        :rtype:     ``bool``
47940b
+        """
47940b
+        with open(self.get_file_path(fname), 'tr') as tfile:
47940b
+            try:
47940b
+                # when opened as above (tr), reading binary content will raise
47940b
+                # an exception
47940b
+                tfile.read(1)
47940b
+                return False
47940b
+            except UnicodeDecodeError:
47940b
+                return True
47940b
diff --git a/sos/collector/__init__.py b/sos/collector/__init__.py
47940b
index 9884836c..469db60d 100644
47940b
--- a/sos/collector/__init__.py
47940b
+++ b/sos/collector/__init__.py
47940b
@@ -67,6 +67,7 @@ class SoSCollector(SoSComponent):
47940b
         'jobs': 4,
47940b
         'keywords': [],
47940b
         'keyword_file': None,
47940b
+        'keep_binary_files': False,
47940b
         'label': '',
47940b
         'list_options': False,
47940b
         'log_size': 0,
47940b
@@ -410,6 +411,10 @@ class SoSCollector(SoSComponent):
47940b
                                  dest='clean',
47940b
                                  default=False, action='store_true',
47940b
                                  help='Obfuscate sensistive information')
47940b
+        cleaner_grp.add_argument('--keep-binary-files', default=False,
47940b
+                                 action='store_true', dest='keep_binary_files',
47940b
+                                 help='Keep unprocessable binary files in the '
47940b
+                                      'archive instead of removing them')
47940b
         cleaner_grp.add_argument('--domains', dest='domains', default=[],
47940b
                                  action='extend',
47940b
                                  help='Additional domain names to obfuscate')
47940b
diff --git a/sos/report/__init__.py b/sos/report/__init__.py
47940b
index d4345409..2cedc76e 100644
47940b
--- a/sos/report/__init__.py
47940b
+++ b/sos/report/__init__.py
47940b
@@ -82,6 +82,7 @@ class SoSReport(SoSComponent):
47940b
         'case_id': '',
47940b
         'chroot': 'auto',
47940b
         'clean': False,
47940b
+        'keep_binary_files': False,
47940b
         'desc': '',
47940b
         'domains': [],
47940b
         'dry_run': False,
47940b
@@ -344,6 +345,11 @@ class SoSReport(SoSComponent):
47940b
                                  default='/etc/sos/cleaner/default_mapping',
47940b
                                  help=('Provide a previously generated mapping'
47940b
                                        ' file for obfuscation'))
47940b
+        cleaner_grp.add_argument('--keep-binary-files', default=False,
47940b
+                                 action='store_true',
47940b
+                                 dest='keep_binary_files',
47940b
+                                 help='Keep unprocessable binary files in the '
47940b
+                                      'archive instead of removing them')
47940b
         cleaner_grp.add_argument('--usernames', dest='usernames', default=[],
47940b
                                  action='extend',
47940b
                                  help='List of usernames to obfuscate')
47940b
47940b
From aed0102a1d6ef9a030c9e5349f092b51b9d1f22d Mon Sep 17 00:00:00 2001
47940b
From: Jake Hunsaker <jhunsake@redhat.com>
47940b
Date: Fri, 11 Jun 2021 23:20:59 -0400
47940b
Subject: [PATCH 01/10] [SoSNode] Allow individually setting node options
47940b
47940b
Like we now do for primary nodes, add the ability to individually set
47940b
node options via a new `set_node_options()` method for when blanket
47940b
setting options across all nodes via the options class attrs is not
47940b
sufficient.
47940b
47940b
Signed-off-by: Jake Hunsaker <jhunsake@redhat.com>
47940b
---
47940b
 sos/collector/clusters/__init__.py | 10 ++++++++++
47940b
 sos/collector/sosnode.py           |  6 ++++--
47940b
 2 files changed, 14 insertions(+), 2 deletions(-)
47940b
47940b
diff --git a/sos/collector/clusters/__init__.py b/sos/collector/clusters/__init__.py
47940b
index 90e62d79..c4da1ab8 100644
47940b
--- a/sos/collector/clusters/__init__.py
47940b
+++ b/sos/collector/clusters/__init__.py
47940b
@@ -137,6 +137,16 @@ class Cluster():
47940b
         """
47940b
         self.cluster_ssh_key = key
47940b
 
47940b
+    def set_node_options(self, node):
47940b
+        """If there is a need to set specific options on ONLY the non-primary
47940b
+        nodes in a collection, override this method in the cluster profile
47940b
+        and do that here.
47940b
+
47940b
+        :param node:        The non-primary node
47940b
+        :type node:         ``SoSNode``
47940b
+        """
47940b
+        pass
47940b
+
47940b
     def set_master_options(self, node):
47940b
         """If there is a need to set specific options in the sos command being
47940b
         run on the cluster's master nodes, override this method in the cluster
47940b
diff --git a/sos/collector/sosnode.py b/sos/collector/sosnode.py
47940b
index 1fc03076..7e784aa1 100644
47940b
--- a/sos/collector/sosnode.py
47940b
+++ b/sos/collector/sosnode.py
47940b
@@ -657,6 +657,8 @@ class SosNode():
47940b
         # set master-only options
47940b
         if self.cluster.check_node_is_master(self):
47940b
             self.cluster.set_master_options(self)
47940b
+        else:
47940b
+            self.cluster.set_node_options(self)
47940b
 
47940b
     def finalize_sos_cmd(self):
47940b
         """Use host facts and compare to the cluster type to modify the sos
47940b
@@ -713,13 +715,13 @@ class SosNode():
47940b
                 sos_opts.append('--cmd-timeout=%s'
47940b
                                 % quote(str(self.opts.cmd_timeout)))
47940b
 
47940b
+        self.update_cmd_from_cluster()
47940b
+
47940b
         sos_cmd = sos_cmd.replace(
47940b
             'sosreport',
47940b
             os.path.join(self.host.sos_bin_path, self.sos_bin)
47940b
         )
47940b
 
47940b
-        self.update_cmd_from_cluster()
47940b
-
47940b
         if self.opts.only_plugins:
47940b
             plugs = [o for o in self.opts.only_plugins
47940b
                      if self._plugin_exists(o)]
47940b
-- 
47940b
2.26.3
47940b
47940b
47940b
From 96f166699d12704cc7cf73cb8b13278675f68730 Mon Sep 17 00:00:00 2001
47940b
From: Jake Hunsaker <jhunsake@redhat.com>
47940b
Date: Sat, 12 Jun 2021 00:02:36 -0400
47940b
Subject: [PATCH 02/10] [sosnode] Support passing env vars to `run_command()`
47940b
47940b
Updates `run_command()` to support passing new environment variables to
47940b
the command being run, for that command alone. This parameter takes a
47940b
dict, and if set we will first copy the existing set of env vars on the
47940b
node and then update that set of variables using the passed dict.
47940b
47940b
Additionally, `execute_sos_command()` will now try to pass a new
47940b
`sos_env_vars` dict (default empty) so that clusters may set environment
47940b
variables specifically for the sos command being run, without having to
47940b
modify the actual sos command being executed.
47940b
47940b
Signed-off-by: Jake Hunsaker <jhunsake@redhat.com>
47940b
---
47940b
 sos/collector/sosnode.py | 27 ++++++++++++++++++++++++---
47940b
 1 file changed, 24 insertions(+), 3 deletions(-)
47940b
47940b
diff --git a/sos/collector/sosnode.py b/sos/collector/sosnode.py
47940b
index 7e784aa1..40472a4e 100644
47940b
--- a/sos/collector/sosnode.py
47940b
+++ b/sos/collector/sosnode.py
47940b
@@ -45,6 +45,8 @@ class SosNode():
47940b
         self.host = None
47940b
         self.cluster = None
47940b
         self.hostname = None
47940b
+        self.sos_env_vars = {}
47940b
+        self._env_vars = {}
47940b
         self._password = password or self.opts.password
47940b
         if not self.opts.nopasswd_sudo and not self.opts.sudo_pw:
47940b
             self.opts.sudo_pw = self._password
47940b
@@ -109,6 +111,21 @@ class SosNode():
47940b
     def _fmt_msg(self, msg):
47940b
         return '{:<{}} : {}'.format(self._hostname, self.hostlen + 1, msg)
47940b
 
47940b
+    @property
47940b
+    def env_vars(self):
47940b
+        if not self._env_vars:
47940b
+            if self.local:
47940b
+                self._env_vars = os.environ.copy()
47940b
+            else:
47940b
+                ret = self.run_command("env --null")
47940b
+                if ret['status'] == 0:
47940b
+                    for ln in ret['output'].split('\x00'):
47940b
+                        if not ln:
47940b
+                            continue
47940b
+                        _val = ln.split('=')
47940b
+                        self._env_vars[_val[0]] = _val[1]
47940b
+        return self._env_vars
47940b
+
47940b
     def set_node_manifest(self, manifest):
47940b
         """Set the manifest section that this node will write to
47940b
         """
47940b
@@ -404,7 +421,7 @@ class SosNode():
47940b
         return self.host.package_manager.pkg_by_name(pkg) is not None
47940b
 
47940b
     def run_command(self, cmd, timeout=180, get_pty=False, need_root=False,
47940b
-                    force_local=False, use_container=False):
47940b
+                    force_local=False, use_container=False, env=None):
47940b
         """Runs a given cmd, either via the SSH session or locally
47940b
 
47940b
         Arguments:
47940b
@@ -446,7 +463,10 @@ class SosNode():
47940b
         else:
47940b
             if get_pty:
47940b
                 cmd = "/bin/bash -c %s" % quote(cmd)
47940b
-        res = pexpect.spawn(cmd, encoding='utf-8')
47940b
+        if env:
47940b
+            _cmd_env = self.env_vars
47940b
+            _cmd_env.update(env)
47940b
+        res = pexpect.spawn(cmd, encoding='utf-8', env=_cmd_env)
47940b
         if need_root:
47940b
             if self.need_sudo:
47940b
                 res.sendline(self.opts.sudo_pw)
47940b
@@ -830,7 +850,8 @@ class SosNode():
47940b
             res = self.run_command(self.sos_cmd,
47940b
                                    timeout=self.opts.timeout,
47940b
                                    get_pty=True, need_root=True,
47940b
-                                   use_container=True)
47940b
+                                   use_container=True,
47940b
+                                   env=self.sos_env_vars)
47940b
             if res['status'] == 0:
47940b
                 for line in res['stdout'].splitlines():
47940b
                     if fnmatch.fnmatch(line, '*sosreport-*tar*'):
47940b
-- 
47940b
2.26.3
47940b
47940b
47940b
From a9e1632113406a646bdd7525982b699cf790aedb Mon Sep 17 00:00:00 2001
47940b
From: Jake Hunsaker <jhunsake@redhat.com>
47940b
Date: Tue, 15 Jun 2021 12:43:27 -0400
47940b
Subject: [PATCH 03/10] [collect|sosnode] Avoid clobbering sos options
47940b
 between nodes
47940b
47940b
This commit overhauls the function of `finalize_sos_cmd()` in several
47940b
ways.
47940b
47940b
First, assign the sos report plugin related options directly to private
47940b
copies of those values for each node, so that the shared cluster profile
47940b
does not clobber options between nodes.
47940b
47940b
Second, provide a default Lock mechanism for clusters that need to
47940b
perform some node-comparison logic when assigning options based on node
47940b
role.
47940b
47940b
Finally, finalize the sos command for each node _prior_ to the call to
47940b
`SoSNode.sosreport()` so that we can be sure that clusters are able to
47940b
appropriately compare and assign sos options across nodes before some
47940b
nodes have already started and/or finished their own sos report
47940b
collections.
47940b
47940b
Signed-off-by: Jake Hunsaker <jhunsake@redhat.com>
47940b
---
47940b
 sos/collector/__init__.py          | 14 +++++
47940b
 sos/collector/clusters/__init__.py |  2 +
47940b
 sos/collector/sosnode.py           | 89 +++++++++++++++++-------------
47940b
 3 files changed, 67 insertions(+), 38 deletions(-)
47940b
47940b
diff --git a/sos/collector/__init__.py b/sos/collector/__init__.py
47940b
index 469db60d..7b8cfcf7 100644
47940b
--- a/sos/collector/__init__.py
47940b
+++ b/sos/collector/__init__.py
47940b
@@ -1186,6 +1186,10 @@ this utility or remote systems that it connects to.
47940b
                              "concurrently\n"
47940b
                              % (self.report_num, self.opts.jobs))
47940b
 
47940b
+            npool = ThreadPoolExecutor(self.opts.jobs)
47940b
+            npool.map(self._finalize_sos_cmd, self.client_list, chunksize=1)
47940b
+            npool.shutdown(wait=True)
47940b
+
47940b
             pool = ThreadPoolExecutor(self.opts.jobs)
47940b
             pool.map(self._collect, self.client_list, chunksize=1)
47940b
             pool.shutdown(wait=True)
47940b
@@ -1217,6 +1221,16 @@ this utility or remote systems that it connects to.
47940b
             except Exception as err:
47940b
                 self.ui_log.error("Upload attempt failed: %s" % err)
47940b
 
47940b
+    def _finalize_sos_cmd(self, client):
47940b
+        """Calls finalize_sos_cmd() on each node so that we have the final
47940b
+        command before we thread out the actual execution of sos
47940b
+        """
47940b
+        try:
47940b
+            client.finalize_sos_cmd()
47940b
+        except Exception as err:
47940b
+            self.log_error("Could not finalize sos command for %s: %s"
47940b
+                           % (client.address, err))
47940b
+
47940b
     def _collect(self, client):
47940b
         """Runs sosreport on each node"""
47940b
         try:
47940b
diff --git a/sos/collector/clusters/__init__.py b/sos/collector/clusters/__init__.py
47940b
index c4da1ab8..bb728bc0 100644
47940b
--- a/sos/collector/clusters/__init__.py
47940b
+++ b/sos/collector/clusters/__init__.py
47940b
@@ -11,6 +11,7 @@
47940b
 import logging
47940b
 
47940b
 from sos.options import ClusterOption
47940b
+from threading import Lock
47940b
 
47940b
 
47940b
 class Cluster():
47940b
@@ -66,6 +67,7 @@ class Cluster():
47940b
             if cls.__name__ != 'Cluster':
47940b
                 self.cluster_type.append(cls.__name__)
47940b
         self.node_list = None
47940b
+        self.lock = Lock()
47940b
         self.soslog = logging.getLogger('sos')
47940b
         self.ui_log = logging.getLogger('sos_ui')
47940b
         self.options = []
47940b
diff --git a/sos/collector/sosnode.py b/sos/collector/sosnode.py
47940b
index 40472a4e..1c25cc34 100644
47940b
--- a/sos/collector/sosnode.py
47940b
+++ b/sos/collector/sosnode.py
47940b
@@ -38,6 +38,7 @@ class SosNode():
47940b
         self.address = address.strip()
47940b
         self.commons = commons
47940b
         self.opts = commons['cmdlineopts']
47940b
+        self._assign_config_opts()
47940b
         self.tmpdir = commons['tmpdir']
47940b
         self.hostlen = commons['hostlen']
47940b
         self.need_sudo = commons['need_sudo']
47940b
@@ -465,8 +466,8 @@ class SosNode():
47940b
                 cmd = "/bin/bash -c %s" % quote(cmd)
47940b
         if env:
47940b
             _cmd_env = self.env_vars
47940b
-            _cmd_env.update(env)
47940b
-        res = pexpect.spawn(cmd, encoding='utf-8', env=_cmd_env)
47940b
+            env = _cmd_env.update(env)
47940b
+        res = pexpect.spawn(cmd, encoding='utf-8', env=env)
47940b
         if need_root:
47940b
             if self.need_sudo:
47940b
                 res.sendline(self.opts.sudo_pw)
47940b
@@ -484,9 +485,6 @@ class SosNode():
47940b
 
47940b
     def sosreport(self):
47940b
         """Run a sosreport on the node, then collect it"""
47940b
-        self.sos_cmd = self.finalize_sos_cmd()
47940b
-        self.log_info('Final sos command set to %s' % self.sos_cmd)
47940b
-        self.manifest.add_field('final_sos_command', self.sos_cmd)
47940b
         try:
47940b
             path = self.execute_sos_command()
47940b
             if path:
47940b
@@ -656,29 +654,42 @@ class SosNode():
47940b
         This will NOT override user supplied options.
47940b
         """
47940b
         if self.cluster.sos_preset:
47940b
-            if not self.opts.preset:
47940b
-                self.opts.preset = self.cluster.sos_preset
47940b
+            if not self.preset:
47940b
+                self.preset = self.cluster.sos_preset
47940b
             else:
47940b
                 self.log_info('Cluster specified preset %s but user has also '
47940b
                               'defined a preset. Using user specification.'
47940b
                               % self.cluster.sos_preset)
47940b
         if self.cluster.sos_plugins:
47940b
             for plug in self.cluster.sos_plugins:
47940b
-                if plug not in self.opts.enable_plugins:
47940b
-                    self.opts.enable_plugins.append(plug)
47940b
+                if plug not in self.enable_plugins:
47940b
+                    self.enable_plugins.append(plug)
47940b
 
47940b
         if self.cluster.sos_plugin_options:
47940b
             for opt in self.cluster.sos_plugin_options:
47940b
-                if not any(opt in o for o in self.opts.plugin_options):
47940b
+                if not any(opt in o for o in self.plugin_options):
47940b
                     option = '%s=%s' % (opt,
47940b
                                         self.cluster.sos_plugin_options[opt])
47940b
-                    self.opts.plugin_options.append(option)
47940b
+                    self.plugin_options.append(option)
47940b
 
47940b
         # set master-only options
47940b
         if self.cluster.check_node_is_master(self):
47940b
-            self.cluster.set_master_options(self)
47940b
+            with self.cluster.lock:
47940b
+                self.cluster.set_master_options(self)
47940b
         else:
47940b
-            self.cluster.set_node_options(self)
47940b
+            with self.cluster.lock:
47940b
+                self.cluster.set_node_options(self)
47940b
+
47940b
+    def _assign_config_opts(self):
47940b
+        """From the global opts configuration, assign those values locally
47940b
+        to this node so that they may be acted on individually.
47940b
+        """
47940b
+        # assign these to new, private copies
47940b
+        self.only_plugins = list(self.opts.only_plugins)
47940b
+        self.skip_plugins = list(self.opts.skip_plugins)
47940b
+        self.enable_plugins = list(self.opts.enable_plugins)
47940b
+        self.plugin_options = list(self.opts.plugin_options)
47940b
+        self.preset = list(self.opts.preset)
47940b
 
47940b
     def finalize_sos_cmd(self):
47940b
         """Use host facts and compare to the cluster type to modify the sos
47940b
@@ -742,59 +753,61 @@ class SosNode():
47940b
             os.path.join(self.host.sos_bin_path, self.sos_bin)
47940b
         )
47940b
 
47940b
-        if self.opts.only_plugins:
47940b
-            plugs = [o for o in self.opts.only_plugins
47940b
-                     if self._plugin_exists(o)]
47940b
-            if len(plugs) != len(self.opts.only_plugins):
47940b
-                not_only = list(set(self.opts.only_plugins) - set(plugs))
47940b
+        if self.only_plugins:
47940b
+            plugs = [o for o in self.only_plugins if self._plugin_exists(o)]
47940b
+            if len(plugs) != len(self.only_plugins):
47940b
+                not_only = list(set(self.only_plugins) - set(plugs))
47940b
                 self.log_debug('Requested plugins %s were requested to be '
47940b
                                'enabled but do not exist' % not_only)
47940b
-            only = self._fmt_sos_opt_list(self.opts.only_plugins)
47940b
+            only = self._fmt_sos_opt_list(self.only_plugins)
47940b
             if only:
47940b
                 sos_opts.append('--only-plugins=%s' % quote(only))
47940b
-            return "%s %s" % (sos_cmd, ' '.join(sos_opts))
47940b
+            self.sos_cmd = "%s %s" % (sos_cmd, ' '.join(sos_opts))
47940b
+            self.log_info('Final sos command set to %s' % self.sos_cmd)
47940b
+            self.manifest.add_field('final_sos_command', self.sos_cmd)
47940b
+            return
47940b
 
47940b
-        if self.opts.skip_plugins:
47940b
+        if self.skip_plugins:
47940b
             # only run skip-plugins for plugins that are enabled
47940b
-            skip = [o for o in self.opts.skip_plugins
47940b
-                    if self._check_enabled(o)]
47940b
-            if len(skip) != len(self.opts.skip_plugins):
47940b
-                not_skip = list(set(self.opts.skip_plugins) - set(skip))
47940b
+            skip = [o for o in self.skip_plugins if self._check_enabled(o)]
47940b
+            if len(skip) != len(self.skip_plugins):
47940b
+                not_skip = list(set(self.skip_plugins) - set(skip))
47940b
                 self.log_debug('Requested to skip plugins %s, but plugins are '
47940b
                                'already not enabled' % not_skip)
47940b
             skipln = self._fmt_sos_opt_list(skip)
47940b
             if skipln:
47940b
                 sos_opts.append('--skip-plugins=%s' % quote(skipln))
47940b
 
47940b
-        if self.opts.enable_plugins:
47940b
+        if self.enable_plugins:
47940b
             # only run enable for plugins that are disabled
47940b
-            opts = [o for o in self.opts.enable_plugins
47940b
-                    if o not in self.opts.skip_plugins
47940b
+            opts = [o for o in self.enable_plugins
47940b
+                    if o not in self.skip_plugins
47940b
                     and self._check_disabled(o) and self._plugin_exists(o)]
47940b
-            if len(opts) != len(self.opts.enable_plugins):
47940b
-                not_on = list(set(self.opts.enable_plugins) - set(opts))
47940b
+            if len(opts) != len(self.enable_plugins):
47940b
+                not_on = list(set(self.enable_plugins) - set(opts))
47940b
                 self.log_debug('Requested to enable plugins %s, but plugins '
47940b
                                'are already enabled or do not exist' % not_on)
47940b
             enable = self._fmt_sos_opt_list(opts)
47940b
             if enable:
47940b
                 sos_opts.append('--enable-plugins=%s' % quote(enable))
47940b
 
47940b
-        if self.opts.plugin_options:
47940b
-            opts = [o for o in self.opts.plugin_options
47940b
+        if self.plugin_options:
47940b
+            opts = [o for o in self.plugin_options
47940b
                     if self._plugin_exists(o.split('.')[0])
47940b
                     and self._plugin_option_exists(o.split('=')[0])]
47940b
             if opts:
47940b
                 sos_opts.append('-k %s' % quote(','.join(o for o in opts)))
47940b
 
47940b
-        if self.opts.preset:
47940b
-            if self._preset_exists(self.opts.preset):
47940b
-                sos_opts.append('--preset=%s' % quote(self.opts.preset))
47940b
+        if self.preset:
47940b
+            if self._preset_exists(self.preset):
47940b
+                sos_opts.append('--preset=%s' % quote(self.preset))
47940b
             else:
47940b
                 self.log_debug('Requested to enable preset %s but preset does '
47940b
-                               'not exist on node' % self.opts.preset)
47940b
+                               'not exist on node' % self.preset)
47940b
 
47940b
-        _sos_cmd = "%s %s" % (sos_cmd, ' '.join(sos_opts))
47940b
-        return _sos_cmd
47940b
+        self.sos_cmd = "%s %s" % (sos_cmd, ' '.join(sos_opts))
47940b
+        self.log_info('Final sos command set to %s' % self.sos_cmd)
47940b
+        self.manifest.add_field('final_sos_command', self.sos_cmd)
47940b
 
47940b
     def determine_sos_label(self):
47940b
         """Determine what, if any, label should be added to the sosreport"""
47940b
-- 
47940b
2.26.3
47940b
47940b
47940b
From 7e6c078e51143f7064190b316a251ddd8d431495 Mon Sep 17 00:00:00 2001
47940b
From: Jake Hunsaker <jhunsake@redhat.com>
47940b
Date: Tue, 15 Jun 2021 18:38:34 -0400
47940b
Subject: [PATCH 04/10] [cleaner] Improve handling of symlink obfuscation
47940b
47940b
Improves handling of symlink obfuscation by only performing the
47940b
obfuscation on the ultimate target of any symlinks encountered. Now,
47940b
when a symlink is encountered, clean will obfuscate the link name and
47940b
re-write it in the archive, pointing to the (potentially obfuscated)
47940b
target name.
47940b
47940b
Signed-off-by: Jake Hunsaker <jhunsake@redhat.com>
47940b
---
47940b
 sos/cleaner/__init__.py | 65 +++++++++++++++++++++++++++++------------
47940b
 1 file changed, 46 insertions(+), 19 deletions(-)
47940b
47940b
diff --git a/sos/cleaner/__init__.py b/sos/cleaner/__init__.py
47940b
index abfb684b..b38c8dfc 100644
47940b
--- a/sos/cleaner/__init__.py
47940b
+++ b/sos/cleaner/__init__.py
47940b
@@ -612,28 +612,55 @@ third party.
47940b
         if not filename:
47940b
             # the requested file doesn't exist in the archive
47940b
             return
47940b
-        self.log_debug("Obfuscating %s" % short_name or filename,
47940b
-                       caller=arc_name)
47940b
         subs = 0
47940b
-        tfile = tempfile.NamedTemporaryFile(mode='w', dir=self.tmpdir)
47940b
-        with open(filename, 'r') as fname:
47940b
-            for line in fname:
47940b
-                try:
47940b
-                    line, count = self.obfuscate_line(line)
47940b
-                    subs += count
47940b
-                    tfile.write(line)
47940b
-                except Exception as err:
47940b
-                    self.log_debug("Unable to obfuscate %s: %s"
47940b
-                                   % (short_name, err), caller=arc_name)
47940b
-        tfile.seek(0)
47940b
-        if subs:
47940b
-            shutil.copy(tfile.name, filename)
47940b
-        tfile.close()
47940b
-        _ob_filename = self.obfuscate_string(short_name)
47940b
-        if _ob_filename != short_name:
47940b
+        if not os.path.islink(filename):
47940b
+            # don't run the obfuscation on the link, but on the actual file
47940b
+            # at some other point.
47940b
+            self.log_debug("Obfuscating %s" % short_name or filename,
47940b
+                           caller=arc_name)
47940b
+            tfile = tempfile.NamedTemporaryFile(mode='w', dir=self.tmpdir)
47940b
+            with open(filename, 'r') as fname:
47940b
+                for line in fname:
47940b
+                    try:
47940b
+                        line, count = self.obfuscate_line(line)
47940b
+                        subs += count
47940b
+                        tfile.write(line)
47940b
+                    except Exception as err:
47940b
+                        self.log_debug("Unable to obfuscate %s: %s"
47940b
+                                       % (short_name, err), caller=arc_name)
47940b
+            tfile.seek(0)
47940b
+            if subs:
47940b
+                shutil.copy(tfile.name, filename)
47940b
+            tfile.close()
47940b
+
47940b
+        _ob_short_name = self.obfuscate_string(short_name.split('/')[-1])
47940b
+        _ob_filename = short_name.replace(short_name.split('/')[-1],
47940b
+                                          _ob_short_name)
47940b
+        _sym_changed = False
47940b
+        if os.path.islink(filename):
47940b
+            _link = os.readlink(filename)
47940b
+            _ob_link = self.obfuscate_string(_link)
47940b
+            if _ob_link != _link:
47940b
+                _sym_changed = True
47940b
+
47940b
+        if (_ob_filename != short_name) or _sym_changed:
47940b
             arc_path = filename.split(short_name)[0]
47940b
             _ob_path = os.path.join(arc_path, _ob_filename)
47940b
-            os.rename(filename, _ob_path)
47940b
+            # ensure that any plugin subdirs that contain obfuscated strings
47940b
+            # get created with obfuscated counterparts
47940b
+            if not os.path.islink(filename):
47940b
+                os.rename(filename, _ob_path)
47940b
+            else:
47940b
+                # generate the obfuscated name of the link target
47940b
+                _target_ob = self.obfuscate_string(os.readlink(filename))
47940b
+                # remove the unobfuscated original symlink first, in case the
47940b
+                # symlink name hasn't changed but the target has
47940b
+                os.remove(filename)
47940b
+                # create the newly obfuscated symlink, pointing to the
47940b
+                # obfuscated target name, which may not exist just yet, but
47940b
+                # when the actual file is obfuscated, will be created
47940b
+                os.symlink(_target_ob, _ob_path)
47940b
+
47940b
         return subs
47940b
 
47940b
     def obfuscate_string(self, string_data):
47940b
-- 
47940b
2.26.3
47940b
47940b
47940b
From b5d166ac9ff79bc3740c5e66f16d60762f9a0ac0 Mon Sep 17 00:00:00 2001
47940b
From: Jake Hunsaker <jhunsake@redhat.com>
47940b
Date: Tue, 15 Jun 2021 22:56:19 -0400
47940b
Subject: [PATCH 05/10] [cleaner] Iterate over matches with most precise match
47940b
 first
47940b
47940b
When matching strings in parsers to do obfuscation, we should be using
47940b
the most precise matches found first, rather than matching in the order
47940b
a match is hit. This ensures that we correctly obfuscate an entire
47940b
string, rather than potentially only partial substring(s) that exist
47940b
within the entire match.
47940b
47940b
Signed-off-by: Jake Hunsaker <jhunsake@redhat.com>
47940b
---
47940b
 sos/cleaner/parsers/__init__.py        | 10 +++++++---
47940b
 sos/cleaner/parsers/keyword_parser.py  |  2 +-
47940b
 sos/cleaner/parsers/username_parser.py |  2 +-
47940b
 3 files changed, 9 insertions(+), 5 deletions(-)
47940b
47940b
diff --git a/sos/cleaner/parsers/__init__.py b/sos/cleaner/parsers/__init__.py
47940b
index c77300aa..cfa20b95 100644
47940b
--- a/sos/cleaner/parsers/__init__.py
47940b
+++ b/sos/cleaner/parsers/__init__.py
47940b
@@ -82,10 +82,12 @@ class SoSCleanerParser():
47940b
         for pattern in self.regex_patterns:
47940b
             matches = [m[0] for m in re.findall(pattern, line, re.I)]
47940b
             if matches:
47940b
+                matches.sort(reverse=True, key=lambda x: len(x))
47940b
                 count += len(matches)
47940b
                 for match in matches:
47940b
-                    new_match = self.mapping.get(match.strip())
47940b
-                    line = line.replace(match.strip(), new_match)
47940b
+                    match = match.strip()
47940b
+                    new_match = self.mapping.get(match)
47940b
+                    line = line.replace(match, new_match)
47940b
         return line, count
47940b
 
47940b
     def parse_string_for_keys(self, string_data):
47940b
@@ -102,7 +104,9 @@ class SoSCleanerParser():
47940b
         :returns: The obfuscated line
47940b
         :rtype: ``str``
47940b
         """
47940b
-        for key, val in self.mapping.dataset.items():
47940b
+        for pair in sorted(self.mapping.dataset.items(), reverse=True,
47940b
+                           key=lambda x: len(x[0])):
47940b
+            key, val = pair
47940b
             if key in string_data:
47940b
                 string_data = string_data.replace(key, val)
47940b
         return string_data
47940b
diff --git a/sos/cleaner/parsers/keyword_parser.py b/sos/cleaner/parsers/keyword_parser.py
47940b
index 3dc2b7f0..9134f82d 100644
47940b
--- a/sos/cleaner/parsers/keyword_parser.py
47940b
+++ b/sos/cleaner/parsers/keyword_parser.py
47940b
@@ -42,7 +42,7 @@ class SoSKeywordParser(SoSCleanerParser):
47940b
 
47940b
     def parse_line(self, line):
47940b
         count = 0
47940b
-        for keyword in self.user_keywords:
47940b
+        for keyword in sorted(self.user_keywords, reverse=True):
47940b
             if keyword in line:
47940b
                 line = line.replace(keyword, self.mapping.get(keyword))
47940b
                 count += 1
47940b
diff --git a/sos/cleaner/parsers/username_parser.py b/sos/cleaner/parsers/username_parser.py
47940b
index 2bb6c7f3..0c3bbac4 100644
47940b
--- a/sos/cleaner/parsers/username_parser.py
47940b
+++ b/sos/cleaner/parsers/username_parser.py
47940b
@@ -51,7 +51,7 @@ class SoSUsernameParser(SoSCleanerParser):
47940b
 
47940b
     def parse_line(self, line):
47940b
         count = 0
47940b
-        for username in self.mapping.dataset.keys():
47940b
+        for username in sorted(self.mapping.dataset.keys(), reverse=True):
47940b
             if username in line:
47940b
                 count = line.count(username)
47940b
                 line = line.replace(username, self.mapping.get(username))
47940b
-- 
47940b
2.26.3
47940b
47940b
47940b
From 7ed138fcd2ee6ece3e7fbd9e48293b212e0b4e41 Mon Sep 17 00:00:00 2001
47940b
From: Jake Hunsaker <jhunsake@redhat.com>
47940b
Date: Wed, 16 Jun 2021 01:15:45 -0400
47940b
Subject: [PATCH 06/10] [cleaner] Explicitly obfuscate directory names within
47940b
 archives
47940b
47940b
This commit adds a step to `obfuscate_report()` that explicitly walks
47940b
through all directories in the archive, and obfuscates the directory
47940b
names if necessary.
47940b
47940b
Since this uses `obfuscate_string()` for the directory names, a
47940b
`skip_keys` list has been added to maps to allow parsers/maps to
47940b
specify matched keys (such as short names for the hostname parser) that
47940b
should not be considered when obfuscating directory names (e.g. 'www').
47940b
47940b
Closes: #2465
47940b
47940b
Signed-off-by: Jake Hunsaker <jhunsake@redhat.com>
47940b
---
47940b
 sos/cleaner/__init__.py              | 26 ++++++++++++++++++++++++++
47940b
 sos/cleaner/mappings/__init__.py     |  4 +++-
47940b
 sos/cleaner/mappings/hostname_map.py |  5 +++++
47940b
 sos/cleaner/obfuscation_archive.py   | 20 ++++++++++++++++++--
47940b
 sos/cleaner/parsers/__init__.py      |  2 ++
47940b
 5 files changed, 54 insertions(+), 3 deletions(-)
47940b
47940b
diff --git a/sos/cleaner/__init__.py b/sos/cleaner/__init__.py
47940b
index b38c8dfc..88d4d0ea 100644
47940b
--- a/sos/cleaner/__init__.py
47940b
+++ b/sos/cleaner/__init__.py
47940b
@@ -562,6 +562,11 @@ third party.
47940b
                 except Exception as err:
47940b
                     self.log_debug("Unable to parse file %s: %s"
47940b
                                    % (short_name, err))
47940b
+            try:
47940b
+                self.obfuscate_directory_names(archive)
47940b
+            except Exception as err:
47940b
+                self.log_info("Failed to obfuscate directories: %s" % err,
47940b
+                              caller=archive.archive_name)
47940b
 
47940b
             # if the archive was already a tarball, repack it
47940b
             method = archive.get_compression()
47940b
@@ -663,6 +668,27 @@ third party.
47940b
 
47940b
         return subs
47940b
 
47940b
+    def obfuscate_directory_names(self, archive):
47940b
+        """For all directories that exist within the archive, obfuscate the
47940b
+        directory name if it contains sensitive strings found during execution
47940b
+        """
47940b
+        self.log_info("Obfuscating directory names in archive %s"
47940b
+                      % archive.archive_name)
47940b
+        for dirpath in sorted(archive.get_directory_list(), reverse=True):
47940b
+            for _name in os.listdir(dirpath):
47940b
+                _dirname = os.path.join(dirpath, _name)
47940b
+                _arc_dir = _dirname.split(archive.extracted_path)[-1]
47940b
+                if os.path.isdir(_dirname):
47940b
+                    _ob_dirname = self.obfuscate_string(_name)
47940b
+                    if _ob_dirname != _name:
47940b
+                        _ob_arc_dir = _arc_dir.rstrip(_name)
47940b
+                        _ob_arc_dir = os.path.join(
47940b
+                            archive.extracted_path,
47940b
+                            _ob_arc_dir.lstrip('/'),
47940b
+                            _ob_dirname
47940b
+                        )
47940b
+                        os.rename(_dirname, _ob_arc_dir)
47940b
+
47940b
     def obfuscate_string(self, string_data):
47940b
         for parser in self.parsers:
47940b
             try:
47940b
diff --git a/sos/cleaner/mappings/__init__.py b/sos/cleaner/mappings/__init__.py
47940b
index dd464e5a..5cf5c8b2 100644
47940b
--- a/sos/cleaner/mappings/__init__.py
47940b
+++ b/sos/cleaner/mappings/__init__.py
47940b
@@ -20,8 +20,10 @@ class SoSMap():
47940b
     corresponding SoSMap() object, to allow for easy retrieval of obfuscated
47940b
     items.
47940b
     """
47940b
-
47940b
+    # used for regex skips in parser.parse_line()
47940b
     ignore_matches = []
47940b
+    # used for filename obfuscations in parser.parse_string_for_keys()
47940b
+    skip_keys = []
47940b
 
47940b
     def __init__(self):
47940b
         self.dataset = {}
47940b
diff --git a/sos/cleaner/mappings/hostname_map.py b/sos/cleaner/mappings/hostname_map.py
47940b
index e0b7bf1d..c9a44d8d 100644
47940b
--- a/sos/cleaner/mappings/hostname_map.py
47940b
+++ b/sos/cleaner/mappings/hostname_map.py
47940b
@@ -35,6 +35,11 @@ class SoSHostnameMap(SoSMap):
47940b
         '^com..*'
47940b
     ]
47940b
 
47940b
+    skip_keys = [
47940b
+        'www',
47940b
+        'api'
47940b
+    ]
47940b
+
47940b
     host_count = 0
47940b
     domain_count = 0
47940b
     _domains = {}
47940b
diff --git a/sos/cleaner/obfuscation_archive.py b/sos/cleaner/obfuscation_archive.py
47940b
index 88f978d9..90188358 100644
47940b
--- a/sos/cleaner/obfuscation_archive.py
47940b
+++ b/sos/cleaner/obfuscation_archive.py
47940b
@@ -202,10 +202,22 @@ class SoSObfuscationArchive():
47940b
         """Return a list of all files within the archive"""
47940b
         self.file_list = []
47940b
         for dirname, dirs, files in os.walk(self.extracted_path):
47940b
+            for _dir in dirs:
47940b
+                _dirpath = os.path.join(dirname, _dir)
47940b
+                # catch dir-level symlinks
47940b
+                if os.path.islink(_dirpath) and os.path.isdir(_dirpath):
47940b
+                    self.file_list.append(_dirpath)
47940b
             for filename in files:
47940b
                 self.file_list.append(os.path.join(dirname, filename))
47940b
         return self.file_list
47940b
 
47940b
+    def get_directory_list(self):
47940b
+        """Return a list of all directories within the archive"""
47940b
+        dir_list = []
47940b
+        for dirname, dirs, files in os.walk(self.extracted_path):
47940b
+            dir_list.append(dirname)
47940b
+        return dir_list
47940b
+
47940b
     def update_sub_count(self, fname, count):
47940b
         """Called when a file has finished being parsed and used to track
47940b
         total substitutions made and number of files that had changes made
47940b
@@ -230,7 +242,8 @@ class SoSObfuscationArchive():
47940b
                                         archive root
47940b
         """
47940b
 
47940b
-        if not os.path.isfile(self.get_file_path(filename)):
47940b
+        if (not os.path.isfile(self.get_file_path(filename)) and not
47940b
+                os.path.islink(self.get_file_path(filename))):
47940b
             return True
47940b
 
47940b
         for _skip in self.skip_list:
47940b
@@ -266,7 +279,10 @@ class SoSObfuscationArchive():
47940b
             if re.match(_arc_reg, fname):
47940b
                 return True
47940b
 
47940b
-        return self.file_is_binary(fname)
47940b
+        if os.path.isfile(self.get_file_path(fname)):
47940b
+            return self.file_is_binary(fname)
47940b
+        # don't fail on dir-level symlinks
47940b
+        return False
47940b
 
47940b
     def file_is_binary(self, fname):
47940b
         """Determine if the file is a binary file or not.
47940b
diff --git a/sos/cleaner/parsers/__init__.py b/sos/cleaner/parsers/__init__.py
47940b
index cfa20b95..84874475 100644
47940b
--- a/sos/cleaner/parsers/__init__.py
47940b
+++ b/sos/cleaner/parsers/__init__.py
47940b
@@ -107,6 +107,8 @@ class SoSCleanerParser():
47940b
         for pair in sorted(self.mapping.dataset.items(), reverse=True,
47940b
                            key=lambda x: len(x[0])):
47940b
             key, val = pair
47940b
+            if key in self.mapping.skip_keys:
47940b
+                continue
47940b
             if key in string_data:
47940b
                 string_data = string_data.replace(key, val)
47940b
         return string_data
47940b
-- 
47940b
2.26.3
47940b
47940b
47940b
From f180150277b706e72f2445287f3d0b6943efa252 Mon Sep 17 00:00:00 2001
47940b
From: Jake Hunsaker <jhunsake@redhat.com>
47940b
Date: Wed, 16 Jun 2021 02:24:51 -0400
47940b
Subject: [PATCH 07/10] [hostname parser,map] Attempt to detect strings with
47940b
 FQDN substrings
47940b
47940b
This commit updates the hostname parser and associated map to be able to
47940b
better detect and obfuscate FQDN substrings within file content and file
47940b
names, particularly when the regex patterns failed to match a hostname
47940b
that is formatted with '_' characters rather than '.' characters.
47940b
47940b
The `get()` method has been updated to also preserve characters and
47940b
certain extensions that are not part of the FQDN, but are brought in by
47940b
the regex pattern due to the fact that we need to use word boundary
47940b
indicators within the pattern.
47940b
47940b
Signed-off-by: Jake Hunsaker <jhunsake@redhat.com>
47940b
---
47940b
 sos/cleaner/mappings/hostname_map.py   | 59 +++++++++++++++++++++++---
47940b
 sos/cleaner/parsers/__init__.py        |  3 +-
47940b
 sos/cleaner/parsers/hostname_parser.py | 30 ++++++++++---
47940b
 3 files changed, 81 insertions(+), 11 deletions(-)
47940b
47940b
diff --git a/sos/cleaner/mappings/hostname_map.py b/sos/cleaner/mappings/hostname_map.py
47940b
index c9a44d8d..d4b2c88e 100644
47940b
--- a/sos/cleaner/mappings/hostname_map.py
47940b
+++ b/sos/cleaner/mappings/hostname_map.py
47940b
@@ -104,7 +104,7 @@ class SoSHostnameMap(SoSMap):
47940b
         host = domain.split('.')
47940b
         if len(host) == 1:
47940b
             # don't block on host's shortname
47940b
-            return True
47940b
+            return host[0] in self.hosts.keys()
47940b
         else:
47940b
             domain = host[0:-1]
47940b
             for known_domain in self._domains:
47940b
@@ -113,12 +113,59 @@ class SoSHostnameMap(SoSMap):
47940b
         return False
47940b
 
47940b
     def get(self, item):
47940b
-        if item.startswith(('.', '_')):
47940b
-            item = item.lstrip('._')
47940b
-        item = item.strip()
47940b
+        prefix = ''
47940b
+        suffix = ''
47940b
+        final = None
47940b
+        # The regex pattern match may include a leading and/or trailing '_'
47940b
+        # character due to the need to use word boundary matching, so we need
47940b
+        # to strip these from the string during processing, but still keep them
47940b
+        # in the returned string to not mangle the string replacement in the
47940b
+        # context of the file or filename
47940b
+        while item.startswith(('.', '_')):
47940b
+            prefix += item[0]
47940b
+            item = item[1:]
47940b
+        while item.endswith(('.', '_')):
47940b
+            suffix += item[-1]
47940b
+            item = item[0:-1]
47940b
         if not self.domain_name_in_loaded_domains(item.lower()):
47940b
             return item
47940b
-        return super(SoSHostnameMap, self).get(item)
47940b
+        if item.endswith(('.yaml', '.yml', '.crt', '.key', '.pem')):
47940b
+            ext = '.' + item.split('.')[-1]
47940b
+            item = item.replace(ext, '')
47940b
+            suffix += ext
47940b
+        if item not in self.dataset.keys():
47940b
+            # try to account for use of '-' in names that include hostnames
47940b
+            # and don't create new mappings for each of these
47940b
+            for _existing in sorted(self.dataset.keys(), reverse=True,
47940b
+                                    key=lambda x: len(x)):
47940b
+                _host_substr = False
47940b
+                _test = item.split(_existing)
47940b
+                _h = _existing.split('.')
47940b
+                # avoid considering a full FQDN match as a new match off of
47940b
+                # the hostname of an existing match
47940b
+                if _h[0] and _h[0] in self.hosts.keys():
47940b
+                    _host_substr = True
47940b
+                if len(_test) == 1 or not _test[0]:
47940b
+                    # does not match existing obfuscation
47940b
+                    continue
47940b
+                elif _test[0].endswith('.') and not _host_substr:
47940b
+                    # new hostname in known domain
47940b
+                    final = super(SoSHostnameMap, self).get(item)
47940b
+                    break
47940b
+                elif item.split(_test[0]):
47940b
+                    # string that includes existing FQDN obfuscation substring
47940b
+                    # so, only obfuscate the FQDN part
47940b
+                    try:
47940b
+                        itm = item.split(_test[0])[1]
47940b
+                        final = _test[0] + super(SoSHostnameMap, self).get(itm)
47940b
+                        break
47940b
+                    except Exception:
47940b
+                        # fallback to still obfuscating the entire item
47940b
+                        pass
47940b
+
47940b
+        if not final:
47940b
+            final = super(SoSHostnameMap, self).get(item)
47940b
+        return prefix + final + suffix
47940b
 
47940b
     def sanitize_item(self, item):
47940b
         host = item.split('.')
47940b
@@ -146,6 +193,8 @@ class SoSHostnameMap(SoSMap):
47940b
         """Obfuscate the short name of the host with an incremented counter
47940b
         based on the total number of obfuscated host names
47940b
         """
47940b
+        if not hostname:
47940b
+            return hostname
47940b
         if hostname not in self.hosts:
47940b
             ob_host = "host%s" % self.host_count
47940b
             self.hosts[hostname] = ob_host
47940b
diff --git a/sos/cleaner/parsers/__init__.py b/sos/cleaner/parsers/__init__.py
47940b
index 84874475..57d2020a 100644
47940b
--- a/sos/cleaner/parsers/__init__.py
47940b
+++ b/sos/cleaner/parsers/__init__.py
47940b
@@ -87,7 +87,8 @@ class SoSCleanerParser():
47940b
                 for match in matches:
47940b
                     match = match.strip()
47940b
                     new_match = self.mapping.get(match)
47940b
-                    line = line.replace(match, new_match)
47940b
+                    if new_match != match:
47940b
+                        line = line.replace(match, new_match)
47940b
         return line, count
47940b
 
47940b
     def parse_string_for_keys(self, string_data):
47940b
diff --git a/sos/cleaner/parsers/hostname_parser.py b/sos/cleaner/parsers/hostname_parser.py
47940b
index 9982024b..3de6bb08 100644
47940b
--- a/sos/cleaner/parsers/hostname_parser.py
47940b
+++ b/sos/cleaner/parsers/hostname_parser.py
47940b
@@ -18,7 +18,7 @@ class SoSHostnameParser(SoSCleanerParser):
47940b
     map_file_key = 'hostname_map'
47940b
     prep_map_file = 'sos_commands/host/hostname'
47940b
     regex_patterns = [
47940b
-        r'(((\b|_)[a-zA-Z0-9-\.]{1,200}\.[a-zA-Z]{1,63}\b))'
47940b
+        r'(((\b|_)[a-zA-Z0-9-\.]{1,200}\.[a-zA-Z]{1,63}(\b|_)))'
47940b
     ]
47940b
 
47940b
     def __init__(self, conf_file=None, opt_domains=None):
47940b
@@ -66,10 +66,30 @@ class SoSHostnameParser(SoSCleanerParser):
47940b
         """Override the default parse_line() method to also check for the
47940b
         shortname of the host derived from the hostname.
47940b
         """
47940b
+
47940b
+        def _check_line(ln, count, search, repl=None):
47940b
+            """Perform a second manual check for substrings that may have been
47940b
+            missed by regex matching
47940b
+            """
47940b
+            if search in self.mapping.skip_keys:
47940b
+                return ln, count
47940b
+            if search in ln:
47940b
+                count += ln.count(search)
47940b
+                ln = ln.replace(search, self.mapping.get(repl or search))
47940b
+            return ln, count
47940b
+
47940b
         count = 0
47940b
         line, count = super(SoSHostnameParser, self).parse_line(line)
47940b
-        for short_name in self.short_names:
47940b
-            if short_name in line:
47940b
-                count += 1
47940b
-                line = line.replace(short_name, self.mapping.get(short_name))
47940b
+        # make an additional pass checking for '_' formatted substrings that
47940b
+        # the regex patterns won't catch
47940b
+        hosts = [h for h in self.mapping.dataset.keys() if '.' in h]
47940b
+        for host in sorted(hosts, reverse=True, key=lambda x: len(x)):
47940b
+            fqdn = host
47940b
+            for c in '.-':
47940b
+                fqdn = fqdn.replace(c, '_')
47940b
+            line, count = _check_line(line, count, fqdn, host)
47940b
+
47940b
+        for short_name in sorted(self.short_names, reverse=True):
47940b
+            line, count = _check_line(line, count, short_name)
47940b
+
47940b
         return line, count
47940b
-- 
47940b
2.26.3
47940b
47940b
47940b
From ec46e6a8fac58ed757344be3751eb1f925eab981 Mon Sep 17 00:00:00 2001
47940b
From: Jake Hunsaker <jhunsake@redhat.com>
47940b
Date: Mon, 14 Jun 2021 09:31:07 -0400
47940b
Subject: [PATCH 08/10] [ocp] Refine OCP node options in cluster profile
47940b
47940b
Adds explicit setting of primary/node sos options for the `openshift`
47940b
plugin within the cluster, rather than relying on default configurations
47940b
and best practices to avoid duplicate collections.
47940b
47940b
Signed-off-by: Jake Hunsaker <jhunsake@redhat.com>
47940b
---
47940b
 sos/collector/clusters/ocp.py | 65 +++++++++++++++++++++++++++++++++--
47940b
 sos/collector/sosnode.py      |  4 +--
47940b
 2 files changed, 65 insertions(+), 4 deletions(-)
47940b
47940b
diff --git a/sos/collector/clusters/ocp.py b/sos/collector/clusters/ocp.py
47940b
index 283fcfd1..ddff84a4 100644
47940b
--- a/sos/collector/clusters/ocp.py
47940b
+++ b/sos/collector/clusters/ocp.py
47940b
@@ -8,6 +8,8 @@
47940b
 #
47940b
 # See the LICENSE file in the source distribution for further information.
47940b
 
47940b
+import os
47940b
+
47940b
 from pipes import quote
47940b
 from sos.collector.clusters import Cluster
47940b
 
47940b
@@ -18,10 +20,14 @@ class ocp(Cluster):
47940b
     cluster_name = 'OpenShift Container Platform v4'
47940b
     packages = ('openshift-hyperkube', 'openshift-clients')
47940b
 
47940b
+    api_collect_enabled = False
47940b
+    token = None
47940b
+
47940b
     option_list = [
47940b
         ('label', '', 'Colon delimited list of labels to select nodes with'),
47940b
         ('role', '', 'Colon delimited list of roles to select nodes with'),
47940b
-        ('kubeconfig', '', 'Path to the kubeconfig file')
47940b
+        ('kubeconfig', '', 'Path to the kubeconfig file'),
47940b
+        ('token', '', 'Service account token to use for oc authorization')
47940b
     ]
47940b
 
47940b
     def fmt_oc_cmd(self, cmd):
47940b
@@ -32,9 +38,20 @@ class ocp(Cluster):
47940b
             return "oc --config %s %s" % (self.get_option('kubeconfig'), cmd)
47940b
         return "oc %s" % cmd
47940b
 
47940b
+    def _attempt_oc_login(self):
47940b
+        """Attempt to login to the API using the oc command using a provided
47940b
+        token
47940b
+        """
47940b
+        _res = self.exec_primary_cmd("oc login --insecure-skip-tls-verify=True"
47940b
+                                     " --token=%s" % self.token)
47940b
+        return _res['status'] == 0
47940b
+
47940b
     def check_enabled(self):
47940b
         if super(ocp, self).check_enabled():
47940b
             return True
47940b
+        self.token = self.get_option('token') or os.getenv('SOSOCPTOKEN', None)
47940b
+        if self.token:
47940b
+            self._attempt_oc_login()
47940b
         _who = self.fmt_oc_cmd('whoami')
47940b
         return self.exec_master_cmd(_who)['status'] == 0
47940b
 
47940b
@@ -106,4 +123,48 @@ class ocp(Cluster):
47940b
         return 'master' in self.node_dict[sosnode.address]['roles']
47940b
 
47940b
     def set_master_options(self, node):
47940b
-        node.opts.enable_plugins.append('openshift')
47940b
+        node.enable_plugins.append('openshift')
47940b
+        if self.api_collect_enabled:
47940b
+            # another primary is already enabled for API collection, so
47940b
+            # disable it on this one
47940b
+            node.plugin_options.append('openshift.no-oc=on')
47940b
+        else:
47940b
+            _oc_cmd = 'oc'
47940b
+            if node.host.containerized:
47940b
+                _oc_cmd = '/host/bin/oc'
47940b
+                # when run from a container, the oc command does not inherit
47940b
+                # the default config, so if it's present then pass it here to
47940b
+                # detect a functional oc command. This is sidestepped in sos
47940b
+                # report by being able to chroot the `oc` execution which we
47940b
+                # cannot do remotely
47940b
+                if node.file_exists('/root/.kube/config', need_root=True):
47940b
+                    _oc_cmd += ' --kubeconfig /host/root/.kube/config'
47940b
+            can_oc = node.run_command("%s whoami" % _oc_cmd,
47940b
+                                      use_container=node.host.containerized,
47940b
+                                      # container is available only to root
47940b
+                                      # and if rhel, need to run sos as root
47940b
+                                      # anyways which will run oc as root
47940b
+                                      need_root=True)
47940b
+            if can_oc['status'] == 0:
47940b
+                # the primary node can already access the API
47940b
+                self.api_collect_enabled = True
47940b
+            elif self.token:
47940b
+                node.sos_env_vars['SOSOCPTOKEN'] = self.token
47940b
+                self.api_collect_enabled = True
47940b
+            elif self.get_option('kubeconfig'):
47940b
+                kc = self.get_option('kubeconfig')
47940b
+                if node.file_exists(kc):
47940b
+                    if node.host.containerized:
47940b
+                        kc = "/host/%s" % kc
47940b
+                    node.sos_env_vars['KUBECONFIG'] = kc
47940b
+                    self.api_collect_enabled = True
47940b
+            if self.api_collect_enabled:
47940b
+                msg = ("API collections will be performed on %s\nNote: API "
47940b
+                       "collections may extend runtime by 10s of minutes\n"
47940b
+                       % node.address)
47940b
+                self.soslog.info(msg)
47940b
+                self.ui_log.info(msg)
47940b
+
47940b
+    def set_node_options(self, node):
47940b
+        # don't attempt OC API collections on non-primary nodes
47940b
+        node.plugin_options.append('openshift.no-oc=on')
47940b
diff --git a/sos/collector/sosnode.py b/sos/collector/sosnode.py
47940b
index 1c25cc34..6597d236 100644
47940b
--- a/sos/collector/sosnode.py
47940b
+++ b/sos/collector/sosnode.py
47940b
@@ -202,11 +202,11 @@ class SosNode():
47940b
                 self.opts.registry_authfile or self.host.container_authfile
47940b
             )
47940b
 
47940b
-    def file_exists(self, fname):
47940b
+    def file_exists(self, fname, need_root=False):
47940b
         """Checks for the presence of fname on the remote node"""
47940b
         if not self.local:
47940b
             try:
47940b
-                res = self.run_command("stat %s" % fname)
47940b
+                res = self.run_command("stat %s" % fname, need_root=need_root)
47940b
                 return res['status'] == 0
47940b
             except Exception:
47940b
                 return False
47940b
-- 
47940b
2.26.3
47940b
47940b
47940b
From eea8e15845a8bcba91b93a5310ba693e8c20ab9c Mon Sep 17 00:00:00 2001
47940b
From: Jake Hunsaker <jhunsake@redhat.com>
47940b
Date: Thu, 17 Jun 2021 09:52:36 -0400
47940b
Subject: [PATCH 09/10] [cleaner] Don't obfuscate default 'core' user
47940b
47940b
The 'core' user is a common default user on containerized hosts, and
47940b
obfuscation of it is not advantageous, much like the default 'ubuntu'
47940b
user for that distribution.
47940b
47940b
Signed-off-by: Jake Hunsaker <jhunsake@redhat.com>
47940b
---
47940b
 sos/cleaner/parsers/username_parser.py | 1 +
47940b
 1 file changed, 1 insertion(+)
47940b
47940b
diff --git a/sos/cleaner/parsers/username_parser.py b/sos/cleaner/parsers/username_parser.py
47940b
index 0c3bbac4..64843205 100644
47940b
--- a/sos/cleaner/parsers/username_parser.py
47940b
+++ b/sos/cleaner/parsers/username_parser.py
47940b
@@ -28,6 +28,7 @@ class SoSUsernameParser(SoSCleanerParser):
47940b
     prep_map_file = 'sos_commands/login/lastlog_-u_1000-60000'
47940b
     regex_patterns = []
47940b
     skip_list = [
47940b
+        'core',
47940b
         'nobody',
47940b
         'nfsnobody',
47940b
         'root'
47940b
-- 
47940b
2.26.3
47940b
47940b
47940b
From 581429ca65131711c96f9d56bf2f0e18779aec2e Mon Sep 17 00:00:00 2001
47940b
From: Jake Hunsaker <jhunsake@redhat.com>
47940b
Date: Fri, 18 Jun 2021 14:26:55 -0400
47940b
Subject: [PATCH 10/10] [cleaner] Fix checksum and archive pruning from archive
47940b
 list
47940b
47940b
Fixes an issue where checksums may have gotten into the list of archives
47940b
to be cleaned, which would cause further issues later. Additionally,
47940b
prevents nested sosreports from top-level archives (such as from
47940b
`collect`) from being removed for being a binary file when that
47940b
top-level archive gets obfuscated.
47940b
---
47940b
 sos/cleaner/__init__.py            | 5 +++--
47940b
 sos/cleaner/obfuscation_archive.py | 1 +
47940b
 2 files changed, 4 insertions(+), 2 deletions(-)
47940b
47940b
diff --git a/sos/cleaner/__init__.py b/sos/cleaner/__init__.py
47940b
index 88d4d0ea..8280bc50 100644
47940b
--- a/sos/cleaner/__init__.py
47940b
+++ b/sos/cleaner/__init__.py
47940b
@@ -226,8 +226,7 @@ third party.
47940b
         nested_archives = []
47940b
         for _file in archive.getmembers():
47940b
             if (re.match('sosreport-.*.tar', _file.name.split('/')[-1]) and not
47940b
-               (_file.name.endswith('.md5') or
47940b
-               _file.name.endswith('.sha256'))):
47940b
+                    (_file.name.endswith(('.md5', '.sha256')))):
47940b
                 nested_archives.append(_file.name.split('/')[-1])
47940b
 
47940b
         if nested_archives:
47940b
@@ -235,6 +234,8 @@ third party.
47940b
             nested_path = self.extract_archive(archive)
47940b
             for arc_file in os.listdir(nested_path):
47940b
                 if re.match('sosreport.*.tar.*', arc_file):
47940b
+                    if arc_file.endswith(('.md5', '.sha256')):
47940b
+                        continue
47940b
                     self.report_paths.append(os.path.join(nested_path,
47940b
                                                           arc_file))
47940b
             # add the toplevel extracted archive
47940b
diff --git a/sos/cleaner/obfuscation_archive.py b/sos/cleaner/obfuscation_archive.py
47940b
index 90188358..e357450b 100644
47940b
--- a/sos/cleaner/obfuscation_archive.py
47940b
+++ b/sos/cleaner/obfuscation_archive.py
47940b
@@ -58,6 +58,7 @@ class SoSObfuscationArchive():
47940b
         Returns: list of files and file regexes
47940b
         """
47940b
         return [
47940b
+            'sosreport-',
47940b
             'sys/firmware',
47940b
             'sys/fs',
47940b
             'sys/kernel/debug',
47940b
-- 
47940b
2.26.3
47940b