diff --git a/SOURCES/sos-bz1886711-enhance-tc-hw-offload.patch b/SOURCES/sos-bz1886711-enhance-tc-hw-offload.patch
new file mode 100644
index 0000000..ed1088b
--- /dev/null
+++ b/SOURCES/sos-bz1886711-enhance-tc-hw-offload.patch
@@ -0,0 +1,32 @@
+From bbb7f8bf522960a8ca7625f539e9e5d109abb704 Mon Sep 17 00:00:00 2001
+From: Pavel Moravec <pmoravec@redhat.com>
+Date: Wed, 19 May 2021 08:31:45 +0200
+Subject: [PATCH] [networking] collect also tc filter show ingress
+
+Both "tc -s filter show dev %eth [|ingress]" commands required as
+they provide different output.
+
+Resolves: #2550
+
+Signed-off-by: Pavel Moravec <pmoravec@redhat.com>
+---
+ sos/report/plugins/networking.py | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/sos/report/plugins/networking.py b/sos/report/plugins/networking.py
+index acfa027f..35646268 100644
+--- a/sos/report/plugins/networking.py
++++ b/sos/report/plugins/networking.py
+@@ -156,7 +156,8 @@ class Networking(Plugin):
+                 "ethtool --phy-statistics " + eth,
+                 "ethtool --show-priv-flags " + eth,
+                 "ethtool --show-eee " + eth,
+-                "tc -s filter show dev " + eth
++                "tc -s filter show dev " + eth,
++                "tc -s filter show dev " + eth + " ingress",
+             ], tags=eth)
+ 
+             # skip EEPROM collection by default, as it might hang or
+-- 
+2.26.3
+
diff --git a/SOURCES/sos-bz1925419-all-gluster-files.patch b/SOURCES/sos-bz1925419-all-gluster-files.patch
new file mode 100644
index 0000000..ab24429
--- /dev/null
+++ b/SOURCES/sos-bz1925419-all-gluster-files.patch
@@ -0,0 +1,39 @@
+From 4fb834ec862228afb276ccbd45aa86c66044ea66 Mon Sep 17 00:00:00 2001
+From: Pavel Moravec <pmoravec@redhat.com>
+Date: Mon, 15 Mar 2021 09:09:51 +0100
+Subject: [PATCH] [gluster] collect public keys from the right dir
+
+Collection of the glusterfind dir is already achieved via /var/lib/glusterd,
+so it does not need to be collected explicitly.
+
+The /var/lib/glusterd/glusterfind/.keys/ subdir must be collected
+explicitly, as add_copy_spec uses glob.glob(), which skips
+hidden files.
+
+Resolves: #2451
+
+Signed-off-by: Pavel Moravec <pmoravec@redhat.com>
+Signed-off-by: Jake Hunsaker <jhunsake@redhat.com>
+---
+ sos/report/plugins/gluster.py | 5 ++---
+ 1 file changed, 2 insertions(+), 3 deletions(-)
+
+diff --git a/sos/report/plugins/gluster.py b/sos/report/plugins/gluster.py
+index e1a89df2..952cab63 100644
+--- a/sos/report/plugins/gluster.py
++++ b/sos/report/plugins/gluster.py
+@@ -76,9 +76,8 @@ class Gluster(Plugin, RedHatPlugin):
+             "/var/lib/glusterd/",
+             # collect nfs-ganesha related configuration
+             "/run/gluster/shared_storage/nfs-ganesha/",
+-            # collect status files and public ssh keys
+-            "/var/lib/glusterd/.keys/",
+-            "/var/lib/glusterd/glusterfind/"
++            # collect public ssh keys (a_s_c implicitly skips hidden files)
++            "/var/lib/glusterd/glusterfind/.keys/",
+         ] + glob.glob('/run/gluster/*tier-dht/*'))
+ 
+         if not self.get_option("all_logs"):
+-- 
+2.26.3
+
diff --git a/SOURCES/sos-bz1964499-obfuscate-fqdn-from-dnf-log.patch b/SOURCES/sos-bz1964499-obfuscate-fqdn-from-dnf-log.patch
new file mode 100644
index 0000000..07e005b
--- /dev/null
+++ b/SOURCES/sos-bz1964499-obfuscate-fqdn-from-dnf-log.patch
@@ -0,0 +1,78 @@
+From b27140a9126ea82efb517d60bf1b8455aaf4f5a6 Mon Sep 17 00:00:00 2001
+From: Jake Hunsaker <jhunsake@redhat.com>
+Date: Fri, 26 Mar 2021 11:12:33 -0400
+Subject: [PATCH] [cleaner] Only skip packaging-based files for the IP parser
+
+Files primarily containing package information, e.g. `installed-rpms` or
+`installed-debs`, were previously being skipped by all parsers. In
+reality, we only need to skip these for the IP parser due to the fact
+that version numbers often generate a match for IP address regexes.
+
+This will also fix a problem where if a system was the build host for
+certain packages, the hostname would remain in these files as the
+hostname parser was previously not checking these files.
+
+Closes: #2400
+Resolves: #2464
+
+Signed-off-by: Jake Hunsaker <jhunsake@redhat.com>
+---
+ sos/cleaner/obfuscation_archive.py | 10 ----------
+ sos/cleaner/parsers/ip_parser.py   | 16 ++++++++++++++++
+ 2 files changed, 16 insertions(+), 10 deletions(-)
+
+diff --git a/sos/cleaner/obfuscation_archive.py b/sos/cleaner/obfuscation_archive.py
+index 981cc05f..84ca30cd 100644
+--- a/sos/cleaner/obfuscation_archive.py
++++ b/sos/cleaner/obfuscation_archive.py
+@@ -59,20 +59,10 @@ class SoSObfuscationArchive():
+         Returns: list of files and file regexes
+         """
+         return [
+-            'installed-debs',
+-            'installed-rpms',
+-            'sos_commands/dpkg',
+-            'sos_commands/python/pip_list',
+-            'sos_commands/rpm',
+-            'sos_commands/yum/.*list.*',
+-            'sos_commands/snappy/snap_list_--all',
+-            'sos_commands/snappy/snap_--version',
+-            'sos_commands/vulkan/vulkaninfo',
+             'sys/firmware',
+             'sys/fs',
+             'sys/kernel/debug',
+             'sys/module',
+-            'var/log/.*dnf.*',
+             r'.*\.tar$',  # TODO: support archive unpacking
+             # Be explicit with these tar matches to avoid matching commands
+             r'.*\.tar\.xz',
+diff --git a/sos/cleaner/parsers/ip_parser.py b/sos/cleaner/parsers/ip_parser.py
+index 3ea7f865..08d1cd05 100644
+--- a/sos/cleaner/parsers/ip_parser.py
++++ b/sos/cleaner/parsers/ip_parser.py
+@@ -24,6 +24,22 @@ class SoSIPParser(SoSCleanerParser):
+         # don't match package versions recorded in journals
+         r'.*dnf\[.*\]:'
+     ]
++
++    skip_files = [
++        # skip these as version numbers will frequently look like IP addresses
++        # when using regex matching
++        'installed-debs',
++        'installed-rpms',
++        'sos_commands/dpkg',
++        'sos_commands/python/pip_list',
++        'sos_commands/rpm',
++        'sos_commands/yum/.*list.*',
++        'sos_commands/snappy/snap_list_--all',
++        'sos_commands/snappy/snap_--version',
++        'sos_commands/vulkan/vulkaninfo',
++        'var/log/.*dnf.*'
++    ]
++
+     map_file_key = 'ip_map'
+     prep_map_file = 'sos_commands/networking/ip_-o_addr'
+ 
+-- 
+2.26.3
+
diff --git a/SOURCES/sos-bz1965001-fix-avc-copystating-proc-sys.patch b/SOURCES/sos-bz1965001-fix-avc-copystating-proc-sys.patch
new file mode 100644
index 0000000..1b62a1a
--- /dev/null
+++ b/SOURCES/sos-bz1965001-fix-avc-copystating-proc-sys.patch
@@ -0,0 +1,135 @@
+From 206d65618f20995b168dcc63090d1e6871450e90 Mon Sep 17 00:00:00 2001
+From: Pavel Moravec <pmoravec@redhat.com>
+Date: Wed, 26 May 2021 15:45:26 +0200
+Subject: [PATCH] [archive] skip copying SELinux context for /proc and /sys
+ every time
+
+A supplement to the #1399 fix, now also covering adding strings and
+special device files.
+
+Also adding a (vendor) test case for it.
+
+Resolves: #2560
+
+Signed-off-by: Pavel Moravec <pmoravec@redhat.com>
+---
+ sos/archive.py                           | 35 +++++++++++----------
+ tests/vendor_tests/redhat/rhbz1965001.py | 39 ++++++++++++++++++++++++
+ 2 files changed, 56 insertions(+), 18 deletions(-)
+ create mode 100644 tests/vendor_tests/redhat/rhbz1965001.py
+
+diff --git a/sos/archive.py b/sos/archive.py
+index 4dd31d75..b02b2475 100644
+--- a/sos/archive.py
++++ b/sos/archive.py
+@@ -326,6 +326,20 @@ class FileCacheArchive(Archive):
+             return None
+         return dest
+ 
++    def _copy_attributes(self, src, dest):
++        # copy file attributes, skip SELinux xattrs for /sys and /proc
++        try:
++            stat = os.stat(src)
++            if src.startswith("/sys/") or src.startswith("/proc/"):
++                shutil.copymode(src, dest)
++                os.utime(dest, ns=(stat.st_atime_ns, stat.st_mtime_ns))
++            else:
++                shutil.copystat(src, dest)
++            os.chown(dest, stat.st_uid, stat.st_gid)
++        except Exception as e:
++            self.log_debug("caught '%s' setting attributes of '%s'"
++                           % (e, dest))
++
+     def add_file(self, src, dest=None):
+         with self._path_lock:
+             if not dest:
+@@ -348,18 +362,7 @@ class FileCacheArchive(Archive):
+                     else:
+                         self.log_info("File %s not collected: '%s'" % (src, e))
+ 
+-                # copy file attributes, skip SELinux xattrs for /sys and /proc
+-                try:
+-                    stat = os.stat(src)
+-                    if src.startswith("/sys/") or src.startswith("/proc/"):
+-                        shutil.copymode(src, dest)
+-                        os.utime(dest, ns=(stat.st_atime_ns, stat.st_mtime_ns))
+-                    else:
+-                        shutil.copystat(src, dest)
+-                    os.chown(dest, stat.st_uid, stat.st_gid)
+-                except Exception as e:
+-                    self.log_debug("caught '%s' setting attributes of '%s'"
+-                                   % (e, dest))
++                self._copy_attributes(src, dest)
+                 file_name = "'%s'" % src
+             else:
+                 # Open file case: first rewind the file to obtain
+@@ -388,11 +391,7 @@ class FileCacheArchive(Archive):
+                 content = content.decode('utf8', 'ignore')
+             f.write(content)
+             if os.path.exists(src):
+-                try:
+-                    shutil.copystat(src, dest)
+-                except OSError as e:
+-                    self.log_error("Unable to add '%s' to archive: %s" %
+-                                   (dest, e))
++                self._copy_attributes(src, dest)
+             self.log_debug("added string at '%s' to FileCacheArchive '%s'"
+                            % (src, self._archive_root))
+ 
+@@ -501,7 +500,7 @@ class FileCacheArchive(Archive):
+                     self.log_info("add_node: %s - mknod '%s'" % (msg, dest))
+                     return
+                 raise e
+-            shutil.copystat(path, dest)
++            self._copy_attributes(path, dest)
+ 
+     def name_max(self):
+         if 'PC_NAME_MAX' in os.pathconf_names:
+diff --git a/tests/vendor_tests/redhat/rhbz1965001.py b/tests/vendor_tests/redhat/rhbz1965001.py
+new file mode 100644
+index 00000000..aa16ba81
+--- /dev/null
++++ b/tests/vendor_tests/redhat/rhbz1965001.py
+@@ -0,0 +1,39 @@
++# This file is part of the sos project: https://github.com/sosreport/sos
++#
++# This copyrighted material is made available to anyone wishing to use,
++# modify, copy, or redistribute it subject to the terms and conditions of
++# version 2 of the GNU General Public License.
++#
++# See the LICENSE file in the source distribution for further information.
++
++
++import tempfile
++import shutil
++from sos_tests import StageOneReportTest
++
++
++class rhbz1965001(StageOneReportTest):
++    """
++    Copying /proc/sys/vm/{compact_memory,drop_caches} must ignore SELinux
++    context, otherwise an attempt to set the context to files under some
++    directories like /tmp raises an AVC denial, and an ERROR
++    "Unable to add '...' to archive: [Errno 13] Permission denied: '...'
++    is raise.
++
++    https://bugzilla.redhat.com/show_bug.cgi?id=1965001
++
++    :avocado: enable
++    :avocado: tags=stageone
++    """
++
++    sos_cmd = '-o system'
++    # it is crucial to run the test case with --tmp-dir=/tmp/... as that is
++    # (an example of) a directory exhibiting the relabel permission denial.
++    # The /var/tmp directory allows those relabels.
++    #
++    # the directory shouldn't exist at this moment, otherwise
++    # "check to prevent multiple setUp() runs" in sos_tests.py would fail
++    _tmpdir = '/tmp/rhbz1965001_avocado_test'
++
++    def test_no_permission_denied(self):
++        self.assertSosLogNotContains("Permission denied")
+-- 
+2.26.3
+
diff --git a/SOURCES/sos-bz1967613-sssd-common.patch b/SOURCES/sos-bz1967613-sssd-common.patch
new file mode 100644
index 0000000..9937972
--- /dev/null
+++ b/SOURCES/sos-bz1967613-sssd-common.patch
@@ -0,0 +1,36 @@
+From 630dfbee936050698d33b59abd1e243c44e50af8 Mon Sep 17 00:00:00 2001
+From: Jan Jansky <jjansky@redhat.com>
+Date: Thu, 3 Jun 2021 15:04:57 +0200
+Subject: [PATCH] [sssd] enable the sssd plugin when sssd-common is installed
+
+We have reports that sssd logs are not collected. When we
+investigated, we found that support associates want to collect
+sssd-related logs also when only the sssd-common package is
+installed.
+
+We got this confirmed by sbr-idm.
+
+Resolves: #2571
+
+Signed-off-by: Jan Jansky <jjansky@redhat.com>
+---
+ sos/report/plugins/sssd.py | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/sos/report/plugins/sssd.py b/sos/report/plugins/sssd.py
+index 17933935..6f98e90c 100644
+--- a/sos/report/plugins/sssd.py
++++ b/sos/report/plugins/sssd.py
+@@ -19,7 +19,7 @@ class Sssd(Plugin):
+ 
+     plugin_name = "sssd"
+     profiles = ('services', 'security', 'identity')
+-    packages = ('sssd',)
++    packages = ('sssd', 'sssd-common')
+ 
+     def setup(self):
+         self.add_copy_spec([
+-- 
+2.26.3
+
diff --git a/SOURCES/sos-bz1973675-ocp-cluster-cleaner.patch b/SOURCES/sos-bz1973675-ocp-cluster-cleaner.patch
new file mode 100644
index 0000000..205152f
--- /dev/null
+++ b/SOURCES/sos-bz1973675-ocp-cluster-cleaner.patch
@@ -0,0 +1,2156 @@
+From 29afda6e4ff90385d34bc61315542e7cb4baaf8d Mon Sep 17 00:00:00 2001
+From: Jake Hunsaker <jhunsake@redhat.com>
+Date: Fri, 9 Apr 2021 11:32:14 -0400
+Subject: [PATCH] [cleaner] Do not break iteration of parse_string_for_keys on
+ first match
+
+Previously, `parse_string_for_keys()`, called by `obfuscate_string()`
+for non-regex based obfuscations, would return on the first match in the
+string found for each parser.
+
+Instead, continue iterating over all items in each parser's dataset
+before returning the (now fully) obfuscated string.
+
+Resolves: #2480
+
+Signed-off-by: Jake Hunsaker <jhunsake@redhat.com>
+---
+ sos/cleaner/parsers/__init__.py | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/sos/cleaner/parsers/__init__.py b/sos/cleaner/parsers/__init__.py
+index dd0451df..c77300aa 100644
+--- a/sos/cleaner/parsers/__init__.py
++++ b/sos/cleaner/parsers/__init__.py
+@@ -104,7 +104,7 @@ class SoSCleanerParser():
+         """
+         for key, val in self.mapping.dataset.items():
+             if key in string_data:
+-                return string_data.replace(key, val)
++                string_data = string_data.replace(key, val)
+         return string_data
+ 
+     def get_map_contents(self):
+-- 
+2.26.3
+
+From 52e6b2ae17e128f17a84ee83b7718c2901bcd5bd Mon Sep 17 00:00:00 2001
+From: Jake Hunsaker <jhunsake@redhat.com>
+Date: Wed, 12 May 2021 12:39:48 -0400
+Subject: [PATCH] [collect] Add options to provide registry auth for pulling
+ images
+
+Adds options that allow a user to specify registry authentication,
+either via username/password or an authfile, to allow pulling an image
+that exists on a non-public registry.
+
+If a username/password is provided, that will be used. If not, we will
+attempt to use an authfile - either provided by the user or by a cluster
+profile.
+
+Also adds an option to forcibly pull a new(er) version of the specified
+image, to alleviate conditions where a too-old version of the image
+already exists on the host.
+
+Closes: #2534
+
+Signed-off-by: Jake Hunsaker <jhunsake@redhat.com>
+---
+ man/en/sos-collect.1              | 30 +++++++++++++++++++++++
+ sos/collector/__init__.py         | 17 +++++++++++++
+ sos/collector/sosnode.py          | 40 +++++++++++++++++++++++++++----
+ sos/policies/distros/__init__.py  | 16 ++++++++++++-
+ sos/policies/distros/redhat.py    | 25 ++++++++++++-------
+ sos/policies/runtimes/__init__.py | 25 +++++++++++++++++++
+ 6 files changed, 140 insertions(+), 13 deletions(-)
+
+diff --git a/man/en/sos-collect.1 b/man/en/sos-collect.1
+index 286bfe71..cdbc3257 100644
+--- a/man/en/sos-collect.1
++++ b/man/en/sos-collect.1
+@@ -26,6 +26,11 @@ sos collect \- Collect sosreports from multiple (cluster) nodes
+     [\-\-no\-pkg\-check]
+     [\-\-no\-local]
+     [\-\-master MASTER]
++    [\-\-image IMAGE]
++    [\-\-force-pull-image]
++    [\-\-registry-user USER]
++    [\-\-registry-password PASSWORD]
++    [\-\-registry-authfile FILE]
+     [\-o ONLY_PLUGINS]
+     [\-p SSH_PORT]
+     [\-\-password]
+@@ -245,6 +250,31 @@ Specify a master node for the cluster.
+ If provided, then sos collect will check the master node, not localhost, for determining
+ the type of cluster in use.
+ .TP
++\fB\-\-image IMAGE\fR
++Specify an image to use for the temporary container created for collections on
++containerized hosts, if you do not want to use the default image specified by the
++host's policy. Note that this should include the registry.
++.TP
++\fB\-\-force-pull-image\fR
++Use this option to force the container runtime to pull the specified image, even
++if it is the policy default image and the image already exists on the host.
++This may be useful to update an older container image on containerized hosts.
++.TP
++\fB\-\-registry-user USER\fR
++Specify the username to authenticate to the registry with in order to pull the container
++image
++.TP
++\fB\-\-registry-password PASSWORD\fR
++Specify the password to authenticate to the registry with in order to pull the container
++image. If no password is required, leave this blank.
++.TP
++\fB\-\-registry-authfile FILE\fR
++Specify the filename to use for providing authentication credentials to the registry
++to pull the container image.
++
++Note that this file must exist on the node(s) performing the pull operations, not the
++node from which \fBsos collect\fR was run.
++.TP
+ \fB\-o\fR ONLY_PLUGINS, \fB\-\-only\-plugins\fR ONLY_PLUGINS
+ Sosreport option. Run ONLY the plugins listed.
+ 
+diff --git a/sos/collector/__init__.py b/sos/collector/__init__.py
+index 1c742cf5..0624caad 100644
+--- a/sos/collector/__init__.py
++++ b/sos/collector/__init__.py
+@@ -63,6 +63,7 @@ class SoSCollector(SoSComponent):
+         'encrypt_pass': '',
+         'group': None,
+         'image': '',
++        'force_pull_image': False,
+         'jobs': 4,
+         'keywords': [],
+         'keyword_file': None,
+@@ -84,6 +85,9 @@ class SoSCollector(SoSComponent):
+         'plugin_timeout': None,
+         'cmd_timeout': None,
+         'preset': '',
++        'registry_user': None,
++        'registry_password': None,
++        'registry_authfile': None,
+         'save_group': '',
+         'since': '',
+         'skip_commands': [],
+@@ -319,6 +323,19 @@ class SoSCollector(SoSComponent):
+         collect_grp.add_argument('--image',
+                                  help=('Specify the container image to use for'
+                                        ' containerized hosts.'))
++        collect_grp.add_argument('--force-pull-image', '--pull', default=False,
++                                 action='store_true',
++                                 help='Force pull the container image even if '
++                                      'it already exists on the host')
++        collect_grp.add_argument('--registry-user', default=None,
++                                 help='Username to authenticate to the '
++                                      'registry with for pulling an image')
++        collect_grp.add_argument('--registry-password', default=None,
++                                 help='Password to authenticate to the '
++                                      'registry with for pulling an image')
++        collect_grp.add_argument('--registry-authfile', default=None,
++                                 help='Use this authfile to provide registry '
++                                      'authentication when pulling an image')
+         collect_grp.add_argument('-i', '--ssh-key', help='Specify an ssh key')
+         collect_grp.add_argument('-j', '--jobs', default=4, type=int,
+                                  help='Number of concurrent nodes to collect')
+diff --git a/sos/collector/sosnode.py b/sos/collector/sosnode.py
+index 48693342..d1c11824 100644
+--- a/sos/collector/sosnode.py
++++ b/sos/collector/sosnode.py
+@@ -134,9 +134,27 @@ class SosNode():
+         """If the host is containerized, create the container we'll be using
+         """
+         if self.host.containerized:
+-            res = self.run_command(self.host.create_sos_container(),
+-                                   need_root=True)
+-            if res['status'] in [0, 125]:  # 125 means container exists
++            cmd = self.host.create_sos_container(
++                image=self.opts.image,
++                auth=self.get_container_auth(),
++                force_pull=self.opts.force_pull_image
++            )
++            res = self.run_command(cmd, need_root=True)
++            if res['status'] in [0, 125]:
++                if res['status'] == 125:
++                    if 'unable to retrieve auth token' in res['stdout']:
++                        self.log_error(
++                            "Could not pull image. Provide either a username "
++                            "and password or authfile"
++                        )
++                        raise Exception
++                    elif 'unknown: Not found' in res['stdout']:
++                        self.log_error('Specified image not found on registry')
++                        raise Exception
++                    # 'name exists' with code 125 means the container was
++                    # created successfully, so ignore it.
++                # initial creation leads to an exited container; restarting it
++                # here will keep it alive for us to exec through
+                 ret = self.run_command(self.host.restart_sos_container(),
+                                        need_root=True)
+                 if ret['status'] == 0:
+@@ -152,6 +170,20 @@ class SosNode():
+                                % res['stdout'])
+                 raise Exception
+ 
++    def get_container_auth(self):
++        """Determine what the auth string should be to pull the image used to
++        deploy our temporary container
++        """
++        if self.opts.registry_user:
++            return self.host.runtimes['default'].fmt_registry_credentials(
++                self.opts.registry_user,
++                self.opts.registry_password
++            )
++        else:
++            return self.host.runtimes['default'].fmt_registry_authfile(
++                self.opts.registry_authfile or self.host.container_authfile
++            )
++
+     def file_exists(self, fname):
+         """Checks for the presence of fname on the remote node"""
+         if not self.local:
+@@ -343,7 +375,7 @@ class SosNode():
+                           % self.commons['policy'].distro)
+             return self.commons['policy']
+         host = load(cache={}, sysroot=self.opts.sysroot, init=InitSystem(),
+-                    probe_runtime=False, remote_exec=self.ssh_cmd,
++                    probe_runtime=True, remote_exec=self.ssh_cmd,
+                     remote_check=self.read_file('/etc/os-release'))
+         if host:
+             self.log_info("loaded policy %s for host" % host.distro)
+diff --git a/sos/policies/distros/__init__.py b/sos/policies/distros/__init__.py
+index 9fe31513..f5b9fd5b 100644
+--- a/sos/policies/distros/__init__.py
++++ b/sos/policies/distros/__init__.py
+@@ -62,6 +62,7 @@ class LinuxPolicy(Policy):
+     sos_bin_path = '/usr/bin'
+     sos_container_name = 'sos-collector-tmp'
+     container_version_command = None
++    container_authfile = None
+ 
+     def __init__(self, sysroot=None, init=None, probe_runtime=True):
+         super(LinuxPolicy, self).__init__(sysroot=sysroot,
+@@ -626,13 +627,26 @@ class LinuxPolicy(Policy):
+         """
+         return ''
+ 
+-    def create_sos_container(self):
++    def create_sos_container(self, image=None, auth=None, force_pull=False):
+         """Returns the command that will create the container that will be
+         used for running commands inside a container on hosts that require it.
+ 
+         This will use the container runtime defined for the host type to
+         launch a container. From there, we use the defined runtime to exec into
+         the container's namespace.
++
++        :param image:   The name of the image if not using the policy default
++        :type image:    ``str`` or ``None``
++
++        :param auth:    The auth string required by the runtime to pull an
++                        image from the registry
++        :type auth:     ``str`` or ``None``
++
++        :param force_pull:  Should the runtime forcibly pull the image
++        :type force_pull:   ``bool``
++
++        :returns:   The command to execute to launch the temp container
++        :rtype:     ``str``
+         """
+         return ''
+ 
+diff --git a/sos/policies/distros/redhat.py b/sos/policies/distros/redhat.py
+index 241d3f13..20afbcc4 100644
+--- a/sos/policies/distros/redhat.py
++++ b/sos/policies/distros/redhat.py
+@@ -452,15 +452,19 @@ support representative.
+ 
+         return self.find_preset(ATOMIC)
+ 
+-    def create_sos_container(self):
++    def create_sos_container(self, image=None, auth=None, force_pull=False):
+         _cmd = ("{runtime} run -di --name {name} --privileged --ipc=host"
+                 " --net=host --pid=host -e HOST=/host -e NAME={name} -e "
+-                "IMAGE={image} -v /run:/run -v /var/log:/var/log -v "
++                "IMAGE={image} {pull} -v /run:/run -v /var/log:/var/log -v "
+                 "/etc/machine-id:/etc/machine-id -v "
+-                "/etc/localtime:/etc/localtime -v /:/host {image}")
++                "/etc/localtime:/etc/localtime -v /:/host {auth} {image}")
++        _image = image or self.container_image
++        _pull = '--pull=always' if force_pull else ''
+         return _cmd.format(runtime=self.container_runtime,
+                            name=self.sos_container_name,
+-                           image=self.container_image)
++                           image=_image,
++                           pull=_pull,
++                           auth=auth or '')
+ 
+     def set_cleanup_cmd(self):
+         return 'docker rm --force sos-collector-tmp'
+@@ -482,6 +486,7 @@ support representative.
+     container_image = 'registry.redhat.io/rhel8/support-tools'
+     sos_path_strip = '/host'
+     container_version_command = 'rpm -q sos'
++    container_authfile = '/var/lib/kubelet/config.json'
+ 
+     def __init__(self, sysroot=None, init=None, probe_runtime=True,
+                  remote_exec=None):
+@@ -511,15 +516,19 @@ support representative.
+         # RH OCP environments.
+         return self.find_preset(RHOCP)
+ 
+-    def create_sos_container(self):
++    def create_sos_container(self, image=None, auth=None, force_pull=False):
+         _cmd = ("{runtime} run -di --name {name} --privileged --ipc=host"
+                 " --net=host --pid=host -e HOST=/host -e NAME={name} -e "
+-                "IMAGE={image} -v /run:/run -v /var/log:/var/log -v "
++                "IMAGE={image} {pull} -v /run:/run -v /var/log:/var/log -v "
+                 "/etc/machine-id:/etc/machine-id -v "
+-                "/etc/localtime:/etc/localtime -v /:/host {image}")
++                "/etc/localtime:/etc/localtime -v /:/host {auth} {image}")
++        _image = image or self.container_image
++        _pull = '--pull=always' if force_pull else ''
+         return _cmd.format(runtime=self.container_runtime,
+                            name=self.sos_container_name,
+-                           image=self.container_image)
++                           image=_image,
++                           pull=_pull,
++                           auth=auth or '')
+ 
+     def set_cleanup_cmd(self):
+         return 'podman rm --force %s' % self.sos_container_name
+diff --git a/sos/policies/runtimes/__init__.py b/sos/policies/runtimes/__init__.py
+index 1a61b644..f28d6a1d 100644
+--- a/sos/policies/runtimes/__init__.py
++++ b/sos/policies/runtimes/__init__.py
+@@ -157,6 +157,31 @@ class ContainerRuntime():
+             quoted_cmd = cmd
+         return "%s %s %s" % (self.run_cmd, container, quoted_cmd)
+ 
++    def fmt_registry_credentials(self, username, password):
++        """Format a string to pass to the 'run' command of the runtime to
++        enable authorization for pulling the image during `sos collect`, if
++        needed using username and optional password creds
++
++        :param username:    The name of the registry user
++        :type username:     ``str``
++
++        :param password:    The password of the registry user
++        :type password:     ``str`` or ``None``
++
++        :returns:  The string to use to enable a run command to pull the image
++        :rtype:    ``str``
++        """
++        return "--creds=%s%s" % (username, ':' + password if password else '')
++
++    def fmt_registry_authfile(self, authfile):
++        """Format a string to pass to the 'run' command of the runtime to
++        enable authorization for pulling the image during `sos collect`, if
++        needed using an authfile.
++        """
++        if authfile:
++            return "--authfile %s" % authfile
++        return ''
++
+     def get_logs_command(self, container):
+         """Get the command string used to dump container logs from the
+         runtime
+-- 
+2.26.3
+
+From 3cbbd7df6f0700609eeef3210d7388298b9e0c21 Mon Sep 17 00:00:00 2001
+From: Jake Hunsaker <jhunsake@redhat.com>
+Date: Wed, 12 May 2021 13:26:45 -0400
+Subject: [PATCH] [sosnode] Allow clusters to set options only for master nodes
+
+Adds a method to `Cluster` that allows a profile to set sos options
+specifically for master nodes.
+
+Signed-off-by: Jake Hunsaker <jhunsake@redhat.com>
+---
+ sos/collector/clusters/__init__.py | 21 +++++++++++++++++++++
+ sos/collector/sosnode.py           |  6 ++++++
+ 2 files changed, 27 insertions(+)
+
+diff --git a/sos/collector/clusters/__init__.py b/sos/collector/clusters/__init__.py
+index 5c002bae..bfa3aad3 100644
+--- a/sos/collector/clusters/__init__.py
++++ b/sos/collector/clusters/__init__.py
+@@ -137,6 +137,27 @@ class Cluster():
+         """
+         self.cluster_ssh_key = key
+ 
++    def set_master_options(self, node):
++        """If there is a need to set specific options in the sos command being
++        run on the cluster's master nodes, override this method in the cluster
++        profile and do that here.
++
++        :param node:       The master node
++        :type node:        ``SoSNode``
++        """
++        pass
++
++    def check_node_is_master(self, node):
++        """In the event there are multiple masters, or if the collect command
++        is being run from a system that is technically capable of enumerating
++        nodes but the cluster profile needs to specify master-specific options
++        for other nodes, override this method in the cluster profile
++
++        :param node:        The node for the cluster to check
++        :type node:         ``SoSNode``
++        """
++        return node.address == self.master.address
++
+     def exec_master_cmd(self, cmd, need_root=False):
+         """Used to retrieve command output from a (master) node in a cluster
+ 
+diff --git a/sos/collector/sosnode.py b/sos/collector/sosnode.py
+index d1c11824..62666635 100644
+--- a/sos/collector/sosnode.py
++++ b/sos/collector/sosnode.py
+@@ -647,6 +647,10 @@ class SosNode():
+                                         self.cluster.sos_plugin_options[opt])
+                     self.opts.plugin_options.append(option)
+ 
++        # set master-only options
++        if self.cluster.check_node_is_master(self):
++            self.cluster.set_master_options(self)
++
+     def finalize_sos_cmd(self):
+         """Use host facts and compare to the cluster type to modify the sos
+         command if needed"""
+@@ -707,6 +711,8 @@ class SosNode():
+             os.path.join(self.host.sos_bin_path, self.sos_bin)
+         )
+ 
++        self.update_cmd_from_cluster()
++
+         if self.opts.only_plugins:
+             plugs = [o for o in self.opts.only_plugins
+                      if self._plugin_exists(o)]
+-- 
+2.26.3
+
+From cae9dd79a59107aa92db5f90aed356e093985bd9 Mon Sep 17 00:00:00 2001
+From: Jake Hunsaker <jhunsake@redhat.com>
+Date: Wed, 12 May 2021 16:06:29 -0400
+Subject: [PATCH] [sosnode] Don't fail on sos-less bastion nodes used for node
+ lists
+
+If the master node is determined to not have sos installed, that is not
+necessarily a fatal error for scenarios where the 'master' node is only
+being used to enumerate node lists and is not actually part of the
+cluster. This can happen when a user is using a bastion node to
+enumerate and connect to the cluster environment, or if the local host
+is being used to enumerate nodes via cluster client tooling.
+
+Signed-off-by: Jake Hunsaker <jhunsake@redhat.com>
+---
+ sos/collector/sosnode.py | 17 ++++++++++++-----
+ 1 file changed, 12 insertions(+), 5 deletions(-)
+
+diff --git a/sos/collector/sosnode.py b/sos/collector/sosnode.py
+index 62666635..7e56483d 100644
+--- a/sos/collector/sosnode.py
++++ b/sos/collector/sosnode.py
+@@ -287,13 +287,20 @@ class SosNode():
+             # use the containerized policy's command
+             pkgs = self.run_command(self.host.container_version_command,
+                                     use_container=True, need_root=True)
+-            ver = pkgs['stdout'].strip().split('-')[1]
+-            if ver:
+-                self.sos_info['version'] = ver
+-        if 'version' in self.sos_info:
++            if pkgs['status'] == 0:
++                ver = pkgs['stdout'].strip().split('-')[1]
++                if ver:
++                    self.sos_info['version'] = ver
++            else:
++                self.sos_info['version'] = None
++        if self.sos_info['version']:
+             self.log_info('sos version is %s' % self.sos_info['version'])
+         else:
+-            self.log_error('sos is not installed on this node')
++            if not self.address == self.opts.master:
++                # in the case where the 'master' enumerates nodes but is not
++                # intended for collection (bastions), don't worry about sos not
++                # being present
++                self.log_error('sos is not installed on this node')
+             self.connected = False
+             return False
+         cmd = 'sosreport -l'
+-- 
+2.26.3
+
+From cc5abe563d855dea9ac25f56de2e493228b48bf7 Mon Sep 17 00:00:00 2001
+From: Jake Hunsaker <jhunsake@redhat.com>
+Date: Wed, 12 May 2021 18:26:09 -0400
+Subject: [PATCH] [sosnode] Mark sos commands as explicitly needing root for
+ containers
+
+Fixes an issue where the sos inspection commands were not properly
+marked as needing to be run as root (either directly or via sudo) for
+containerized hosts, which would lead to incorrect sos command
+formatting.
+
+Mark those commands, and the final container removal command, as
+explicitly needing root permissions.
+
+Signed-off-by: Jake Hunsaker <jhunsake@redhat.com>
+---
+ sos/collector/sosnode.py | 6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+diff --git a/sos/collector/sosnode.py b/sos/collector/sosnode.py
+index 7e56483d..1fc03076 100644
+--- a/sos/collector/sosnode.py
++++ b/sos/collector/sosnode.py
+@@ -304,7 +304,7 @@ class SosNode():
+             self.connected = False
+             return False
+         cmd = 'sosreport -l'
+-        sosinfo = self.run_command(cmd, use_container=True)
++        sosinfo = self.run_command(cmd, use_container=True, need_root=True)
+         if sosinfo['status'] == 0:
+             self._load_sos_plugins(sosinfo['stdout'])
+         if self.check_sos_version('3.6'):
+@@ -312,7 +312,7 @@ class SosNode():
+ 
+     def _load_sos_presets(self):
+         cmd = 'sosreport --list-presets'
+-        res = self.run_command(cmd, use_container=True)
++        res = self.run_command(cmd, use_container=True, need_root=True)
+         if res['status'] == 0:
+             for line in res['stdout'].splitlines():
+                 if line.strip().startswith('name:'):
+@@ -996,7 +996,7 @@ class SosNode():
+             self.remove_file(self.sos_path + '.md5')
+         cleanup = self.host.set_cleanup_cmd()
+         if cleanup:
+-            self.run_command(cleanup)
++            self.run_command(cleanup, need_root=True)
+ 
+     def collect_extra_cmd(self, filenames):
+         """Collect the file created by a cluster outside of sos"""
+-- 
+2.26.3
+
+From 55e77ad4c7e90ba14b10c5fdf18b65aa5d6b9cf8 Mon Sep 17 00:00:00 2001
+From: Jake Hunsaker <jhunsake@redhat.com>
+Date: Wed, 12 May 2021 18:55:31 -0400
+Subject: [PATCH] [ocp] Add cluster profile for OCP4
+
+Removes the previous OCP cluster profile and replaces it with an updated
+one for OCP4 which is entirely separated from the kubernetes profile.
+
+Resolves: #2544
+
+Signed-off-by: Jake Hunsaker <jhunsake@redhat.com>
+---
+ sos/collector/clusters/kubernetes.py |   8 --
+ sos/collector/clusters/ocp.py        | 109 +++++++++++++++++++++++++++
+ 2 files changed, 109 insertions(+), 8 deletions(-)
+ create mode 100644 sos/collector/clusters/ocp.py
+
+diff --git a/sos/collector/clusters/kubernetes.py b/sos/collector/clusters/kubernetes.py
+index 6a867e31..08fd9554 100644
+--- a/sos/collector/clusters/kubernetes.py
++++ b/sos/collector/clusters/kubernetes.py
+@@ -44,11 +44,3 @@ class kubernetes(Cluster):
+             return nodes
+         else:
+             raise Exception('Node enumeration did not return usable output')
+-
+-
+-class openshift(kubernetes):
+-
+-    cluster_name = 'OpenShift Container Platform'
+-    packages = ('atomic-openshift',)
+-    sos_preset = 'ocp'
+-    cmd = 'oc'
+diff --git a/sos/collector/clusters/ocp.py b/sos/collector/clusters/ocp.py
+new file mode 100644
+index 00000000..283fcfd1
+--- /dev/null
++++ b/sos/collector/clusters/ocp.py
+@@ -0,0 +1,109 @@
++# Copyright Red Hat 2021, Jake Hunsaker <jhunsake@redhat.com>
++
++# This file is part of the sos project: https://github.com/sosreport/sos
++#
++# This copyrighted material is made available to anyone wishing to use,
++# modify, copy, or redistribute it subject to the terms and conditions of
++# version 2 of the GNU General Public License.
++#
++# See the LICENSE file in the source distribution for further information.
++
++from pipes import quote
++from sos.collector.clusters import Cluster
++
++
++class ocp(Cluster):
++    """OpenShift Container Platform v4"""
++
++    cluster_name = 'OpenShift Container Platform v4'
++    packages = ('openshift-hyperkube', 'openshift-clients')
++
++    option_list = [
++        ('label', '', 'Colon delimited list of labels to select nodes with'),
++        ('role', '', 'Colon delimited list of roles to select nodes with'),
++        ('kubeconfig', '', 'Path to the kubeconfig file')
++    ]
++
++    def fmt_oc_cmd(self, cmd):
++        """Format the oc command to optionall include the kubeconfig file if
++        one is specified
++        """
++        if self.get_option('kubeconfig'):
++            return "oc --config %s %s" % (self.get_option('kubeconfig'), cmd)
++        return "oc %s" % cmd
++
++    def check_enabled(self):
++        if super(ocp, self).check_enabled():
++            return True
++        _who = self.fmt_oc_cmd('whoami')
++        return self.exec_master_cmd(_who)['status'] == 0
++
++    def _build_dict(self, nodelist):
++        """From the output of get_nodes(), construct an easier-to-reference
++        dict of nodes that will be used in determining labels, master status,
++        etc...
++
++        :param nodelist:        The split output of `oc get nodes`
++        :type nodelist:         ``list``
++
++        :returns:           A dict of nodes with `get nodes` columns as keys
++        :rtype:             ``dict``
++        """
++        nodes = {}
++        if 'NAME' in nodelist[0]:
++            # get the index of the fields
++            statline = nodelist.pop(0).split()
++            idx = {}
++            for state in ['status', 'roles', 'version', 'os-image']:
++                try:
++                    idx[state] = statline.index(state.upper())
++                except Exception:
++                    pass
++            for node in nodelist:
++                _node = node.split()
++                nodes[_node[0]] = {}
++                for column in idx:
++                    nodes[_node[0]][column] = _node[idx[column]]
++        return nodes
++
++    def get_nodes(self):
++        nodes = []
++        self.node_dict = {}
++        cmd = 'get nodes -o wide'
++        if self.get_option('label'):
++            labels = ','.join(self.get_option('label').split(':'))
++            cmd += " -l %s" % quote(labels)
++        res = self.exec_master_cmd(self.fmt_oc_cmd(cmd))
++        if res['status'] == 0:
++            roles = [r for r in self.get_option('role').split(':')]
++            self.node_dict = self._build_dict(res['stdout'].splitlines())
++            for node in self.node_dict:
++                if roles:
++                    for role in roles:
++                        if role in node:
++                            nodes.append(node)
++                else:
++                    nodes.append(node)
++        else:
++            msg = "'oc' command failed"
++            if 'Missing or incomplete' in res['stdout']:
++                msg = ("'oc' failed due to missing kubeconfig on master node."
++                       " Specify one via '-c ocp.kubeconfig=<path>'")
++            raise Exception(msg)
++        return nodes
++
++    def set_node_label(self, node):
++        if node.address not in self.node_dict:
++            return ''
++        for label in ['master', 'worker']:
++            if label in self.node_dict[node.address]['roles']:
++                return label
++        return ''
++
++    def check_node_is_master(self, sosnode):
++        if sosnode.address not in self.node_dict:
++            return False
++        return 'master' in self.node_dict[sosnode.address]['roles']
++
++    def set_master_options(self, node):
++        node.opts.enable_plugins.append('openshift')
+-- 
+2.26.3
+
+From a3c1caad21160545eda87ea1fde93e972a6fbf88 Mon Sep 17 00:00:00 2001
+From: Jake Hunsaker <jhunsake@redhat.com>
+Date: Wed, 26 May 2021 11:55:24 -0400
+Subject: [PATCH] [cleaner] Don't strip empty lines from substituted files
+
+Fixes an issue where empty lines would be stripped from files that have
+other obfuscations in them. Those empty lines may be important for file
+structure and/or readability, so instead of skipping them wholesale while
+writing obfuscations to a temp file (before replacing the source file with
+that potentially changed temp file), simply do not pass empty lines to the
+parsers.
+
+Resolves: #2562
+
+Signed-off-by: Jake Hunsaker <jhunsake@redhat.com>
+---
+ sos/cleaner/__init__.py | 6 ++++--
+ 1 file changed, 4 insertions(+), 2 deletions(-)
+
+diff --git a/sos/cleaner/__init__.py b/sos/cleaner/__init__.py
+index bdd24f95..55465b85 100644
+--- a/sos/cleaner/__init__.py
++++ b/sos/cleaner/__init__.py
+@@ -603,8 +603,6 @@ third party.
+         tfile = tempfile.NamedTemporaryFile(mode='w', dir=self.tmpdir)
+         with open(filename, 'r') as fname:
+             for line in fname:
+-                if not line.strip():
+-                    continue
+                 try:
+                     line, count = self.obfuscate_line(line)
+                     subs += count
+@@ -642,7 +640,11 @@ third party.
+ 
+         Returns the fully obfuscated line and the number of substitutions made
+         """
++        # don't iterate over blank lines, but still write them to the tempfile
++        # to maintain the same structure when we write a scrubbed file back
+         count = 0
++        if not line.strip():
++            return line, count
+         for parser in self.parsers:
+             try:
+                 line, _count = parser.parse_line(line)
+-- 
+2.26.3
+
+From 892bbd8114703f5a4d23aa77ba5829b7ba59446f Mon Sep 17 00:00:00 2001
+From: Jake Hunsaker <jhunsake@redhat.com>
+Date: Wed, 5 May 2021 17:02:04 -0400
+Subject: [PATCH] [cleaner] Remove binary files by default
+
+Binary files generally speaking cannot be obfuscated, and as such we
+should remove them from archives being obfuscated by default so that
+sensitive data is not mistakenly included in an obfuscated archive.
+
+This commit adds a new `--keep-binary-files` option that, if used, will
+keep any encountered binary files in the final archive. The default
+option of `false` will ensure that encountered binary files are removed.
+
+The number of removed binary files per archive is reported when
+obfuscation is completed for that archive.
+
+Closes: #2478
+Resolves: #2524
+
+Signed-off-by: Jake Hunsaker <jhunsake@redhat.com>
+---
+ man/en/sos-clean.1                          |  12 ++++
+ sos/cleaner/__init__.py                     |  21 +++++-
+ sos/cleaner/obfuscation_archive.py          |  67 ++++++++++++++++++--
+ sos/collector/__init__.py                   |   5 ++
+ sos/report/__init__.py                      |   6 ++
+ 8 files changed, 167 insertions(+), 7 deletions(-)
+
+diff --git a/man/en/sos-clean.1 b/man/en/sos-clean.1
+index 4856b43b..b77bc63c 100644
+--- a/man/en/sos-clean.1
++++ b/man/en/sos-clean.1
+@@ -9,6 +9,7 @@ sos clean - Obfuscate sensitive data from one or more sosreports
+     [\-\-map-file]
+     [\-\-jobs]
+     [\-\-no-update]
++    [\-\-keep-binary-files]
+ 
+ .SH DESCRIPTION
+ \fBsos clean\fR or \fBsos mask\fR is an sos subcommand used to obfuscate sensitive information from
+@@ -77,6 +78,17 @@ Default: 4
+ .TP
+ .B \-\-no-update
+ Do not write the mapping file contents to /etc/sos/cleaner/default_mapping
++.TP
++.B \-\-keep-binary-files
++Keep unprocessable binary files in the archive, rather than removing them.
++
++Note that binary files cannot be obfuscated, and thus keeping them in the archive
++may result in otherwise sensitive information being included in the final archive.
++Users should review any archive that keeps binary files in place before sending to
++a third party.
++
++Default: False (remove encountered binary files)
++
+ .SH SEE ALSO
+ .BR sos (1)
+ .BR sos-report (1)
+diff --git a/sos/cleaner/__init__.py b/sos/cleaner/__init__.py
+index 55465b85..f88ff8a0 100644
+--- a/sos/cleaner/__init__.py
++++ b/sos/cleaner/__init__.py
+@@ -47,6 +47,7 @@ class SoSCleaner(SoSComponent):
+         'keyword_file': None,
+         'map_file': '/etc/sos/cleaner/default_mapping',
+         'no_update': False,
++        'keep_binary_files': False,
+         'target': '',
+         'usernames': []
+     }
+@@ -183,6 +184,11 @@ third party.
+                                action='store_true',
+                                help='Do not update the --map file with new '
+                                     'mappings from this run')
++        clean_grp.add_argument('--keep-binary-files', default=False,
++                               action='store_true',
++                               dest='keep_binary_files',
++                               help='Keep unprocessable binary files in the '
++                                    'archive instead of removing them')
+         clean_grp.add_argument('--usernames', dest='usernames', default=[],
+                                action='extend',
+                                help='List of usernames to obfuscate')
+@@ -467,6 +473,11 @@ third party.
+                        "%s concurrently\n"
+                        % (len(self.report_paths), self.opts.jobs))
+                 self.ui_log.info(msg)
++            if self.opts.keep_binary_files:
++                self.ui_log.warning(
++                    "WARNING: binary files that potentially contain sensitive "
++                    "information will NOT be removed from the final archive\n"
++                )
+             pool = ThreadPoolExecutor(self.opts.jobs)
+             pool.map(self.obfuscate_report, self.report_paths, chunksize=1)
+             pool.shutdown(wait=True)
+@@ -539,6 +550,10 @@ third party.
+                 short_name = fname.split(archive.archive_name + '/')[1]
+                 if archive.should_skip_file(short_name):
+                     continue
++                if (not self.opts.keep_binary_files and
++                        archive.should_remove_file(short_name)):
++                    archive.remove_file(short_name)
++                    continue
+                 try:
+                     count = self.obfuscate_file(fname, short_name,
+                                                 archive.archive_name)
+@@ -574,7 +589,11 @@ third party.
+             arc_md.add_field('files_obfuscated', len(archive.file_sub_list))
+             arc_md.add_field('total_substitutions', archive.total_sub_count)
+             self.completed_reports.append(archive)
+-            archive.report_msg("Obfuscation completed")
++            rmsg = ''
++            if archive.removed_file_count:
++                rmsg = " [removed %s unprocessable files]"
++                rmsg = rmsg % archive.removed_file_count
++            archive.report_msg("Obfuscation completed%s" % rmsg)
+ 
+         except Exception as err:
+             self.ui_log.info("Exception while processing %s: %s"
+diff --git a/sos/cleaner/obfuscation_archive.py b/sos/cleaner/obfuscation_archive.py
+index c64ab13b..76841b51 100644
+--- a/sos/cleaner/obfuscation_archive.py
++++ b/sos/cleaner/obfuscation_archive.py
+@@ -28,6 +28,7 @@ class SoSObfuscationArchive():
+ 
+     file_sub_list = []
+     total_sub_count = 0
++    removed_file_count = 0
+ 
+     def __init__(self, archive_path, tmpdir):
+         self.archive_path = archive_path
+@@ -62,11 +63,7 @@ class SoSObfuscationArchive():
+             'sys/firmware',
+             'sys/fs',
+             'sys/kernel/debug',
+-            'sys/module',
+-            r'.*\.tar$',  # TODO: support archive unpacking
+-            # Be explicit with these tar matches to avoid matching commands
+-            r'.*\.tar\.xz',
+-            '.*.gz'
++            'sys/module'
+         ]
+ 
+     @property
+@@ -76,6 +73,17 @@ class SoSObfuscationArchive():
+         except Exception:
+             return False
+ 
++    def remove_file(self, fname):
++        """Remove a file from the archive. This is used when cleaner encounters
++        a binary file, which we cannot reliably obfuscate.
++        """
++        full_fname = self.get_file_path(fname)
++        # don't call a blank remove() here
++        if full_fname:
++            self.log_info("Removing binary file '%s' from archive" % fname)
++            os.remove(full_fname)
++            self.removed_file_count += 1
++
+     def extract(self):
+         if self.is_tarfile:
+             self.report_msg("Extracting...")
+@@ -227,3 +235,52 @@ class SoSObfuscationArchive():
+             if filename.startswith(_skip) or re.match(_skip, filename):
+                 return True
+         return False
++
++    def should_remove_file(self, fname):
++        """Determine if the file should be removed or not, due to an inability
++        to reliably obfuscate that file based on the filename.
++
++        :param fname:       Filename relative to the extracted archive root
++        :type fname:        ``str``
++
++        :returns:   ``True`` if the file cannot be reliably obfuscated
++        :rtype:     ``bool``
++        """
++        obvious_removes = [
++            r'.*\.gz',  # TODO: support flat gz/xz extraction
++            r'.*\.xz',
++            r'.*\.bzip2',
++            r'.*\.tar\..*',  # TODO: support archive unpacking
++            r'.*\.txz$',
++            r'.*\.tgz$',
++            r'.*\.bin',
++            r'.*\.journal',
++            r'.*\~$'
++        ]
++
++        # if the filename matches, it is obvious we can remove them without
++        # doing the read test
++        for _arc_reg in obvious_removes:
++            if re.match(_arc_reg, fname):
++                return True
++
++        return self.file_is_binary(fname)
++
++    def file_is_binary(self, fname):
++        """Determine if the file is a binary file or not.
++
++
++        :param fname:          Filename relative to the extracted archive root
++        :type fname:           ``str``
++
++        :returns:   ``True`` if file is binary, else ``False``
++        :rtype:     ``bool``
++        """
++        with open(self.get_file_path(fname), 'tr') as tfile:
++            try:
++                # when opened as above (tr), reading binary content will raise
++                # an exception
++                tfile.read(1)
++                return False
++            except UnicodeDecodeError:
++                return True
+diff --git a/sos/collector/__init__.py b/sos/collector/__init__.py
+index 9884836c..469db60d 100644
+--- a/sos/collector/__init__.py
++++ b/sos/collector/__init__.py
+@@ -67,6 +67,7 @@ class SoSCollector(SoSComponent):
+         'jobs': 4,
+         'keywords': [],
+         'keyword_file': None,
++        'keep_binary_files': False,
+         'label': '',
+         'list_options': False,
+         'log_size': 0,
+@@ -410,6 +411,10 @@ class SoSCollector(SoSComponent):
+                                  dest='clean',
+                                  default=False, action='store_true',
+                                  help='Obfuscate sensistive information')
++        cleaner_grp.add_argument('--keep-binary-files', default=False,
++                                 action='store_true', dest='keep_binary_files',
++                                 help='Keep unprocessable binary files in the '
++                                      'archive instead of removing them')
+         cleaner_grp.add_argument('--domains', dest='domains', default=[],
+                                  action='extend',
+                                  help='Additional domain names to obfuscate')
+diff --git a/sos/report/__init__.py b/sos/report/__init__.py
+index d4345409..2cedc76e 100644
+--- a/sos/report/__init__.py
++++ b/sos/report/__init__.py
+@@ -82,6 +82,7 @@ class SoSReport(SoSComponent):
+         'case_id': '',
+         'chroot': 'auto',
+         'clean': False,
++        'keep_binary_files': False,
+         'desc': '',
+         'domains': [],
+         'dry_run': False,
+@@ -344,6 +345,11 @@ class SoSReport(SoSComponent):
+                                  default='/etc/sos/cleaner/default_mapping',
+                                  help=('Provide a previously generated mapping'
+                                        ' file for obfuscation'))
++        cleaner_grp.add_argument('--keep-binary-files', default=False,
++                                 action='store_true',
++                                 dest='keep_binary_files',
++                                 help='Keep unprocessable binary files in the '
++                                      'archive instead of removing them')
+         cleaner_grp.add_argument('--usernames', dest='usernames', default=[],
+                                  action='extend',
+                                  help='List of usernames to obfuscate')
+
+From aed0102a1d6ef9a030c9e5349f092b51b9d1f22d Mon Sep 17 00:00:00 2001
+From: Jake Hunsaker <jhunsake@redhat.com>
+Date: Fri, 11 Jun 2021 23:20:59 -0400
+Subject: [PATCH 01/10] [SoSNode] Allow individually setting node options
+
+Like we now do for primary nodes, add the ability to individually set
+node options via a new `set_node_options()` method for when blanket
+setting options across all nodes via the options class attrs is not
+sufficient.
+
+Signed-off-by: Jake Hunsaker <jhunsake@redhat.com>
+---
+ sos/collector/clusters/__init__.py | 10 ++++++++++
+ sos/collector/sosnode.py           |  6 ++++--
+ 2 files changed, 14 insertions(+), 2 deletions(-)
+
+diff --git a/sos/collector/clusters/__init__.py b/sos/collector/clusters/__init__.py
+index 90e62d79..c4da1ab8 100644
+--- a/sos/collector/clusters/__init__.py
++++ b/sos/collector/clusters/__init__.py
+@@ -137,6 +137,16 @@ class Cluster():
+         """
+         self.cluster_ssh_key = key
+ 
++    def set_node_options(self, node):
++        """If there is a need to set specific options on ONLY the non-primary
++        nodes in a collection, override this method in the cluster profile
++        and do that here.
++
++        :param node:        The non-primary node
++        :type node:         ``SoSNode``
++        """
++        pass
++
+     def set_master_options(self, node):
+         """If there is a need to set specific options in the sos command being
+         run on the cluster's master nodes, override this method in the cluster
+diff --git a/sos/collector/sosnode.py b/sos/collector/sosnode.py
+index 1fc03076..7e784aa1 100644
+--- a/sos/collector/sosnode.py
++++ b/sos/collector/sosnode.py
+@@ -657,6 +657,8 @@ class SosNode():
+         # set master-only options
+         if self.cluster.check_node_is_master(self):
+             self.cluster.set_master_options(self)
++        else:
++            self.cluster.set_node_options(self)
+ 
+     def finalize_sos_cmd(self):
+         """Use host facts and compare to the cluster type to modify the sos
+@@ -713,13 +715,13 @@ class SosNode():
+                 sos_opts.append('--cmd-timeout=%s'
+                                 % quote(str(self.opts.cmd_timeout)))
+ 
++        self.update_cmd_from_cluster()
++
+         sos_cmd = sos_cmd.replace(
+             'sosreport',
+             os.path.join(self.host.sos_bin_path, self.sos_bin)
+         )
+ 
+-        self.update_cmd_from_cluster()
+-
+         if self.opts.only_plugins:
+             plugs = [o for o in self.opts.only_plugins
+                      if self._plugin_exists(o)]
+-- 
+2.26.3
+
+
+From 96f166699d12704cc7cf73cb8b13278675f68730 Mon Sep 17 00:00:00 2001
+From: Jake Hunsaker <jhunsake@redhat.com>
+Date: Sat, 12 Jun 2021 00:02:36 -0400
+Subject: [PATCH 02/10] [sosnode] Support passing env vars to `run_command()`
+
+Updates `run_command()` to support passing new environment variables to
+the command being run, for that command alone. This parameter takes a
+dict, and if set we will first copy the existing set of env vars on the
+node and then update that set of variables using the passed dict.
+
+Additionally, `execute_sos_command()` will now try to pass a new
+`sos_env_vars` dict (default empty) so that clusters may set environment
+variables specifically for the sos command being run, without having to
+modify the actual sos command being executed.
+
+Signed-off-by: Jake Hunsaker <jhunsake@redhat.com>
+---
+ sos/collector/sosnode.py | 27 ++++++++++++++++++++++++---
+ 1 file changed, 24 insertions(+), 3 deletions(-)
+
+diff --git a/sos/collector/sosnode.py b/sos/collector/sosnode.py
+index 7e784aa1..40472a4e 100644
+--- a/sos/collector/sosnode.py
++++ b/sos/collector/sosnode.py
+@@ -45,6 +45,8 @@ class SosNode():
+         self.host = None
+         self.cluster = None
+         self.hostname = None
++        self.sos_env_vars = {}
++        self._env_vars = {}
+         self._password = password or self.opts.password
+         if not self.opts.nopasswd_sudo and not self.opts.sudo_pw:
+             self.opts.sudo_pw = self._password
+@@ -109,6 +111,21 @@ class SosNode():
+     def _fmt_msg(self, msg):
+         return '{:<{}} : {}'.format(self._hostname, self.hostlen + 1, msg)
+ 
++    @property
++    def env_vars(self):
++        if not self._env_vars:
++            if self.local:
++                self._env_vars = os.environ.copy()
++            else:
++                ret = self.run_command("env --null")
++                if ret['status'] == 0:
++                    for ln in ret['output'].split('\x00'):
++                        if not ln:
++                            continue
++                        _val = ln.split('=')
++                        self._env_vars[_val[0]] = _val[1]
++        return self._env_vars
++
+     def set_node_manifest(self, manifest):
+         """Set the manifest section that this node will write to
+         """
+@@ -404,7 +421,7 @@ class SosNode():
+         return self.host.package_manager.pkg_by_name(pkg) is not None
+ 
+     def run_command(self, cmd, timeout=180, get_pty=False, need_root=False,
+-                    force_local=False, use_container=False):
++                    force_local=False, use_container=False, env=None):
+         """Runs a given cmd, either via the SSH session or locally
+ 
+         Arguments:
+@@ -446,7 +463,10 @@ class SosNode():
+         else:
+             if get_pty:
+                 cmd = "/bin/bash -c %s" % quote(cmd)
+-        res = pexpect.spawn(cmd, encoding='utf-8')
++        if env:
++            _cmd_env = self.env_vars
++            _cmd_env.update(env)
++        res = pexpect.spawn(cmd, encoding='utf-8', env=_cmd_env)
+         if need_root:
+             if self.need_sudo:
+                 res.sendline(self.opts.sudo_pw)
+@@ -830,7 +850,8 @@ class SosNode():
+             res = self.run_command(self.sos_cmd,
+                                    timeout=self.opts.timeout,
+                                    get_pty=True, need_root=True,
+-                                   use_container=True)
++                                   use_container=True,
++                                   env=self.sos_env_vars)
+             if res['status'] == 0:
+                 for line in res['stdout'].splitlines():
+                     if fnmatch.fnmatch(line, '*sosreport-*tar*'):
+-- 
+2.26.3
+
+
+From a9e1632113406a646bdd7525982b699cf790aedb Mon Sep 17 00:00:00 2001
+From: Jake Hunsaker <jhunsake@redhat.com>
+Date: Tue, 15 Jun 2021 12:43:27 -0400
+Subject: [PATCH 03/10] [collect|sosnode] Avoiding clobbering sos options
+ between nodes
+
+This commit overhauls the function of `finalize_sos_cmd()` in several
+ways.
+
+First, assign the sos report plugin-related options directly to private
+copies of those values for each node, so that the shared cluster profile
+does not clobber options between nodes.
+
+Second, provide a default Lock mechanism for clusters that need to
+perform some node-comparison logic when assigning options based on node
+role.
+
+Finally, finalize the sos command for each node _prior_ to the call to
+`SoSNode.sosreport()` so that we can be sure that clusters are able to
+appropriately compare and assign sos options across nodes before some
+nodes have already started and/or finished their own sos report
+collections.
+
+Signed-off-by: Jake Hunsaker <jhunsake@redhat.com>
+---
+ sos/collector/__init__.py          | 14 +++++
+ sos/collector/clusters/__init__.py |  2 +
+ sos/collector/sosnode.py           | 89 +++++++++++++++++-------------
+ 3 files changed, 67 insertions(+), 38 deletions(-)
+
+diff --git a/sos/collector/__init__.py b/sos/collector/__init__.py
+index 469db60d..7b8cfcf7 100644
+--- a/sos/collector/__init__.py
++++ b/sos/collector/__init__.py
+@@ -1186,6 +1186,10 @@ this utility or remote systems that it connects to.
+                              "concurrently\n"
+                              % (self.report_num, self.opts.jobs))
+ 
++            npool = ThreadPoolExecutor(self.opts.jobs)
++            npool.map(self._finalize_sos_cmd, self.client_list, chunksize=1)
++            npool.shutdown(wait=True)
++
+             pool = ThreadPoolExecutor(self.opts.jobs)
+             pool.map(self._collect, self.client_list, chunksize=1)
+             pool.shutdown(wait=True)
+@@ -1217,6 +1221,16 @@ this utility or remote systems that it connects to.
+             except Exception as err:
+                 self.ui_log.error("Upload attempt failed: %s" % err)
+ 
++    def _finalize_sos_cmd(self, client):
++        """Calls finalize_sos_cmd() on each node so that we have the final
++        command before we thread out the actual execution of sos
++        """
++        try:
++            client.finalize_sos_cmd()
++        except Exception as err:
++            self.log_error("Could not finalize sos command for %s: %s"
++                           % (client.address, err))
++
+     def _collect(self, client):
+         """Runs sosreport on each node"""
+         try:
+diff --git a/sos/collector/clusters/__init__.py b/sos/collector/clusters/__init__.py
+index c4da1ab8..bb728bc0 100644
+--- a/sos/collector/clusters/__init__.py
++++ b/sos/collector/clusters/__init__.py
+@@ -11,6 +11,7 @@
+ import logging
+ 
+ from sos.options import ClusterOption
++from threading import Lock
+ 
+ 
+ class Cluster():
+@@ -66,6 +67,7 @@ class Cluster():
+             if cls.__name__ != 'Cluster':
+                 self.cluster_type.append(cls.__name__)
+         self.node_list = None
++        self.lock = Lock()
+         self.soslog = logging.getLogger('sos')
+         self.ui_log = logging.getLogger('sos_ui')
+         self.options = []
+diff --git a/sos/collector/sosnode.py b/sos/collector/sosnode.py
+index 40472a4e..1c25cc34 100644
+--- a/sos/collector/sosnode.py
++++ b/sos/collector/sosnode.py
+@@ -38,6 +38,7 @@ class SosNode():
+         self.address = address.strip()
+         self.commons = commons
+         self.opts = commons['cmdlineopts']
++        self._assign_config_opts()
+         self.tmpdir = commons['tmpdir']
+         self.hostlen = commons['hostlen']
+         self.need_sudo = commons['need_sudo']
+@@ -465,8 +466,8 @@ class SosNode():
+                 cmd = "/bin/bash -c %s" % quote(cmd)
+         if env:
+             _cmd_env = self.env_vars
+-            _cmd_env.update(env)
+-        res = pexpect.spawn(cmd, encoding='utf-8', env=_cmd_env)
++            env = _cmd_env.update(env)
++        res = pexpect.spawn(cmd, encoding='utf-8', env=env)
+         if need_root:
+             if self.need_sudo:
+                 res.sendline(self.opts.sudo_pw)
+@@ -484,9 +485,6 @@ class SosNode():
+ 
+     def sosreport(self):
+         """Run a sosreport on the node, then collect it"""
+-        self.sos_cmd = self.finalize_sos_cmd()
+-        self.log_info('Final sos command set to %s' % self.sos_cmd)
+-        self.manifest.add_field('final_sos_command', self.sos_cmd)
+         try:
+             path = self.execute_sos_command()
+             if path:
+@@ -656,29 +654,42 @@ class SosNode():
+         This will NOT override user supplied options.
+         """
+         if self.cluster.sos_preset:
+-            if not self.opts.preset:
+-                self.opts.preset = self.cluster.sos_preset
++            if not self.preset:
++                self.preset = self.cluster.sos_preset
+             else:
+                 self.log_info('Cluster specified preset %s but user has also '
+                               'defined a preset. Using user specification.'
+                               % self.cluster.sos_preset)
+         if self.cluster.sos_plugins:
+             for plug in self.cluster.sos_plugins:
+-                if plug not in self.opts.enable_plugins:
+-                    self.opts.enable_plugins.append(plug)
++                if plug not in self.enable_plugins:
++                    self.enable_plugins.append(plug)
+ 
+         if self.cluster.sos_plugin_options:
+             for opt in self.cluster.sos_plugin_options:
+-                if not any(opt in o for o in self.opts.plugin_options):
++                if not any(opt in o for o in self.plugin_options):
+                     option = '%s=%s' % (opt,
+                                         self.cluster.sos_plugin_options[opt])
+-                    self.opts.plugin_options.append(option)
++                    self.plugin_options.append(option)
+ 
+         # set master-only options
+         if self.cluster.check_node_is_master(self):
+-            self.cluster.set_master_options(self)
++            with self.cluster.lock:
++                self.cluster.set_master_options(self)
+         else:
+-            self.cluster.set_node_options(self)
++            with self.cluster.lock:
++                self.cluster.set_node_options(self)
++
++    def _assign_config_opts(self):
++        """From the global opts configuration, assign those values locally
++        to this node so that they may be acted on individually.
++        """
++        # assign these to new, private copies
++        self.only_plugins = list(self.opts.only_plugins)
++        self.skip_plugins = list(self.opts.skip_plugins)
++        self.enable_plugins = list(self.opts.enable_plugins)
++        self.plugin_options = list(self.opts.plugin_options)
++        self.preset = list(self.opts.preset)
+ 
+     def finalize_sos_cmd(self):
+         """Use host facts and compare to the cluster type to modify the sos
+@@ -742,59 +753,61 @@ class SosNode():
+             os.path.join(self.host.sos_bin_path, self.sos_bin)
+         )
+ 
+-        if self.opts.only_plugins:
+-            plugs = [o for o in self.opts.only_plugins
+-                     if self._plugin_exists(o)]
+-            if len(plugs) != len(self.opts.only_plugins):
+-                not_only = list(set(self.opts.only_plugins) - set(plugs))
++        if self.only_plugins:
++            plugs = [o for o in self.only_plugins if self._plugin_exists(o)]
++            if len(plugs) != len(self.only_plugins):
++                not_only = list(set(self.only_plugins) - set(plugs))
+                 self.log_debug('Requested plugins %s were requested to be '
+                                'enabled but do not exist' % not_only)
+-            only = self._fmt_sos_opt_list(self.opts.only_plugins)
++            only = self._fmt_sos_opt_list(self.only_plugins)
+             if only:
+                 sos_opts.append('--only-plugins=%s' % quote(only))
+-            return "%s %s" % (sos_cmd, ' '.join(sos_opts))
++            self.sos_cmd = "%s %s" % (sos_cmd, ' '.join(sos_opts))
++            self.log_info('Final sos command set to %s' % self.sos_cmd)
++            self.manifest.add_field('final_sos_command', self.sos_cmd)
++            return
+ 
+-        if self.opts.skip_plugins:
++        if self.skip_plugins:
+             # only run skip-plugins for plugins that are enabled
+-            skip = [o for o in self.opts.skip_plugins
+-                    if self._check_enabled(o)]
+-            if len(skip) != len(self.opts.skip_plugins):
+-                not_skip = list(set(self.opts.skip_plugins) - set(skip))
++            skip = [o for o in self.skip_plugins if self._check_enabled(o)]
++            if len(skip) != len(self.skip_plugins):
++                not_skip = list(set(self.skip_plugins) - set(skip))
+                 self.log_debug('Requested to skip plugins %s, but plugins are '
+                                'already not enabled' % not_skip)
+             skipln = self._fmt_sos_opt_list(skip)
+             if skipln:
+                 sos_opts.append('--skip-plugins=%s' % quote(skipln))
+ 
+-        if self.opts.enable_plugins:
++        if self.enable_plugins:
+             # only run enable for plugins that are disabled
+-            opts = [o for o in self.opts.enable_plugins
+-                    if o not in self.opts.skip_plugins
++            opts = [o for o in self.enable_plugins
++                    if o not in self.skip_plugins
+                     and self._check_disabled(o) and self._plugin_exists(o)]
+-            if len(opts) != len(self.opts.enable_plugins):
+-                not_on = list(set(self.opts.enable_plugins) - set(opts))
++            if len(opts) != len(self.enable_plugins):
++                not_on = list(set(self.enable_plugins) - set(opts))
+                 self.log_debug('Requested to enable plugins %s, but plugins '
+                                'are already enabled or do not exist' % not_on)
+             enable = self._fmt_sos_opt_list(opts)
+             if enable:
+                 sos_opts.append('--enable-plugins=%s' % quote(enable))
+ 
+-        if self.opts.plugin_options:
+-            opts = [o for o in self.opts.plugin_options
++        if self.plugin_options:
++            opts = [o for o in self.plugin_options
+                     if self._plugin_exists(o.split('.')[0])
+                     and self._plugin_option_exists(o.split('=')[0])]
+             if opts:
+                 sos_opts.append('-k %s' % quote(','.join(o for o in opts)))
+ 
+-        if self.opts.preset:
+-            if self._preset_exists(self.opts.preset):
+-                sos_opts.append('--preset=%s' % quote(self.opts.preset))
++        if self.preset:
++            if self._preset_exists(self.preset):
++                sos_opts.append('--preset=%s' % quote(self.preset))
+             else:
+                 self.log_debug('Requested to enable preset %s but preset does '
+-                               'not exist on node' % self.opts.preset)
++                               'not exist on node' % self.preset)
+ 
+-        _sos_cmd = "%s %s" % (sos_cmd, ' '.join(sos_opts))
+-        return _sos_cmd
++        self.sos_cmd = "%s %s" % (sos_cmd, ' '.join(sos_opts))
++        self.log_info('Final sos command set to %s' % self.sos_cmd)
++        self.manifest.add_field('final_sos_command', self.sos_cmd)
+ 
+     def determine_sos_label(self):
+         """Determine what, if any, label should be added to the sosreport"""
+-- 
+2.26.3
+
+
+From 7e6c078e51143f7064190b316a251ddd8d431495 Mon Sep 17 00:00:00 2001
+From: Jake Hunsaker <jhunsake@redhat.com>
+Date: Tue, 15 Jun 2021 18:38:34 -0400
+Subject: [PATCH 04/10] [cleaner] Improve handling of symlink obfuscation
+
+Improves handling of symlink obfuscation by only performing the
+obfuscation on the ultimate target of any symlinks encountered. Now,
+when a symlink is encountered, clean will obfuscate the link name and
+re-write it in the archive, pointing to the (potentially obfuscated)
+target name.
+
+Signed-off-by: Jake Hunsaker <jhunsake@redhat.com>
+---
+ sos/cleaner/__init__.py | 65 +++++++++++++++++++++++++++++------------
+ 1 file changed, 46 insertions(+), 19 deletions(-)
+
+diff --git a/sos/cleaner/__init__.py b/sos/cleaner/__init__.py
+index abfb684b..b38c8dfc 100644
+--- a/sos/cleaner/__init__.py
++++ b/sos/cleaner/__init__.py
+@@ -612,28 +612,55 @@ third party.
+         if not filename:
+             # the requested file doesn't exist in the archive
+             return
+-        self.log_debug("Obfuscating %s" % short_name or filename,
+-                       caller=arc_name)
+         subs = 0
+-        tfile = tempfile.NamedTemporaryFile(mode='w', dir=self.tmpdir)
+-        with open(filename, 'r') as fname:
+-            for line in fname:
+-                try:
+-                    line, count = self.obfuscate_line(line)
+-                    subs += count
+-                    tfile.write(line)
+-                except Exception as err:
+-                    self.log_debug("Unable to obfuscate %s: %s"
+-                                   % (short_name, err), caller=arc_name)
+-        tfile.seek(0)
+-        if subs:
+-            shutil.copy(tfile.name, filename)
+-        tfile.close()
+-        _ob_filename = self.obfuscate_string(short_name)
+-        if _ob_filename != short_name:
++        if not os.path.islink(filename):
++            # don't run the obfuscation on the link, but on the actual file
++            # at some other point.
++            self.log_debug("Obfuscating %s" % short_name or filename,
++                           caller=arc_name)
++            tfile = tempfile.NamedTemporaryFile(mode='w', dir=self.tmpdir)
++            with open(filename, 'r') as fname:
++                for line in fname:
++                    try:
++                        line, count = self.obfuscate_line(line)
++                        subs += count
++                        tfile.write(line)
++                    except Exception as err:
++                        self.log_debug("Unable to obfuscate %s: %s"
++                                       % (short_name, err), caller=arc_name)
++            tfile.seek(0)
++            if subs:
++                shutil.copy(tfile.name, filename)
++            tfile.close()
++
++        _ob_short_name = self.obfuscate_string(short_name.split('/')[-1])
++        _ob_filename = short_name.replace(short_name.split('/')[-1],
++                                          _ob_short_name)
++        _sym_changed = False
++        if os.path.islink(filename):
++            _link = os.readlink(filename)
++            _ob_link = self.obfuscate_string(_link)
++            if _ob_link != _link:
++                _sym_changed = True
++
++        if (_ob_filename != short_name) or _sym_changed:
+             arc_path = filename.split(short_name)[0]
+             _ob_path = os.path.join(arc_path, _ob_filename)
+-            os.rename(filename, _ob_path)
++            # ensure that any plugin subdirs that contain obfuscated strings
++            # get created with obfuscated counterparts
++            if not os.path.islink(filename):
++                os.rename(filename, _ob_path)
++            else:
++                # generate the obfuscated name of the link target
++                _target_ob = self.obfuscate_string(os.readlink(filename))
++                # remove the unobfuscated original symlink first, in case the
++                # symlink name hasn't changed but the target has
++                os.remove(filename)
++                # create the newly obfuscated symlink, pointing to the
++                # obfuscated target name, which may not exist just yet, but
++                # when the actual file is obfuscated, will be created
++                os.symlink(_target_ob, _ob_path)
++
+         return subs
+ 
+     def obfuscate_string(self, string_data):
+-- 
+2.26.3
+
+
+From b5d166ac9ff79bc3740c5e66f16d60762f9a0ac0 Mon Sep 17 00:00:00 2001
+From: Jake Hunsaker <jhunsake@redhat.com>
+Date: Tue, 15 Jun 2021 22:56:19 -0400
+Subject: [PATCH 05/10] [cleaner] Iterate over matches with most precise match
+ first
+
+When matching strings in parsers to do obfuscation, we should be using
+the most precise matches found first, rather than matching in the order
+a match is hit. This ensures that we correctly obfuscate an entire
+string, rather than potentially only partial substring(s) that exist
+within the entire match.
+
+Signed-off-by: Jake Hunsaker <jhunsake@redhat.com>
+---
+ sos/cleaner/parsers/__init__.py        | 10 +++++++---
+ sos/cleaner/parsers/keyword_parser.py  |  2 +-
+ sos/cleaner/parsers/username_parser.py |  2 +-
+ 3 files changed, 9 insertions(+), 5 deletions(-)
+
+diff --git a/sos/cleaner/parsers/__init__.py b/sos/cleaner/parsers/__init__.py
+index c77300aa..cfa20b95 100644
+--- a/sos/cleaner/parsers/__init__.py
++++ b/sos/cleaner/parsers/__init__.py
+@@ -82,10 +82,12 @@ class SoSCleanerParser():
+         for pattern in self.regex_patterns:
+             matches = [m[0] for m in re.findall(pattern, line, re.I)]
+             if matches:
++                matches.sort(reverse=True, key=lambda x: len(x))
+                 count += len(matches)
+                 for match in matches:
+-                    new_match = self.mapping.get(match.strip())
+-                    line = line.replace(match.strip(), new_match)
++                    match = match.strip()
++                    new_match = self.mapping.get(match)
++                    line = line.replace(match, new_match)
+         return line, count
+ 
+     def parse_string_for_keys(self, string_data):
+@@ -102,7 +104,9 @@ class SoSCleanerParser():
+         :returns: The obfuscated line
+         :rtype: ``str``
+         """
+-        for key, val in self.mapping.dataset.items():
++        for pair in sorted(self.mapping.dataset.items(), reverse=True,
++                           key=lambda x: len(x[0])):
++            key, val = pair
+             if key in string_data:
+                 string_data = string_data.replace(key, val)
+         return string_data
+diff --git a/sos/cleaner/parsers/keyword_parser.py b/sos/cleaner/parsers/keyword_parser.py
+index 3dc2b7f0..9134f82d 100644
+--- a/sos/cleaner/parsers/keyword_parser.py
++++ b/sos/cleaner/parsers/keyword_parser.py
+@@ -42,7 +42,7 @@ class SoSKeywordParser(SoSCleanerParser):
+ 
+     def parse_line(self, line):
+         count = 0
+-        for keyword in self.user_keywords:
++        for keyword in sorted(self.user_keywords, reverse=True):
+             if keyword in line:
+                 line = line.replace(keyword, self.mapping.get(keyword))
+                 count += 1
+diff --git a/sos/cleaner/parsers/username_parser.py b/sos/cleaner/parsers/username_parser.py
+index 2bb6c7f3..0c3bbac4 100644
+--- a/sos/cleaner/parsers/username_parser.py
++++ b/sos/cleaner/parsers/username_parser.py
+@@ -51,7 +51,7 @@ class SoSUsernameParser(SoSCleanerParser):
+ 
+     def parse_line(self, line):
+         count = 0
+-        for username in self.mapping.dataset.keys():
++        for username in sorted(self.mapping.dataset.keys(), reverse=True):
+             if username in line:
+                 count = line.count(username)
+                 line = line.replace(username, self.mapping.get(username))
+-- 
+2.26.3
+
+
+From 7ed138fcd2ee6ece3e7fbd9e48293b212e0b4e41 Mon Sep 17 00:00:00 2001
+From: Jake Hunsaker <jhunsake@redhat.com>
+Date: Wed, 16 Jun 2021 01:15:45 -0400
+Subject: [PATCH 06/10] [cleaner] Explicitly obfuscate directory names within
+ archives
+
+This commit adds a step to `obfuscate_report()` that explicitly walks
+through all directories in the archive, and obfuscates the directory
+names if necessary.
+
+Since this uses `obfuscate_string()` for the directory names, a
+`skip_keys` list has been added to maps to allow parsers/maps to
+specify matched keys (such as short names for the hostname parser) that
+should not be considered when obfuscating directory names (e.g. 'www').
+
+Closes: #2465
+
+Signed-off-by: Jake Hunsaker <jhunsake@redhat.com>
+---
+ sos/cleaner/__init__.py              | 26 ++++++++++++++++++++++++++
+ sos/cleaner/mappings/__init__.py     |  4 +++-
+ sos/cleaner/mappings/hostname_map.py |  5 +++++
+ sos/cleaner/obfuscation_archive.py   | 20 ++++++++++++++++++--
+ sos/cleaner/parsers/__init__.py      |  2 ++
+ 5 files changed, 54 insertions(+), 3 deletions(-)
+
+diff --git a/sos/cleaner/__init__.py b/sos/cleaner/__init__.py
+index b38c8dfc..88d4d0ea 100644
+--- a/sos/cleaner/__init__.py
++++ b/sos/cleaner/__init__.py
+@@ -562,6 +562,11 @@ third party.
+                 except Exception as err:
+                     self.log_debug("Unable to parse file %s: %s"
+                                    % (short_name, err))
++            try:
++                self.obfuscate_directory_names(archive)
++            except Exception as err:
++                self.log_info("Failed to obfuscate directories: %s" % err,
++                              caller=archive.archive_name)
+ 
+             # if the archive was already a tarball, repack it
+             method = archive.get_compression()
+@@ -663,6 +668,27 @@ third party.
+ 
+         return subs
+ 
++    def obfuscate_directory_names(self, archive):
++        """For all directories that exist within the archive, obfuscate the
++        directory name if it contains sensitive strings found during execution
++        """
++        self.log_info("Obfuscating directory names in archive %s"
++                      % archive.archive_name)
++        for dirpath in sorted(archive.get_directory_list(), reverse=True):
++            for _name in os.listdir(dirpath):
++                _dirname = os.path.join(dirpath, _name)
++                _arc_dir = _dirname.split(archive.extracted_path)[-1]
++                if os.path.isdir(_dirname):
++                    _ob_dirname = self.obfuscate_string(_name)
++                    if _ob_dirname != _name:
++                        _ob_arc_dir = _arc_dir.rstrip(_name)
++                        _ob_arc_dir = os.path.join(
++                            archive.extracted_path,
++                            _ob_arc_dir.lstrip('/'),
++                            _ob_dirname
++                        )
++                        os.rename(_dirname, _ob_arc_dir)
++
+     def obfuscate_string(self, string_data):
+         for parser in self.parsers:
+             try:
+diff --git a/sos/cleaner/mappings/__init__.py b/sos/cleaner/mappings/__init__.py
+index dd464e5a..5cf5c8b2 100644
+--- a/sos/cleaner/mappings/__init__.py
++++ b/sos/cleaner/mappings/__init__.py
+@@ -20,8 +20,10 @@ class SoSMap():
+     corresponding SoSMap() object, to allow for easy retrieval of obfuscated
+     items.
+     """
+-
++    # used for regex skips in parser.parse_line()
+     ignore_matches = []
++    # used for filename obfuscations in parser.parse_string_for_keys()
++    skip_keys = []
+ 
+     def __init__(self):
+         self.dataset = {}
+diff --git a/sos/cleaner/mappings/hostname_map.py b/sos/cleaner/mappings/hostname_map.py
+index e0b7bf1d..c9a44d8d 100644
+--- a/sos/cleaner/mappings/hostname_map.py
++++ b/sos/cleaner/mappings/hostname_map.py
+@@ -35,6 +35,11 @@ class SoSHostnameMap(SoSMap):
+         '^com..*'
+     ]
+ 
++    skip_keys = [
++        'www',
++        'api'
++    ]
++
+     host_count = 0
+     domain_count = 0
+     _domains = {}
+diff --git a/sos/cleaner/obfuscation_archive.py b/sos/cleaner/obfuscation_archive.py
+index 88f978d9..90188358 100644
+--- a/sos/cleaner/obfuscation_archive.py
++++ b/sos/cleaner/obfuscation_archive.py
+@@ -202,10 +202,22 @@ class SoSObfuscationArchive():
+         """Return a list of all files within the archive"""
+         self.file_list = []
+         for dirname, dirs, files in os.walk(self.extracted_path):
++            for _dir in dirs:
++                _dirpath = os.path.join(dirname, _dir)
++                # catch dir-level symlinks
++                if os.path.islink(_dirpath) and os.path.isdir(_dirpath):
++                    self.file_list.append(_dirpath)
+             for filename in files:
+                 self.file_list.append(os.path.join(dirname, filename))
+         return self.file_list
+ 
++    def get_directory_list(self):
++        """Return a list of all directories within the archive"""
++        dir_list = []
++        for dirname, dirs, files in os.walk(self.extracted_path):
++            dir_list.append(dirname)
++        return dir_list
++
+     def update_sub_count(self, fname, count):
+         """Called when a file has finished being parsed and used to track
+         total substitutions made and number of files that had changes made
+@@ -230,7 +242,8 @@ class SoSObfuscationArchive():
+                                         archive root
+         """
+ 
+-        if not os.path.isfile(self.get_file_path(filename)):
++        if (not os.path.isfile(self.get_file_path(filename)) and not
++                os.path.islink(self.get_file_path(filename))):
+             return True
+ 
+         for _skip in self.skip_list:
+@@ -266,7 +279,10 @@ class SoSObfuscationArchive():
+             if re.match(_arc_reg, fname):
+                 return True
+ 
+-        return self.file_is_binary(fname)
++        if os.path.isfile(self.get_file_path(fname)):
++            return self.file_is_binary(fname)
++        # don't fail on dir-level symlinks
++        return False
+ 
+     def file_is_binary(self, fname):
+         """Determine if the file is a binary file or not.
+diff --git a/sos/cleaner/parsers/__init__.py b/sos/cleaner/parsers/__init__.py
+index cfa20b95..84874475 100644
+--- a/sos/cleaner/parsers/__init__.py
++++ b/sos/cleaner/parsers/__init__.py
+@@ -107,6 +107,8 @@ class SoSCleanerParser():
+         for pair in sorted(self.mapping.dataset.items(), reverse=True,
+                            key=lambda x: len(x[0])):
+             key, val = pair
++            if key in self.mapping.skip_keys:
++                continue
+             if key in string_data:
+                 string_data = string_data.replace(key, val)
+         return string_data
+-- 
+2.26.3
+
+
+From f180150277b706e72f2445287f3d0b6943efa252 Mon Sep 17 00:00:00 2001
+From: Jake Hunsaker <jhunsake@redhat.com>
+Date: Wed, 16 Jun 2021 02:24:51 -0400
+Subject: [PATCH 07/10] [hostname parser,map] Attempt to detect strings with
+ FQDN substrings
+
+This commit updates the hostname parser and associated map to be able to
+better detect and obfuscate FQDN substrings within file content and file
+names, particularly when the regex patterns failed to match a hostname
+that is formatted with '_' characters rather than '.' characters.
+
+The `get()` method has been updated to allow preserving characters and
+certain extensions that are not part of the FQDN, but are brought in by
+the regex pattern due to the need to use word boundary indicators within
+the pattern.
+
+Signed-off-by: Jake Hunsaker <jhunsake@redhat.com>
+---
+ sos/cleaner/mappings/hostname_map.py   | 59 +++++++++++++++++++++++---
+ sos/cleaner/parsers/__init__.py        |  3 +-
+ sos/cleaner/parsers/hostname_parser.py | 30 ++++++++++---
+ 3 files changed, 81 insertions(+), 11 deletions(-)
+
+diff --git a/sos/cleaner/mappings/hostname_map.py b/sos/cleaner/mappings/hostname_map.py
+index c9a44d8d..d4b2c88e 100644
+--- a/sos/cleaner/mappings/hostname_map.py
++++ b/sos/cleaner/mappings/hostname_map.py
+@@ -104,7 +104,7 @@ class SoSHostnameMap(SoSMap):
+         host = domain.split('.')
+         if len(host) == 1:
+             # don't block on host's shortname
+-            return True
++            return host[0] in self.hosts.keys()
+         else:
+             domain = host[0:-1]
+             for known_domain in self._domains:
+@@ -113,12 +113,59 @@ class SoSHostnameMap(SoSMap):
+         return False
+ 
+     def get(self, item):
+-        if item.startswith(('.', '_')):
+-            item = item.lstrip('._')
+-        item = item.strip()
++        prefix = ''
++        suffix = ''
++        final = None
++        # The regex pattern match may include a leading and/or trailing '_'
++        # character due to the need to use word boundary matching, so we need
++        # to strip these from the string during processing, but still keep them
++        # in the returned string to not mangle the string replacement in the
++        # context of the file or filename
++        while item.startswith(('.', '_')):
++            prefix += item[0]
++            item = item[1:]
++        while item.endswith(('.', '_')):
++            suffix += item[-1]
++            item = item[0:-1]
+         if not self.domain_name_in_loaded_domains(item.lower()):
+             return item
+-        return super(SoSHostnameMap, self).get(item)
++        if item.endswith(('.yaml', '.yml', '.crt', '.key', '.pem')):
++            ext = '.' + item.split('.')[-1]
++            item = item.replace(ext, '')
++            suffix += ext
++        if item not in self.dataset.keys():
++            # try to account for use of '-' in names that include hostnames
++            # and don't create new mappings for each of these
++            for _existing in sorted(self.dataset.keys(), reverse=True,
++                                    key=lambda x: len(x)):
++                _host_substr = False
++                _test = item.split(_existing)
++                _h = _existing.split('.')
++                # avoid considering a full FQDN match as a new match off of
++                # the hostname of an existing match
++                if _h[0] and _h[0] in self.hosts.keys():
++                    _host_substr = True
++                if len(_test) == 1 or not _test[0]:
++                    # does not match existing obfuscation
++                    continue
++                elif _test[0].endswith('.') and not _host_substr:
++                    # new hostname in known domain
++                    final = super(SoSHostnameMap, self).get(item)
++                    break
++                elif item.split(_test[0]):
++                    # string that includes existing FQDN obfuscation substring
++                    # so, only obfuscate the FQDN part
++                    try:
++                        itm = item.split(_test[0])[1]
++                        final = _test[0] + super(SoSHostnameMap, self).get(itm)
++                        break
++                    except Exception:
++                        # fallback to still obfuscating the entire item
++                        pass
++
++        if not final:
++            final = super(SoSHostnameMap, self).get(item)
++        return prefix + final + suffix
+ 
+     def sanitize_item(self, item):
+         host = item.split('.')
+@@ -146,6 +193,8 @@ class SoSHostnameMap(SoSMap):
+         """Obfuscate the short name of the host with an incremented counter
+         based on the total number of obfuscated host names
+         """
++        if not hostname:
++            return hostname
+         if hostname not in self.hosts:
+             ob_host = "host%s" % self.host_count
+             self.hosts[hostname] = ob_host
+diff --git a/sos/cleaner/parsers/__init__.py b/sos/cleaner/parsers/__init__.py
+index 84874475..57d2020a 100644
+--- a/sos/cleaner/parsers/__init__.py
++++ b/sos/cleaner/parsers/__init__.py
+@@ -87,7 +87,8 @@ class SoSCleanerParser():
+                 for match in matches:
+                     match = match.strip()
+                     new_match = self.mapping.get(match)
+-                    line = line.replace(match, new_match)
++                    if new_match != match:
++                        line = line.replace(match, new_match)
+         return line, count
+ 
+     def parse_string_for_keys(self, string_data):
+diff --git a/sos/cleaner/parsers/hostname_parser.py b/sos/cleaner/parsers/hostname_parser.py
+index 9982024b..3de6bb08 100644
+--- a/sos/cleaner/parsers/hostname_parser.py
++++ b/sos/cleaner/parsers/hostname_parser.py
+@@ -18,7 +18,7 @@ class SoSHostnameParser(SoSCleanerParser):
+     map_file_key = 'hostname_map'
+     prep_map_file = 'sos_commands/host/hostname'
+     regex_patterns = [
+-        r'(((\b|_)[a-zA-Z0-9-\.]{1,200}\.[a-zA-Z]{1,63}\b))'
++        r'(((\b|_)[a-zA-Z0-9-\.]{1,200}\.[a-zA-Z]{1,63}(\b|_)))'
+     ]
+ 
+     def __init__(self, conf_file=None, opt_domains=None):
+@@ -66,10 +66,30 @@ class SoSHostnameParser(SoSCleanerParser):
+         """Override the default parse_line() method to also check for the
+         shortname of the host derived from the hostname.
+         """
++
++        def _check_line(ln, count, search, repl=None):
++            """Perform a second manual check for substrings that may have been
++            missed by regex matching
++            """
++            if search in self.mapping.skip_keys:
++                return ln, count
++            if search in ln:
++                count += ln.count(search)
++                ln = ln.replace(search, self.mapping.get(repl or search))
++            return ln, count
++
+         count = 0
+         line, count = super(SoSHostnameParser, self).parse_line(line)
+-        for short_name in self.short_names:
+-            if short_name in line:
+-                count += 1
+-                line = line.replace(short_name, self.mapping.get(short_name))
++        # make an additional pass checking for '_' formatted substrings that
++        # the regex patterns won't catch
++        hosts = [h for h in self.mapping.dataset.keys() if '.' in h]
++        for host in sorted(hosts, reverse=True, key=lambda x: len(x)):
++            fqdn = host
++            for c in '.-':
++                fqdn = fqdn.replace(c, '_')
++            line, count = _check_line(line, count, fqdn, host)
++
++        for short_name in sorted(self.short_names, reverse=True):
++            line, count = _check_line(line, count, short_name)
++
+         return line, count
+-- 
+2.26.3
+
+
+From ec46e6a8fac58ed757344be3751eb1f925eab981 Mon Sep 17 00:00:00 2001
+From: Jake Hunsaker <jhunsake@redhat.com>
+Date: Mon, 14 Jun 2021 09:31:07 -0400
+Subject: [PATCH 08/10] [ocp] Refine OCP node options in cluster profile
+
+Adds explicit setting of primary/node sos options for the `openshift`
+plugin within the cluster, rather than relying on default configurations
+and best practices to avoid duplicate collections.
+
+Signed-off-by: Jake Hunsaker <jhunsake@redhat.com>
+---
+ sos/collector/clusters/ocp.py | 65 +++++++++++++++++++++++++++++++++--
+ sos/collector/sosnode.py      |  4 +--
+ 2 files changed, 65 insertions(+), 4 deletions(-)
+
+diff --git a/sos/collector/clusters/ocp.py b/sos/collector/clusters/ocp.py
+index 283fcfd1..ddff84a4 100644
+--- a/sos/collector/clusters/ocp.py
++++ b/sos/collector/clusters/ocp.py
+@@ -8,6 +8,8 @@
+ #
+ # See the LICENSE file in the source distribution for further information.
+ 
++import os
++
+ from pipes import quote
+ from sos.collector.clusters import Cluster
+ 
+@@ -18,10 +20,14 @@ class ocp(Cluster):
+     cluster_name = 'OpenShift Container Platform v4'
+     packages = ('openshift-hyperkube', 'openshift-clients')
+ 
++    api_collect_enabled = False
++    token = None
++
+     option_list = [
+         ('label', '', 'Colon delimited list of labels to select nodes with'),
+         ('role', '', 'Colon delimited list of roles to select nodes with'),
+-        ('kubeconfig', '', 'Path to the kubeconfig file')
++        ('kubeconfig', '', 'Path to the kubeconfig file'),
++        ('token', '', 'Service account token to use for oc authorization')
+     ]
+ 
+     def fmt_oc_cmd(self, cmd):
+@@ -32,9 +38,20 @@ class ocp(Cluster):
+             return "oc --config %s %s" % (self.get_option('kubeconfig'), cmd)
+         return "oc %s" % cmd
+ 
++    def _attempt_oc_login(self):
++        """Attempt to login to the API using the oc command using a provided
++        token
++        """
++        _res = self.exec_primary_cmd("oc login --insecure-skip-tls-verify=True"
++                                     " --token=%s" % self.token)
++        return _res['status'] == 0
++
+     def check_enabled(self):
+         if super(ocp, self).check_enabled():
+             return True
++        self.token = self.get_option('token') or os.getenv('SOSOCPTOKEN', None)
++        if self.token:
++            self._attempt_oc_login()
+         _who = self.fmt_oc_cmd('whoami')
+         return self.exec_master_cmd(_who)['status'] == 0
+ 
+@@ -106,4 +123,48 @@ class ocp(Cluster):
+         return 'master' in self.node_dict[sosnode.address]['roles']
+ 
+     def set_master_options(self, node):
+-        node.opts.enable_plugins.append('openshift')
++        node.enable_plugins.append('openshift')
++        if self.api_collect_enabled:
++            # a primary has already been enabled for API collection, disable
++            # it among others
++            node.plugin_options.append('openshift.no-oc=on')
++        else:
++            _oc_cmd = 'oc'
++            if node.host.containerized:
++                _oc_cmd = '/host/bin/oc'
++                # when run from a container, the oc command does not inherit
++                # the default config, so if it's present then pass it here to
++                # detect a functional oc command. This is sidestepped in sos
++                # report by being able to chroot the `oc` execution which we
++                # cannot do remotely
++                if node.file_exists('/root/.kube/config', need_root=True):
++                    _oc_cmd += ' --kubeconfig /host/root/.kube/config'
++            can_oc = node.run_command("%s whoami" % _oc_cmd,
++                                      use_container=node.host.containerized,
++                                      # container is available only to root
++                                      # and if rhel, need to run sos as root
++                                      # anyways which will run oc as root
++                                      need_root=True)
++            if can_oc['status'] == 0:
++                # the primary node can already access the API
++                self.api_collect_enabled = True
++            elif self.token:
++                node.sos_env_vars['SOSOCPTOKEN'] = self.token
++                self.api_collect_enabled = True
++            elif self.get_option('kubeconfig'):
++                kc = self.get_option('kubeconfig')
++                if node.file_exists(kc):
++                    if node.host.containerized:
++                        kc = "/host/%s" % kc
++                    node.sos_env_vars['KUBECONFIG'] = kc
++                    self.api_collect_enabled = True
++            if self.api_collect_enabled:
++                msg = ("API collections will be performed on %s\nNote: API "
++                       "collections may extend runtime by 10s of minutes\n"
++                       % node.address)
++                self.soslog.info(msg)
++                self.ui_log.info(msg)
++
++    def set_node_options(self, node):
++        # don't attempt OC API collections on non-primary nodes
++        node.plugin_options.append('openshift.no-oc=on')
+diff --git a/sos/collector/sosnode.py b/sos/collector/sosnode.py
+index 1c25cc34..6597d236 100644
+--- a/sos/collector/sosnode.py
++++ b/sos/collector/sosnode.py
+@@ -202,11 +202,11 @@ class SosNode():
+                 self.opts.registry_authfile or self.host.container_authfile
+             )
+ 
+-    def file_exists(self, fname):
++    def file_exists(self, fname, need_root=False):
+         """Checks for the presence of fname on the remote node"""
+         if not self.local:
+             try:
+-                res = self.run_command("stat %s" % fname)
++                res = self.run_command("stat %s" % fname, need_root=need_root)
+                 return res['status'] == 0
+             except Exception:
+                 return False
+-- 
+2.26.3
+
+
+From eea8e15845a8bcba91b93a5310ba693e8c20ab9c Mon Sep 17 00:00:00 2001
+From: Jake Hunsaker <jhunsake@redhat.com>
+Date: Thu, 17 Jun 2021 09:52:36 -0400
+Subject: [PATCH 09/10] [cleaner] Don't obfuscate default 'core' user
+
+The 'core' user is a common default user on containerized hosts, and
+obfuscation of it is not advantageous, much like the default 'ubuntu'
+user on Ubuntu systems.
+
+Signed-off-by: Jake Hunsaker <jhunsake@redhat.com>
+---
+ sos/cleaner/parsers/username_parser.py | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/sos/cleaner/parsers/username_parser.py b/sos/cleaner/parsers/username_parser.py
+index 0c3bbac4..64843205 100644
+--- a/sos/cleaner/parsers/username_parser.py
++++ b/sos/cleaner/parsers/username_parser.py
+@@ -28,6 +28,7 @@ class SoSUsernameParser(SoSCleanerParser):
+     prep_map_file = 'sos_commands/login/lastlog_-u_1000-60000'
+     regex_patterns = []
+     skip_list = [
++        'core',
+         'nobody',
+         'nfsnobody',
+         'root'
+-- 
+2.26.3
+
+
+From 581429ca65131711c96f9d56bf2f0e18779aec2e Mon Sep 17 00:00:00 2001
+From: Jake Hunsaker <jhunsake@redhat.com>
+Date: Fri, 18 Jun 2021 14:26:55 -0400
+Subject: [PATCH 10/10] [cleaner] Fix checksum and archive pruning from archive
+ list
+
+Fixes an issue where checksums may have gotten into the list of archives
+to be cleaned, which would cause further issues later. Additionally,
+prevents nested sosreports within top-level archives (such as those from
+`collect`) from being removed as binary files when the top-level archive
+gets obfuscated.
+---
+ sos/cleaner/__init__.py            | 5 +++--
+ sos/cleaner/obfuscation_archive.py | 1 +
+ 2 files changed, 4 insertions(+), 2 deletions(-)
+
+diff --git a/sos/cleaner/__init__.py b/sos/cleaner/__init__.py
+index 88d4d0ea..8280bc50 100644
+--- a/sos/cleaner/__init__.py
++++ b/sos/cleaner/__init__.py
+@@ -226,8 +226,7 @@ third party.
+         nested_archives = []
+         for _file in archive.getmembers():
+             if (re.match('sosreport-.*.tar', _file.name.split('/')[-1]) and not
+-               (_file.name.endswith('.md5') or
+-               _file.name.endswith('.sha256'))):
++                    (_file.name.endswith(('.md5', '.sha256')))):
+                 nested_archives.append(_file.name.split('/')[-1])
+ 
+         if nested_archives:
+@@ -235,6 +234,8 @@ third party.
+             nested_path = self.extract_archive(archive)
+             for arc_file in os.listdir(nested_path):
+                 if re.match('sosreport.*.tar.*', arc_file):
++                    if arc_file.endswith(('.md5', '.sha256')):
++                        continue
+                     self.report_paths.append(os.path.join(nested_path,
+                                                           arc_file))
+             # add the toplevel extracted archive
+diff --git a/sos/cleaner/obfuscation_archive.py b/sos/cleaner/obfuscation_archive.py
+index 90188358..e357450b 100644
+--- a/sos/cleaner/obfuscation_archive.py
++++ b/sos/cleaner/obfuscation_archive.py
+@@ -58,6 +58,7 @@ class SoSObfuscationArchive():
+         Returns: list of files and file regexes
+         """
+         return [
++            'sosreport-',
+             'sys/firmware',
+             'sys/fs',
+             'sys/kernel/debug',
+-- 
+2.26.3
+
diff --git a/SPECS/sos.spec b/SPECS/sos.spec
index 2775538..1b1481c 100644
--- a/SPECS/sos.spec
+++ b/SPECS/sos.spec
@@ -5,7 +5,7 @@
 Summary: A set of tools to gather troubleshooting information from a system
 Name: sos
 Version: 4.1
-Release: 2%{?dist}
+Release: 3%{?dist}
 Group: Applications/System
 Source0: https://github.com/sosreport/sos/archive/%{version}/sos-%{version}.tar.gz
 Source1: sos-audit-%{auditversion}.tgz
@@ -31,6 +31,12 @@ Patch8: sos-bz1956673-pulpcore-plugin.patch
 Patch9: sos-bz1959413-saphana-traceback.patch
 Patch10: sos-bz1961458-collect-nstat.patch
 Patch11: sos-bz1961229-snapper-plugin-and-allocation-failures.patch
+Patch12: sos-bz1925419-all-gluster-files.patch
+Patch13: sos-bz1964499-obfuscate-fqdn-from-dnf-log.patch
+Patch14: sos-bz1886711-enhance-tc-hw-offload.patch
+Patch15: sos-bz1965001-fix-avc-copystating-proc-sys.patch
+Patch16: sos-bz1967613-sssd-common.patch
+Patch17: sos-bz1973675-ocp-cluster-cleaner.patch
 
 
 %description
@@ -53,6 +59,12 @@ support technicians and developers.
 %patch9 -p1
 %patch10 -p1
 %patch11 -p1
+%patch12 -p1
+%patch13 -p1
+%patch14 -p1
+%patch15 -p1
+%patch16 -p1
+%patch17 -p1
 
 %build
 %py3_build
@@ -116,6 +128,20 @@ of the system. Currently storage and filesystem commands are audited.
 %ghost /etc/audit/rules.d/40-sos-storage.rules
 
 %changelog
+* Mon Jun 21 2021 Pavel Moravec <pmoravec@redhat.com> = 4.1-3
+- [gluster] collect public keys from the right dir
+  Resolves: bz1925419
+- [cleaner] Only skip packaging-based files for the IP parser
+  Resolves: bz1964499
+- [networking] collect also tc filter show ingress
+  Resolves: bz1886711
+- [archive] skip copying SELinux context for /proc and /sys
+  Resolves: bz1965001
+- [sssd] enable the sssd plugin when sssd-common is installed
+  Resolves: bz1967613
+- Various OCP/cluster/cleanup enhancements
+  Resolves: bz1973675
+
 * Tue May 18 2021 Pavel Moravec <pmoravec@redhat.com> = 4.1-2
 - Load maps from all archives before obfuscation
   Resolves: bz1930181