From 676dfca09d9c783311a51a1c53fa0f7ecd95bd28 Mon Sep 17 00:00:00 2001
From: Jake Hunsaker <jhunsake@redhat.com>
Date: Fri, 10 Sep 2021 13:38:19 -0400
Subject: [PATCH] [collect] Abstract transport protocol from SoSNode

Since its addition to sos, collect has assumed the use of a system
installation of SSH in order to connect to the nodes identified for
collection. However, there are use cases where other transport
protocols may be desired or required.

As such, provide an abstraction for these protocols in the form of the
new `RemoteTransport` class that `SoSNode` will now leverage. So far,
an abstraction for the currently used SSH ControlPersist functionality
is provided, along with a pseudo abstraction for local execution so
that SoSNode does not directly need to make more "if local then foo"
checks than are absolutely necessary.

Related: #2668

Signed-off-by: Jake Hunsaker <jhunsake@redhat.com>
---
 setup.py                                    |   4 +-
 sos/collector/__init__.py                   |  54 +--
 sos/collector/clusters/__init__.py          |   4 +-
 sos/collector/clusters/jbon.py              |   2 +
 sos/collector/clusters/kubernetes.py        |   4 +-
 sos/collector/clusters/ocp.py               |   6 +-
 sos/collector/clusters/ovirt.py             |  10 +-
 sos/collector/clusters/pacemaker.py         |   8 +-
 sos/collector/clusters/satellite.py         |   4 +-
 sos/collector/sosnode.py                    | 388 +++++---------------
 sos/collector/transports/__init__.py        | 317 ++++++++++++++++
 sos/collector/transports/control_persist.py | 199 ++++++++++
 sos/collector/transports/local.py           |  49 +++
 13 files changed, 705 insertions(+), 344 deletions(-)
 create mode 100644 sos/collector/transports/__init__.py
 create mode 100644 sos/collector/transports/control_persist.py
 create mode 100644 sos/collector/transports/local.py
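
At a high level, the new transport layer changes how SosNode talks to
nodes: instead of assembling ssh command strings itself, it instantiates
a transport object and calls connect()/run_command()/disconnect() on it.
The following sketch illustrates that call pattern. The commons dict is
an illustrative stand-in for the larger dict sos collect actually builds
(the real one also carries keys such as 'policy' and 'sos_cmd'), and the
node address is hypothetical.

    from argparse import Namespace

    from sos.collector.transports.control_persist import SSHControlPersist
    from sos.collector.transports.local import LocalTransport

    # Illustrative stand-in for the commons dict built by SoSCollector.
    commons = {
        'cmdlineopts': Namespace(ssh_user='root', ssh_port=22, ssh_key=None,
                                 sudo_pw='', root_password='',
                                 nopasswd_sudo=True),
        'tmpdir': '/var/tmp/sos-collector',
        'need_sudo': False,
    }

    def load_transport(address):
        # Mirrors SosNode._load_remote_transport(): local addresses get the
        # pseudo transport, everything else gets SSH ControlPersist.
        if address in ('localhost', '127.0.0.1'):
            return LocalTransport(address, commons)
        return SSHControlPersist(address, commons)

    transport = load_transport('node1.example.com')
    transport.connect(password=None)
    res = transport.run_command('hostname', timeout=30)
    print(res['status'], res['output'])
    transport.disconnect()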

diff --git a/setup.py b/setup.py
index 7653b59d..25e87a71 100644
--- a/setup.py
+++ b/setup.py
@@ -101,8 +101,8 @@ setup(
         'sos.policies.distros', 'sos.policies.runtimes',
         'sos.policies.package_managers', 'sos.policies.init_systems',
         'sos.report', 'sos.report.plugins', 'sos.collector',
-        'sos.collector.clusters', 'sos.cleaner', 'sos.cleaner.mappings',
-        'sos.cleaner.parsers', 'sos.cleaner.archives'
+        'sos.collector.clusters', 'sos.collector.transports', 'sos.cleaner',
+        'sos.cleaner.mappings', 'sos.cleaner.parsers', 'sos.cleaner.archives'
     ],
     cmdclass=cmdclass,
     command_options=command_options,
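
The setup.py change only registers the new subpackage so it ships in
built distributions. A hypothetical post-install smoke test:

    import importlib

    # Verify the new subpackage is importable from the installed tree.
    mod = importlib.import_module('sos.collector.transports')
    print(mod.__name__, 'ok')
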
diff --git a/sos/collector/__init__.py b/sos/collector/__init__.py
index b2a07f37..da912655 100644
--- a/sos/collector/__init__.py
+++ b/sos/collector/__init__.py
@@ -17,7 +17,6 @@ import re
 import string
 import socket
 import shutil
-import subprocess
 import sys
 
 from datetime import datetime
@@ -28,7 +27,6 @@ from pipes import quote
 from textwrap import fill
 from sos.cleaner import SoSCleaner
 from sos.collector.sosnode import SosNode
-from sos.collector.exceptions import ControlPersistUnsupportedException
 from sos.options import ClusterOption
 from sos.component import SoSComponent
 from sos import __version__
@@ -154,7 +152,6 @@ class SoSCollector(SoSComponent):
             try:
                 self.parse_node_strings()
                 self.parse_cluster_options()
-                self._check_for_control_persist()
                 self.log_debug('Executing %s' % ' '.join(s for s in sys.argv))
                 self.log_debug("Found cluster profiles: %s"
                                % self.clusters.keys())
@@ -437,33 +434,6 @@ class SoSCollector(SoSComponent):
                                  action='extend',
                                  help='List of usernames to obfuscate')
 
-    def _check_for_control_persist(self):
-        """Checks to see if the local system supported SSH ControlPersist.
-
-        ControlPersist allows OpenSSH to keep a single open connection to a
-        remote host rather than building a new session each time. This is the
-        same feature that Ansible uses in place of paramiko, which we have a
-        need to drop in sos-collector.
-
-        This check relies on feedback from the ssh binary. The command being
-        run should always generate stderr output, but depending on what that
-        output reads we can determine if ControlPersist is supported or not.
-
-        For our purposes, a host that does not support ControlPersist is not
-        able to run sos-collector.
-
-        Returns
-            True if ControlPersist is supported, else raise Exception.
-        """
-        ssh_cmd = ['ssh', '-o', 'ControlPersist']
-        cmd = subprocess.Popen(ssh_cmd, stdout=subprocess.PIPE,
-                               stderr=subprocess.PIPE)
-        out, err = cmd.communicate()
-        err = err.decode('utf-8')
-        if 'Bad configuration option' in err or 'Usage:' in err:
-            raise ControlPersistUnsupportedException
-        return True
-
     def exit(self, msg, error=1):
         """Used to safely terminate if sos-collector encounters an error"""
         self.log_error(msg)
@@ -455,7 +455,7 @@ class SoSCollector(SoSComponent):
             'cmdlineopts': self.opts,
             'need_sudo': True if self.opts.ssh_user != 'root' else False,
             'tmpdir': self.tmpdir,
-            'hostlen': len(self.opts.master) or len(self.hostname),
+            'hostlen': max(len(self.opts.primary), len(self.hostname)),
             'policy': self.policy
         }
 
@@ -1020,9 +1020,10 @@ class SoSCollector(SoSComponent):
             self.node_list.append(self.hostname)
         self.reduce_node_list()
         try:
-            self.commons['hostlen'] = len(max(self.node_list, key=len))
+            _node_max = len(max(self.node_list, key=len))
+            self.commons['hostlen'] = max(_node_max, self.commons['hostlen'])
         except (TypeError, ValueError):
-            self.commons['hostlen'] = len(self.opts.master)
+            pass
 
     def _connect_to_node(self, node):
         """Try to connect to the node, and if we can add to the client list to
@@ -1068,7 +1039,7 @@ class SoSCollector(SoSComponent):
                 client.set_node_manifest(getattr(self.collect_md.nodes,
                                                  node[0]))
             else:
-                client.close_ssh_session()
+                client.disconnect()
         except Exception:
             pass
 
@@ -1077,12 +1048,11 @@ class SoSCollector(SoSComponent):
         provided on the command line
         """
         disclaimer = ("""\
-This utility is used to collect sosreports from multiple \
-nodes simultaneously. It uses OpenSSH's ControlPersist feature \
-to connect to nodes and run commands remotely. If your system \
-installation of OpenSSH is older than 5.6, please upgrade.
+This utility is used to collect sos reports from multiple \
+nodes simultaneously. Remote connections are made and/or maintained \
+to those nodes via well-known transport protocols such as SSH.
 
-An archive of sosreport tarballs collected from the nodes will be \
+An archive of sos report tarballs collected from the nodes will be \
 generated in %s and may be provided to an appropriate support representative.
 
 The generated archive may contain data considered sensitive \
@@ -1230,10 +1200,10 @@ this utility or remote systems that it connects to.
             self.log_error("Error running sosreport: %s" % err)
 
     def close_all_connections(self):
-        """Close all ssh sessions for nodes"""
+        """Close all sessions for nodes"""
         for client in self.client_list:
-            self.log_debug('Closing SSH connection to %s' % client.address)
-            client.close_ssh_session()
+            self.log_debug('Closing connection to %s' % client.address)
+            client.disconnect()
 
     def create_cluster_archive(self):
         """Calls for creation of tar archive then cleans up the temporary
diff --git a/sos/collector/clusters/__init__.py b/sos/collector/clusters/__init__.py
index 2b5d7018..64ac2a44 100644
--- a/sos/collector/clusters/__init__.py
+++ b/sos/collector/clusters/__init__.py
@@ -188,8 +188,8 @@ class Cluster():
         :rtype: ``dict``
         """
         res = self.master.run_command(cmd, get_pty=True, need_root=need_root)
-        if res['stdout']:
-            res['stdout'] = res['stdout'].replace('Password:', '')
+        if res['output']:
+            res['output'] = res['output'].replace('Password:', '')
         return res
 
     def setup(self):
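
The stdout-to-output renames here and in the cluster profiles that
follow reflect the new result shape returned by the transport layer's
run_command(): a dict with 'status' and 'output' keys rather than
separate stdout/stderr fields. A minimal sketch of consuming that shape,
using a canned result value:

    # Canned example of the dict now returned by run_command(); real
    # values come from the transport layer.
    res = {'status': 0, 'output': 'node1 Ready\nnode2 Ready\n'}

    if res['status'] == 0:
        nodes = [line.split()[0] for line in res['output'].splitlines()]
        print(nodes)  # ['node1', 'node2']
    else:
        raise Exception('Node enumeration did not return usable output')
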
diff --git a/sos/collector/clusters/jbon.py b/sos/collector/clusters/jbon.py
index 488fbd16..8f083ac6 100644
--- a/sos/collector/clusters/jbon.py
+++ b/sos/collector/clusters/jbon.py
@@ -28,3 +28,5 @@ class jbon(Cluster):
         # This should never be called, but as insurance explicitly never
         # allow this to be enabled via the determine_cluster() path
         return False
+
+# vim: set et ts=4 sw=4 :
diff --git a/sos/collector/clusters/kubernetes.py b/sos/collector/clusters/kubernetes.py
index cdbf8861..99f788dc 100644
--- a/sos/collector/clusters/kubernetes.py
+++ b/sos/collector/clusters/kubernetes.py
@@ -34,7 +34,7 @@ class kubernetes(Cluster):
         if res['status'] == 0:
             nodes = []
             roles = [x for x in self.get_option('role').split(',') if x]
-            for nodeln in res['stdout'].splitlines()[1:]:
+            for nodeln in res['output'].splitlines()[1:]:
                 node = nodeln.split()
                 if not roles:
                     nodes.append(node[0])
@@ -44,3 +44,5 @@ class kubernetes(Cluster):
             return nodes
         else:
             raise Exception('Node enumeration did not return usable output')
+
+# vim: set et ts=4 sw=4 :
diff --git a/sos/collector/clusters/ocp.py b/sos/collector/clusters/ocp.py
index 5479417d..ad97587f 100644
--- a/sos/collector/clusters/ocp.py
+++ b/sos/collector/clusters/ocp.py
@@ -93,7 +93,7 @@ class ocp(Cluster):
         res = self.exec_master_cmd(self.fmt_oc_cmd(cmd))
         if res['status'] == 0:
             roles = [r for r in self.get_option('role').split(':')]
-            self.node_dict = self._build_dict(res['stdout'].splitlines())
+            self.node_dict = self._build_dict(res['output'].splitlines())
             for node in self.node_dict:
                 if roles:
                     for role in roles:
@@ -103,7 +103,7 @@ class ocp(Cluster):
                     nodes.append(node)
         else:
             msg = "'oc' command failed"
-            if 'Missing or incomplete' in res['stdout']:
+            if 'Missing or incomplete' in res['output']:
                 msg = ("'oc' failed due to missing kubeconfig on master node."
                        " Specify one via '-c ocp.kubeconfig=<path>'")
             raise Exception(msg)
@@ -168,3 +168,5 @@ class ocp(Cluster):
     def set_node_options(self, node):
         # don't attempt OC API collections on non-primary nodes
         node.plugin_options.append('openshift.no-oc=on')
+
+# vim: set et ts=4 sw=4 :
diff --git a/sos/collector/clusters/ovirt.py b/sos/collector/clusters/ovirt.py
index 079a122e..bd2d0c74 100644
--- a/sos/collector/clusters/ovirt.py
+++ b/sos/collector/clusters/ovirt.py
@@ -98,7 +98,7 @@ class ovirt(Cluster):
             return []
         res = self._run_db_query(self.dbquery)
         if res['status'] == 0:
-            nodes = res['stdout'].splitlines()[2:-1]
+            nodes = res['output'].splitlines()[2:-1]
             return [n.split('(')[0].strip() for n in nodes]
         else:
             raise Exception('database query failed, return code: %s'
@@ -114,7 +114,7 @@ class ovirt(Cluster):
         engconf = '/etc/ovirt-engine/engine.conf.d/10-setup-database.conf'
         res = self.exec_primary_cmd('cat %s' % engconf, need_root=True)
         if res['status'] == 0:
-            config = res['stdout'].splitlines()
+            config = res['output'].splitlines()
             for line in config:
                 try:
                     k = str(line.split('=')[0])
@@ -141,7 +141,7 @@ class ovirt(Cluster):
                '--batch -o postgresql {}'
                ).format(self.conf['ENGINE_DB_PASSWORD'], sos_opt)
         db_sos = self.exec_primary_cmd(cmd, need_root=True)
-        for line in db_sos['stdout'].splitlines():
+        for line in db_sos['output'].splitlines():
             if fnmatch.fnmatch(line, '*sosreport-*tar*'):
                 _pg_dump = line.strip()
                 self.master.manifest.add_field('postgresql_dump',
@@ -180,5 +180,7 @@ class rhhi_virt(rhv):
         ret = self._run_db_query('SELECT count(server_id) FROM gluster_server')
         if ret['status'] == 0:
             # if there are any entries in this table, RHHI-V is in use
-            return ret['stdout'].splitlines()[2].strip() != '0'
+            return ret['output'].splitlines()[2].strip() != '0'
         return False
+
+# vim: set et ts=4 sw=4 :
diff --git a/sos/collector/clusters/pacemaker.py b/sos/collector/clusters/pacemaker.py
index 034f3f3e..55024314 100644
--- a/sos/collector/clusters/pacemaker.py
+++ b/sos/collector/clusters/pacemaker.py
@@ -27,7 +27,7 @@ class pacemaker(Cluster):
             self.log_error('Cluster status could not be determined. Is the '
                            'cluster running on this node?')
             return []
-        if 'node names do not match' in self.res['stdout']:
+        if 'node names do not match' in self.res['output']:
             self.log_warn('Warning: node name mismatch reported. Attempts to '
                           'connect to some nodes may fail.\n')
         return self.parse_pcs_output()
@@ -41,17 +41,19 @@ class pacemaker(Cluster):
         return nodes
 
     def get_online_nodes(self):
-        for line in self.res['stdout'].splitlines():
+        for line in self.res['output'].splitlines():
             if line.startswith('Online:'):
                 nodes = line.split('[')[1].split(']')[0]
                 return [n for n in nodes.split(' ') if n]
 
     def get_offline_nodes(self):
         offline = []
-        for line in self.res['stdout'].splitlines():
+        for line in self.res['output'].splitlines():
             if line.startswith('Node') and line.endswith('(offline)'):
                 offline.append(line.split()[1].replace(':', ''))
             if line.startswith('OFFLINE:'):
                 nodes = line.split('[')[1].split(']')[0]
                 offline.extend([n for n in nodes.split(' ') if n])
         return offline
+
+# vim: set et ts=4 sw=4 :
diff --git a/sos/collector/clusters/satellite.py b/sos/collector/clusters/satellite.py
index e123c8a3..7c21e553 100644
--- a/sos/collector/clusters/satellite.py
+++ b/sos/collector/clusters/satellite.py
@@ -28,7 +28,7 @@ class satellite(Cluster):
         res = self.exec_primary_cmd(cmd, need_root=True)
         if res['status'] == 0:
             nodes = [
-                n.strip() for n in res['stdout'].splitlines()
+                n.strip() for n in res['output'].splitlines()
                 if 'could not change directory' not in n
             ]
             return nodes
@@ -38,3 +38,5 @@ class satellite(Cluster):
         if node.address == self.master.address:
             return 'satellite'
         return 'capsule'
+
+# vim: set et ts=4 sw=4 :
diff --git a/sos/collector/sosnode.py b/sos/collector/sosnode.py
index 4b1ee109..f79bd5ff 100644
--- a/sos/collector/sosnode.py
+++ b/sos/collector/sosnode.py
@@ -12,22 +12,16 @@ import fnmatch
 import inspect
 import logging
 import os
-import pexpect
 import re
-import shutil
 
 from distutils.version import LooseVersion
 from pipes import quote
 from sos.policies import load
 from sos.policies.init_systems import InitSystem
-from sos.collector.exceptions import (InvalidPasswordException,
-                                      TimeoutPasswordAuthException,
-                                      PasswordRequestException,
-                                      AuthPermissionDeniedException,
+from sos.collector.transports.control_persist import SSHControlPersist
+from sos.collector.transports.local import LocalTransport
+from sos.collector.exceptions import (CommandTimeoutException,
                                       ConnectionException,
-                                      CommandTimeoutException,
-                                      ConnectionTimeoutException,
-                                      ControlSocketMissingException,
                                       UnsupportedHostException)
 
 
@@ -61,34 +61,25 @@ class SosNode():
             'sos_cmd': commons['sos_cmd']
         }
         self.sos_bin = 'sosreport'
-        filt = ['localhost', '127.0.0.1']
         self.soslog = logging.getLogger('sos')
         self.ui_log = logging.getLogger('sos_ui')
-        self.control_path = ("%s/.sos-collector-%s"
-                             % (self.tmpdir, self.address))
-        self.ssh_cmd = self._create_ssh_command()
-        if self.address not in filt:
-            try:
-                self.connected = self._create_ssh_session()
-            except Exception as err:
-                self.log_error('Unable to open SSH session: %s' % err)
-                raise
-        else:
-            self.connected = True
-            self.local = True
-            self.need_sudo = os.getuid() != 0
+        self._transport = self._load_remote_transport(commons)
+        try:
+            self._transport.connect(self._password)
+        except Exception as err:
+            self.log_error('Unable to open remote session: %s' % err)
+            raise
         # load the host policy now, even if we don't want to load further
         # host information. This is necessary if we're running locally on the
         # cluster master but do not want a local report as we still need to do
         # package checks in that instance
         self.host = self.determine_host_policy()
-        self.get_hostname()
+        self.hostname = self._transport.hostname
         if self.local and self.opts.no_local:
             load_facts = False
         if self.connected and load_facts:
             if not self.host:
-                self.connected = False
-                self.close_ssh_session()
+                self._transport.disconnect()
                 return None
             if self.local:
                 if self.check_in_container():
@@ -103,11 +88,26 @@ class SosNode():
                 self.create_sos_container()
             self._load_sos_info()
 
-    def _create_ssh_command(self):
-        """Build the complete ssh command for this node"""
-        cmd = "ssh -oControlPath=%s " % self.control_path
-        cmd += "%s@%s " % (self.opts.ssh_user, self.address)
-        return cmd
+    @property
+    def connected(self):
+        if self._transport:
+            return self._transport.connected
+        # if no transport, we're running locally
+        return True
+
+    def disconnect(self):
+        """Wrapper to close the remote session via our transport agent
+        """
+        self._transport.disconnect()
+
+    def _load_remote_transport(self, commons):
+        """Determine the type of remote transport to load for this node, then
+        return an instantiated instance of that transport
+        """
+        if self.address in ['localhost', '127.0.0.1']:
+            self.local = True
+            return LocalTransport(self.address, commons)
+        return SSHControlPersist(self.address, commons)
 
     def _fmt_msg(self, msg):
         return '{:<{}} : {}'.format(self._hostname, self.hostlen + 1, msg)
@@ -135,6 +135,7 @@ class SosNode():
         self.manifest.add_field('policy', self.host.distro)
         self.manifest.add_field('sos_version', self.sos_info['version'])
         self.manifest.add_field('final_sos_command', '')
+        self.manifest.add_field('transport', self._transport.name)
 
     def check_in_container(self):
         """
@@ -160,13 +161,13 @@ class SosNode():
             res = self.run_command(cmd, need_root=True)
             if res['status'] in [0, 125]:
                 if res['status'] == 125:
-                    if 'unable to retrieve auth token' in res['stdout']:
+                    if 'unable to retrieve auth token' in res['output']:
                         self.log_error(
                             "Could not pull image. Provide either a username "
                             "and password or authfile"
                         )
                         raise Exception
-                    elif 'unknown: Not found' in res['stdout']:
+                    elif 'unknown: Not found' in res['output']:
                         self.log_error('Specified image not found on registry')
                         raise Exception
                     # 'name exists' with code 125 means the container was
@@ -181,11 +182,11 @@ class SosNode():
                     return True
                 else:
                     self.log_error("Could not start container after create: %s"
-                                   % ret['stdout'])
+                                   % ret['output'])
                     raise Exception
             else:
                 self.log_error("Could not create container on host: %s"
-                               % res['stdout'])
+                               % res['output'])
                 raise Exception
 
     def get_container_auth(self):
@@ -204,18 +205,11 @@ class SosNode():
 
     def file_exists(self, fname, need_root=False):
         """Checks for the presence of fname on the remote node"""
-        if not self.local:
-            try:
-                res = self.run_command("stat %s" % fname, need_root=need_root)
-                return res['status'] == 0
-            except Exception:
-                return False
-        else:
-            try:
-                os.stat(fname)
-                return True
-            except Exception:
-                return False
+        try:
+            res = self.run_command("stat %s" % fname, need_root=need_root)
+            return res['status'] == 0
+        except Exception:
+            return False
 
     @property
     def _hostname(self):
@@ -223,18 +217,6 @@ class SosNode():
             return self.hostname
         return self.address
 
-    @property
-    def control_socket_exists(self):
-        """Check if the SSH control socket exists
-
-        The control socket is automatically removed by the SSH daemon in the
-        event that the last connection to the node was greater than the timeout
-        set by the ControlPersist option. This can happen for us if we are
-        collecting from a large number of nodes, and the timeout expires before
-        we start collection.
-        """
-        return os.path.exists(self.control_path)
-
     def _sanitize_log_msg(self, msg):
         """Attempts to obfuscate sensitive information in log messages such as
         passwords"""
@@ -264,12 +246,6 @@ class SosNode():
         msg = '[%s:%s] %s' % (self._hostname, caller, msg)
         self.soslog.debug(msg)
 
-    def get_hostname(self):
-        """Get the node's hostname"""
-        sout = self.run_command('hostname')
-        self.hostname = sout['stdout'].strip()
-        self.log_info('Hostname set to %s' % self.hostname)
-
     def _format_cmd(self, cmd):
         """If we need to provide a sudo or root password to a command, then
         here we prefix the command with the correct bits
@@ -280,19 +256,6 @@ class SosNode():
             return "sudo -S %s" % cmd
         return cmd
 
-    def _fmt_output(self, output=None, rc=0):
-        """Formats the returned output from a command into a dict"""
-        if rc == 0:
-            stdout = output
-            stderr = ''
-        else:
-            stdout = ''
-            stderr = output
-        res = {'status': rc,
-               'stdout': stdout,
-               'stderr': stderr}
-        return res
-
     def _load_sos_info(self):
         """Queries the node for information about the installed version of sos
         """
@@ -306,7 +269,7 @@ class SosNode():
             pkgs = self.run_command(self.host.container_version_command,
                                     use_container=True, need_root=True)
             if pkgs['status'] == 0:
-                ver = pkgs['stdout'].strip().split('-')[1]
+                ver = pkgs['output'].strip().split('-')[1]
                 if ver:
                     self.sos_info['version'] = ver
             else:
@@ -321,18 +284,21 @@ class SosNode():
                 self.log_error('sos is not installed on this node')
             self.connected = False
             return False
-        cmd = 'sosreport -l'
+        # sos-4.0 changes the binary
+        if self.check_sos_version('4.0'):
+            self.sos_bin = 'sos report'
+        cmd = "%s -l" % self.sos_bin
         sosinfo = self.run_command(cmd, use_container=True, need_root=True)
         if sosinfo['status'] == 0:
-            self._load_sos_plugins(sosinfo['stdout'])
+            self._load_sos_plugins(sosinfo['output'])
         if self.check_sos_version('3.6'):
             self._load_sos_presets()
 
     def _load_sos_presets(self):
-        cmd = 'sosreport --list-presets'
+        cmd = '%s --list-presets' % self.sos_bin
         res = self.run_command(cmd, use_container=True, need_root=True)
         if res['status'] == 0:
-            for line in res['stdout'].splitlines():
+            for line in res['output'].splitlines():
                 if line.strip().startswith('name:'):
                     pname = line.split('name:')[1].strip()
                     self.sos_info['presets'].append(pname)
@@ -372,21 +338,7 @@ class SosNode():
         """Reads the specified file and returns the contents"""
         try:
             self.log_info("Reading file %s" % to_read)
-            if not self.local:
-                res = self.run_command("cat %s" % to_read, timeout=5)
-                if res['status'] == 0:
-                    return res['stdout']
-                else:
-                    if 'No such file' in res['stdout']:
-                        self.log_debug("File %s does not exist on node"
-                                       % to_read)
-                    else:
-                        self.log_error("Error reading %s: %s" %
-                                       (to_read, res['stdout'].split(':')[1:]))
-                    return ''
-            else:
-                with open(to_read, 'r') as rfile:
-                    return rfile.read()
+            return self._transport.read_file(to_read)
         except Exception as err:
             self.log_error("Exception while reading %s: %s" % (to_read, err))
             return ''
@@ -400,7 +352,8 @@ class SosNode():
                           % self.commons['policy'].distro)
             return self.commons['policy']
         host = load(cache={}, sysroot=self.opts.sysroot, init=InitSystem(),
-                    probe_runtime=True, remote_exec=self.ssh_cmd,
+                    probe_runtime=True,
+                    remote_exec=self._transport.remote_exec,
                     remote_check=self.read_file('/etc/os-release'))
         if host:
             self.log_info("loaded policy %s for host" % host.distro)
@@ -422,7 +375,7 @@ class SosNode():
         return self.host.package_manager.pkg_by_name(pkg) is not None
 
     def run_command(self, cmd, timeout=180, get_pty=False, need_root=False,
-                    force_local=False, use_container=False, env=None):
+                    use_container=False, env=None):
         """Runs a given cmd, either via the SSH session or locally
 
         Arguments:
@@ -433,58 +386,37 @@ class SosNode():
             need_root - if a command requires root privileges, setting this to
                         True tells sos-collector to format the command with
                         sudo or su - as appropriate and to input the password
-            force_local - force a command to run locally. Mainly used for scp.
             use_container - Run this command in a container *IF* the host is
                             containerized
         """
-        if not self.control_socket_exists and not self.local:
-            self.log_debug('Control socket does not exist, attempting to '
-                           're-create')
+        if not self.connected and not self.local:
+            self.log_debug('Node is disconnected, attempting to reconnect')
             try:
-                _sock = self._create_ssh_session()
-                if not _sock:
-                    self.log_debug('Failed to re-create control socket')
-                    raise ControlSocketMissingException
+                reconnected = self._transport.reconnect(self._password)
+                if not reconnected:
+                    self.log_debug('Failed to reconnect to node')
+                    raise ConnectionException
             except Exception as err:
-                self.log_error('Cannot run command: control socket does not '
-                               'exist')
-                self.log_debug("Error while trying to create new SSH control "
-                               "socket: %s" % err)
+                self.log_debug("Error while trying to reconnect: %s" % err)
                 raise
         if use_container and self.host.containerized:
             cmd = self.host.format_container_command(cmd)
         if need_root:
-            get_pty = True
             cmd = self._format_cmd(cmd)
-        self.log_debug('Running command %s' % cmd)
+
         if 'atomic' in cmd:
             get_pty = True
-        if not self.local and not force_local:
-            cmd = "%s %s" % (self.ssh_cmd, quote(cmd))
-        else:
-            if get_pty:
-                cmd = "/bin/bash -c %s" % quote(cmd)
+
+        if get_pty:
+            cmd = "/bin/bash -c %s" % quote(cmd)
+
         if env:
             _cmd_env = self.env_vars
             env = _cmd_env.update(env)
-        res = pexpect.spawn(cmd, encoding='utf-8', env=env)
-        if need_root:
-            if self.need_sudo:
-                res.sendline(self.opts.sudo_pw)
-            if self.opts.become_root:
-                res.sendline(self.opts.root_password)
-        output = res.expect([pexpect.EOF, pexpect.TIMEOUT],
-                            timeout=timeout)
-        if output == 0:
-            out = res.before
-            res.close()
-            rc = res.exitstatus
-            return {'status': rc, 'stdout': out}
-        elif output == 1:
-            raise CommandTimeoutException(cmd)
+        return self._transport.run_command(cmd, timeout, need_root, env)
 
     def sosreport(self):
-        """Run a sosreport on the node, then collect it"""
+        """Run an sos report on the node, then collect it"""
         try:
             path = self.execute_sos_command()
             if path:
@@ -497,109 +429,6 @@ class SosNode():
             pass
         self.cleanup()
 
-    def _create_ssh_session(self):
-        """
-        Using ControlPersist, create the initial connection to the node.
-
-        This will generate an OpenSSH ControlPersist socket within the tmp
-        directory created or specified for sos-collector to use.
-
-        At most, we will wait 30 seconds for a connection. This involves a 15
-        second wait for the initial connection attempt, and a subsequent 15
-        second wait for a response when we supply a password.
-
-        Since we connect to nodes in parallel (using the --threads value), this
-        means that the time between 'Connecting to nodes...' and 'Beginning
-        collection of sosreports' that users see can be up to an amount of time
-        equal to 30*(num_nodes/threads) seconds.
-
-        Returns
-            True if session is successfully opened, else raise Exception
-        """
-        # Don't use self.ssh_cmd here as we need to add a few additional
-        # parameters to establish the initial connection
-        self.log_info('Opening SSH session to create control socket')
-        connected = False
-        ssh_key = ''
-        ssh_port = ''
-        if self.opts.ssh_port != 22:
-            ssh_port = "-p%s " % self.opts.ssh_port
-        if self.opts.ssh_key:
-            ssh_key = "-i%s" % self.opts.ssh_key
-        cmd = ("ssh %s %s -oControlPersist=600 -oControlMaster=auto "
-               "-oStrictHostKeyChecking=no -oControlPath=%s %s@%s "
-               "\"echo Connected\"" % (ssh_key,
-                                       ssh_port,
-                                       self.control_path,
-                                       self.opts.ssh_user,
-                                       self.address))
-        res = pexpect.spawn(cmd, encoding='utf-8')
-
-        connect_expects = [
-            u'Connected',
-            u'password:',
-            u'.*Permission denied.*',
-            u'.* port .*: No route to host',
-            u'.*Could not resolve hostname.*',
-            pexpect.TIMEOUT
-        ]
-
-        index = res.expect(connect_expects, timeout=15)
-
-        if index == 0:
-            connected = True
-        elif index == 1:
-            if self._password:
-                pass_expects = [
-                    u'Connected',
-                    u'Permission denied, please try again.',
-                    pexpect.TIMEOUT
-                ]
-                res.sendline(self._password)
-                pass_index = res.expect(pass_expects, timeout=15)
-                if pass_index == 0:
-                    connected = True
-                elif pass_index == 1:
-                    # Note that we do not get an exitstatus here, so matching
-                    # this line means an invalid password will be reported for
-                    # both invalid passwords and invalid user names
-                    raise InvalidPasswordException
-                elif pass_index == 2:
-                    raise TimeoutPasswordAuthException
-            else:
-                raise PasswordRequestException
-        elif index == 2:
-            raise AuthPermissionDeniedException
-        elif index == 3:
-            raise ConnectionException(self.address, self.opts.ssh_port)
-        elif index == 4:
-            raise ConnectionException(self.address)
-        elif index == 5:
-            raise ConnectionTimeoutException
-        else:
-            raise Exception("Unknown error, client returned %s" % res.before)
-        if connected:
-            self.log_debug("Successfully created control socket at %s"
-                           % self.control_path)
-            return True
-        return False
-
-    def close_ssh_session(self):
-        """Remove the control socket to effectively terminate the session"""
-        if self.local:
-            return True
-        try:
-            res = self.run_command("rm -f %s" % self.control_path,
-                                   force_local=True)
-            if res['status'] == 0:
-                return True
-            self.log_error("Could not remove ControlPath %s: %s"
-                           % (self.control_path, res['stdout']))
-            return False
-        except Exception as e:
-            self.log_error('Error closing SSH session: %s' % e)
-            return False
-
     def _preset_exists(self, preset):
         """Verifies if the given preset exists on the node"""
         return preset in self.sos_info['presets']
@@ -646,8 +475,8 @@ class SosNode():
         self.cluster = cluster
 
     def update_cmd_from_cluster(self):
-        """This is used to modify the sosreport command run on the nodes.
-        By default, sosreport is run without any options, using this will
+        """This is used to modify the sos report command run on the nodes.
+        By default, sos report is run without any options, using this will
         allow the profile to specify what plugins to run or not and what
         options to use.
 
@@ -727,10 +556,6 @@ class SosNode():
             if self.opts.since:
                 sos_opts.append('--since=%s' % quote(self.opts.since))
 
-        # sos-4.0 changes the binary
-        if self.check_sos_version('4.0'):
-            self.sos_bin = 'sos report'
-
         if self.check_sos_version('4.1'):
             if self.opts.skip_commands:
                 sos_opts.append(
@@ -811,7 +636,7 @@ class SosNode():
         self.manifest.add_field('final_sos_command', self.sos_cmd)
 
     def determine_sos_label(self):
-        """Determine what, if any, label should be added to the sosreport"""
+        """Determine what, if any, label should be added to the sos report"""
         label = ''
         label += self.cluster.get_node_label(self)
 
@@ -822,7 +647,7 @@ class SosNode():
         if not label:
             return None
 
-        self.log_debug('Label for sosreport set to %s' % label)
+        self.log_debug('Label for sos report set to %s' % label)
         if self.check_sos_version('3.6'):
             lcmd = '--label'
         else:
@@ -844,20 +669,20 @@ class SosNode():
 
     def determine_sos_error(self, rc, stdout):
         if rc == -1:
-            return 'sosreport process received SIGKILL on node'
+            return 'sos report process received SIGKILL on node'
         if rc == 1:
             if 'sudo' in stdout:
                 return 'sudo attempt failed'
         if rc == 127:
-            return 'sosreport terminated unexpectedly. Check disk space'
+            return 'sos report terminated unexpectedly. Check disk space'
         if len(stdout) > 0:
             return stdout.split('\n')[0:1]
         else:
             return 'sos exited with code %s' % rc
 
     def execute_sos_command(self):
-        """Run sosreport and capture the resulting file path"""
-        self.ui_msg('Generating sosreport...')
+        """Run sos report and capture the resulting file path"""
+        self.ui_msg('Generating sos report...')
         try:
             path = False
             checksum = False
@@ -867,7 +692,7 @@ class SosNode():
                                    use_container=True,
                                    env=self.sos_env_vars)
             if res['status'] == 0:
-                for line in res['stdout'].splitlines():
+                for line in res['output'].splitlines():
                     if fnmatch.fnmatch(line, '*sosreport-*tar*'):
                         path = line.strip()
                     if line.startswith((" sha256\t", " md5\t")):
@@ -884,44 +709,31 @@ class SosNode():
                     else:
                         self.manifest.add_field('checksum_type', 'unknown')
             else:
-                err = self.determine_sos_error(res['status'], res['stdout'])
-                self.log_debug("Error running sosreport. rc = %s msg = %s"
-                               % (res['status'], res['stdout'] or
-                                  res['stderr']))
+                err = self.determine_sos_error(res['status'], res['output'])
+                self.log_debug("Error running sos report. rc = %s msg = %s"
+                               % (res['status'], res['output']))
                 raise Exception(err)
             return path
         except CommandTimeoutException:
             self.log_error('Timeout exceeded')
             raise
         except Exception as e:
-            self.log_error('Error running sosreport: %s' % e)
+            self.log_error('Error running sos report: %s' % e)
             raise
 
     def retrieve_file(self, path):
         """Copies the specified file from the host to our temp dir"""
         destdir = self.tmpdir + '/'
-        dest = destdir + path.split('/')[-1]
+        dest = os.path.join(destdir, path.split('/')[-1])
         try:
-            if not self.local:
-                if self.file_exists(path):
-                    self.log_info("Copying remote %s to local %s" %
-                                  (path, destdir))
-                    cmd = "/usr/bin/scp -oControlPath=%s %s@%s:%s %s" % (
-                        self.control_path,
-                        self.opts.ssh_user,
-                        self.address,
-                        path,
-                        destdir
-                    )
-                    res = self.run_command(cmd, force_local=True)
-                    return res['status'] == 0
-                else:
-                    self.log_debug("Attempting to copy remote file %s, but it "
-                                   "does not exist on filesystem" % path)
-                    return False
+            if self.file_exists(path):
+                self.log_info("Copying remote %s to local %s" %
+                              (path, destdir))
+                self._transport.retrieve_file(path, dest)
             else:
-                self.log_debug("Moving %s to %s" % (path, destdir))
-                shutil.copy(path, dest)
+                self.log_debug("Attempting to copy remote file %s, but it "
+                               "does not exist on filesystem" % path)
+                return False
             return True
         except Exception as err:
             self.log_debug("Failed to retrieve %s: %s" % (path, err))
@@ -933,7 +745,7 @@ class SosNode():
         """
         path = ''.join(path.split())
         try:
-            if len(path) <= 2:  # ensure we have a non '/' path
+            if len(path.split('/')) <= 2:  # ensure we have a non '/' path
                 self.log_debug("Refusing to remove path %s: appears to be "
                                "incorrect and possibly dangerous" % path)
                 return False
@@ -959,14 +771,14 @@ class SosNode():
                 except Exception:
                     self.log_error('Failed to make archive readable')
                     return False
-            self.soslog.info('Retrieving sosreport from %s' % self.address)
-            self.ui_msg('Retrieving sosreport...')
+            self.soslog.info('Retrieving sos report from %s' % self.address)
+            self.ui_msg('Retrieving sos report...')
             ret = self.retrieve_file(self.sos_path)
             if ret:
-                self.ui_msg('Successfully collected sosreport')
+                self.ui_msg('Successfully collected sos report')
                 self.file_list.append(self.sos_path.split('/')[-1])
             else:
-                self.log_error('Failed to retrieve sosreport')
+                self.log_error('Failed to retrieve sos report')
                 raise SystemExit
             return True
         else:
@@ -976,8 +788,8 @@ class SosNode():
             else:
                 e = [x.strip() for x in self.stdout.readlines() if x.strip][-1]
             self.soslog.error(
-                'Failed to run sosreport on %s: %s' % (self.address, e))
-            self.log_error('Failed to run sosreport. %s' % e)
+                'Failed to run sos report on %s: %s' % (self.address, e))
+            self.log_error('Failed to run sos report. %s' % e)
             return False
 
     def remove_sos_archive(self):
@@ -986,20 +798,20 @@ class SosNode():
         if self.sos_path is None:
             return
         if 'sosreport' not in self.sos_path:
-            self.log_debug("Node sosreport path %s looks incorrect. Not "
+            self.log_debug("Node sos report path %s looks incorrect. Not "
                            "attempting to remove path" % self.sos_path)
             return
         removed = self.remove_file(self.sos_path)
         if not removed:
-            self.log_error('Failed to remove sosreport')
+            self.log_error('Failed to remove sos report')
 
     def cleanup(self):
         """Remove the sos archive from the node once we have it locally"""
         self.remove_sos_archive()
         if self.sos_path:
             for ext in ['.sha256', '.md5']:
-                if os.path.isfile(self.sos_path + ext):
-                    self.remove_file(self.sos_path + ext)
+                if self.remove_file(self.sos_path + ext):
+                    break
         cleanup = self.host.set_cleanup_cmd()
         if cleanup:
             self.run_command(cleanup, need_root=True)
@@ -1040,3 +852,5 @@ class SosNode():
             msg = "Exception while making %s readable. Return code was %s"
             self.log_error(msg % (filepath, res['status']))
             raise Exception
+
+# vim: set et ts=4 sw=4 :
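
With the transport abstraction in place, SosNode.run_command() no longer
checks for an SSH control socket; it asks the transport to reconnect
when the session has dropped. A simplified sketch of that flow, where
'node' stands in for a SosNode instance (the real method also handles
containers, sudo formatting, and env vars):

    from sos.collector.exceptions import ConnectionException

    # Simplified sketch of the reconnect-on-demand logic in
    # SosNode.run_command().
    def run_node_command(node, cmd, timeout=180):
        if not node.connected and not node.local:
            if not node._transport.reconnect(node._password):
                raise ConnectionException
        return node._transport.run_command(cmd, timeout=timeout)
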
diff --git a/sos/collector/transports/__init__.py b/sos/collector/transports/__init__.py
new file mode 100644
index 00000000..5be7dc6d
--- /dev/null
+++ b/sos/collector/transports/__init__.py
@@ -0,0 +1,317 @@
+# Copyright Red Hat 2021, Jake Hunsaker <jhunsake@redhat.com>
+
+# This file is part of the sos project: https://github.com/sosreport/sos
+#
+# This copyrighted material is made available to anyone wishing to use,
+# modify, copy, or redistribute it subject to the terms and conditions of
+# version 2 of the GNU General Public License.
+#
+# See the LICENSE file in the source distribution for further information.
+
+import inspect
+import logging
+import pexpect
+import re
+
+from pipes import quote
+from sos.collector.exceptions import (ConnectionException,
+                                      CommandTimeoutException)
+
+
+class RemoteTransport():
+    """The base class used for defining supported remote transports to connect
+    to remote nodes in conjunction with `sos collect`.
+
+    This abstraction is used to manage the backend connections to nodes so that
+    SoSNode() objects can be leveraged generically to connect to nodes, inspect
+    those nodes, and run commands on them.
+    """
+
+    name = 'undefined'
+
+    def __init__(self, address, commons):
+        self.address = address
+        self.opts = commons['cmdlineopts']
+        self.tmpdir = commons['tmpdir']
+        self.need_sudo = commons['need_sudo']
+        self._hostname = None
+        self.soslog = logging.getLogger('sos')
+        self.ui_log = logging.getLogger('sos_ui')
+
+    def _sanitize_log_msg(self, msg):
+        """Attempts to obfuscate sensitive information in log messages such as
+        passwords"""
+        reg = r'(?P<var>(pass|key|secret|PASS|KEY|SECRET).*?=)(?P<value>.*?\s)'
+        return re.sub(reg, r'\g<var>****** ', msg)
+
+    def log_info(self, msg):
+        """Used to print and log info messages"""
+        caller = inspect.stack()[1][3]
+        lmsg = '[%s:%s] %s' % (self.hostname, caller, msg)
+        self.soslog.info(lmsg)
+
+    def log_error(self, msg):
+        """Used to print and log error messages"""
+        caller = inspect.stack()[1][3]
+        lmsg = '[%s:%s] %s' % (self.hostname, caller, msg)
+        self.soslog.error(lmsg)
+
+    def log_debug(self, msg):
+        """Used to print and log debug messages"""
+        msg = self._sanitize_log_msg(msg)
+        caller = inspect.stack()[1][3]
+        msg = '[%s:%s] %s' % (self.hostname, caller, msg)
+        self.soslog.debug(msg)
+
+    @property
+    def hostname(self):
+        if self._hostname and 'localhost' not in self._hostname:
+            return self._hostname
+        return self.address
+
+    @property
+    def connected(self):
+        """Is the transport __currently__ connected to the node, or otherwise
+        capable of seamlessly running a command or similar on the node?
+        """
+        return False
+
+    @property
+    def remote_exec(self):
+        """This is the command string needed to leverage the remote transport
+        when executing commands. For example, for an SSH transport this would
+        be the `ssh <options>` string prepended to any command so that the
+        command is executed by the ssh binary.
+
+        This is also referenced by the `remote_exec` parameter for policies
+        when loading a policy for a remote node
+        """
+        return None
+
+    def connect(self, password):
+        """Perform the connection steps in order to ensure that we are able to
+        connect to the node for all future operations. Note that this should
+        not provide an interactive shell at this time.
+        """
+        if self._connect(password):
+            if not self._hostname:
+                self._get_hostname()
+            return True
+        return False
+
+    def _connect(self, password):
+        """Actually perform the connection requirements. Should be overridden
+        by specific transports that subclass RemoteTransport
+        """
+        raise NotImplementedError("Transport %s does not define connect"
+                                  % self.name)
+
+    def reconnect(self, password):
+        """Attempts to reconnect to the node using the standard connect()
+        but does not do so indefinitely. This imposes a strict number of retry
+        attempts before failing out
+        """
+        attempts = 1
+        last_err = 'unknown'
+        while attempts <= 5:
+            self.log_debug("Attempting reconnect (#%s) to node" % attempts)
+            try:
+                if self.connect(password):
+                    return True
+            except Exception as err:
+                self.log_debug("Attempt #%s exception: %s" % (attempts, err))
+                last_err = err
+            attempts += 1
+        self.log_error("Unable to reconnect to node after 5 attempts, "
+                       "aborting.")
+        raise ConnectionException("last exception from transport: %s"
+                                  % last_err)
+
+    def disconnect(self):
+        """Perform whatever steps are necessary, if any, to terminate any
+        connection to the node
+        """
+        try:
+            if self._disconnect():
+                self.log_debug("Successfully disconnected from node")
+            else:
+                self.log_error("Unable to successfully disconnect, see log for"
+                               " more details")
+        except Exception as err:
+            self.log_error("Failed to disconnect: %s" % err)
+
+    def _disconnect(self):
+        raise NotImplementedError("Transport %s does not define disconnect"
+                                  % self.name)
+
+    def run_command(self, cmd, timeout=180, need_root=False, env=None):
+        """Run a command on the node, returning its output and exit code.
+        This should return the exit code of the command being executed, not the
+        exit code of whatever mechanism the transport uses to execute that
+        command
+
+        :param cmd:         The command to run
+        :type cmd:          ``str``
+
+        :param timeout:     The maximum time in seconds to allow the cmd to run
+        :type timeout:      ``int``
+
+        :param need_root:   Does ``cmd`` require root privileges?
+        :type need_root:    ``bool``
+
+        :param env:         Specify env vars to be passed to the ``cmd``
+        :type env:          ``dict``
+
+        :returns:           Output of ``cmd`` and the exit code
+        :rtype:             ``dict`` with keys ``output`` and ``status``
+        """
+        self.log_debug('Running command %s' % cmd)
+        # currently we only use/support the use of pexpect for handling the
+        # execution of these commands, as opposed to directly invoking
+        # subprocess.Popen() in conjunction with tools like sshpass.
+        # If that changes in the future, we'll add decision making logic here
+        # to route to the appropriate handler, but for now we just go straight
+        # to using pexpect
+        return self._run_command_with_pexpect(cmd, timeout, need_root, env)
+
+    def _format_cmd_for_exec(self, cmd):
+        """Format the command in the way needed for the remote transport to
+        successfully execute it as one would when manually executing it
+
+        :param cmd:     The command being executed, as formatted by SoSNode
+        :type cmd:      ``str``
+
+
+        :returns:       The command further formatted as needed by this
+                        transport
+        :rtype:         ``str``
+        """
+        cmd = "%s %s" % (self.remote_exec, quote(cmd))
+        cmd = cmd.lstrip()
+        return cmd
+
+    def _run_command_with_pexpect(self, cmd, timeout, need_root, env):
+        """Execute the command using pexpect, which allows us to more easily
+        handle prompts and timeouts compared to directly leveraging the
+        subprocess.Popen() method.
+
+        :param cmd:     The command to execute. This will be automatically
+                        formatted to use the transport.
+        :type cmd:      ``str``
+
+        :param timeout: The maximum time in seconds to run ``cmd``
+        :type timeout:  ``int``
+
+        :param need_root:   Does ``cmd`` need to run as root or with sudo?
+        :type need_root:    ``bool``
+
+        :param env:     Any env vars that ``cmd`` should be run with
+        :type env:      ``dict``
+        """
+        cmd = self._format_cmd_for_exec(cmd)
+        result = pexpect.spawn(cmd, encoding='utf-8', env=env)
+
+        _expects = [pexpect.EOF, pexpect.TIMEOUT]
+        if need_root and self.opts.ssh_user != 'root':
+            _expects.extend([
+                '\\[sudo\\] password for .*:',
+                'Password:'
+            ])
+
+        index = result.expect(_expects, timeout=timeout)
+
+        if index in [2, 3]:
+            self._send_pexpect_password(index, result)
+            index = result.expect(_expects, timeout=timeout)
+
+        if index == 0:
+            out = result.before
+            result.close()
+            return {'status': result.exitstatus, 'output': out}
+        elif index == 1:
+            raise CommandTimeoutException(cmd)
+
+    def _send_pexpect_password(self, index, result):
+        """Handle password prompts for sudo and su usage for non-root SSH users
+
+        :param index:       The index pexpect.spawn returned to match against
+                            either a sudo or su prompt
+        :type index:        ``int``
+
+        :param result:      The spawn running the command
+        :type result:       ``pexpect.spawn``
+        """
+        if index == 2:
+            if not self.opts.sudo_pw and not self.opts.nopasswd_sudo:
+                msg = ("Unable to run command: sudo password "
+                       "required but not provided")
+                self.log_error(msg)
+                raise Exception(msg)
+            result.sendline(self.opts.sudo_pw)
+        elif index == 3:
+            if not self.opts.root_password:
+                msg = ("Unable to run command as root: no root password given")
+                self.log_error(msg)
+                raise Exception(msg)
+            result.sendline(self.opts.root_password)
+
+    def _get_hostname(self):
+        """Determine the hostname of the node and set that for future reference
+        and logging
+
+        :returns:   The hostname of the system, per the `hostname` command
+        :rtype:     ``str``
+        """
+        _out = self.run_command('hostname')
+        if _out['status'] == 0:
+            self._hostname = _out['output'].strip()
+        self.log_info("Hostname set to %s" % self._hostname)
+        return self._hostname
+
+    def retrieve_file(self, fname, dest):
+        """Copy a remote file, fname, to dest on the local node
+
+        :param fname:   The name of the file to retrieve
+        :type fname:    ``str``
+
+        :param dest:    Where to save the file to locally
+        :type dest:     ``str``
+
+        :returns:   True if file was successfully copied from remote, or False
+        :rtype:     ``bool``
+        """
+        return self._retrieve_file(fname, dest)
+
+    def _retrieve_file(self, fname, dest):
+        raise NotImplementedError("Transport %s does not support file copying"
+                                  % self.name)
+
+    def read_file(self, fname):
+        """Read the given file fname and return its contents
+
+        :param fname:   The name of the file to read
+        :type fname:    ``str``
+
+        :returns:   The content of the file
+        :rtype:     ``str``
+        """
+        self.log_debug("Reading file %s" % fname)
+        return self._read_file(fname)
+
+    def _read_file(self, fname):
+        res = self.run_command("cat %s" % fname, timeout=5)
+        if res['status'] == 0:
+            return res['output']
+        else:
+            if 'No such file' in res['output']:
+                self.log_debug("File %s does not exist on node"
+                               % fname)
+            else:
+                err = ':'.join(res['output'].split(':')[1:]).strip()
+                self.log_error("Error reading %s: %s" % (fname, err))
+            return ''
+
+# vim: set et ts=4 sw=4 :
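
The base class above defines the full transport contract: subclasses implement
`_connect()`, `_disconnect()`, and the `connected` property, and may override
`remote_exec`, `_retrieve_file()`, and `_read_file()`. A minimal sketch of a
hypothetical third-party transport against that contract follows; the
`example-cli` binary and its subcommands are invented purely for illustration.

    # Hypothetical transport, illustrating the RemoteTransport contract.
    # 'example-cli' and its subcommands are invented for this sketch.
    from sos.collector.transports import RemoteTransport
    from sos.utilities import sos_get_command_output


    class ExampleCliTransport(RemoteTransport):
        """Manages connections via a fictional 'example-cli' binary"""

        name = 'example_cli'

        def _connect(self, password=''):
            res = sos_get_command_output(
                "example-cli open %s" % self.address)
            self._session_up = res['status'] == 0
            return self._session_up

        def _disconnect(self):
            sos_get_command_output("example-cli close %s" % self.address)
            return True

        @property
        def connected(self):
            return getattr(self, '_session_up', False)

        @property
        def remote_exec(self):
            # prefix placed before each command executed on the node
            return "example-cli exec %s --" % self.address

        def _retrieve_file(self, fname, dest):
            res = sos_get_command_output(
                "example-cli cp %s:%s %s" % (self.address, fname, dest))
            return res['status'] == 0
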
diff --git a/sos/collector/transports/control_persist.py b/sos/collector/transports/control_persist.py
new file mode 100644
index 00000000..3e848b41
--- /dev/null
+++ b/sos/collector/transports/control_persist.py
@@ -0,0 +1,199 @@
+# Copyright Red Hat 2021, Jake Hunsaker <jhunsake@redhat.com>
+
+# This file is part of the sos project: https://github.com/sosreport/sos
+#
+# This copyrighted material is made available to anyone wishing to use,
+# modify, copy, or redistribute it subject to the terms and conditions of
+# version 2 of the GNU General Public License.
+#
+# See the LICENSE file in the source distribution for further information.
+
+
+import os
+import pexpect
+import subprocess
+
+from sos.collector.transports import RemoteTransport
+from sos.collector.exceptions import (InvalidPasswordException,
+                                      TimeoutPasswordAuthException,
+                                      PasswordRequestException,
+                                      AuthPermissionDeniedException,
+                                      ConnectionException,
+                                      ConnectionTimeoutException,
+                                      ControlSocketMissingException,
+                                      ControlPersistUnsupportedException)
+from sos.utilities import sos_get_command_output
+
+
+class SSHControlPersist(RemoteTransport):
+    """A transport for collect that leverages OpenSSH's Control Persist
+    functionality which uses control sockets to transparently keep a connection
+    open to the remote host without needing to rebuild the SSH connection for
+    each and every command executed on the node
+    """
+
+    name = 'control_persist'
+
+    def _check_for_control_persist(self):
+        """Checks to see if the local system supported SSH ControlPersist.
+
+        ControlPersist allows OpenSSH to keep a single open connection to a
+        remote host rather than building a new session each time. This is the
+        same feature that Ansible uses in place of paramiko, a dependency
+        that sos-collector likewise needs to drop.
+
+        This check relies on feedback from the ssh binary. The command being
+        run should always generate stderr output, but depending on what that
+        output reads we can determine if ControlPersist is supported or not.
+
+        For our purposes, a host that does not support ControlPersist is not
+        able to run sos-collector.
+
+        :returns:   True if ControlPersist is supported, otherwise raises
+                    ``ControlPersistUnsupportedException``
+        """
+        ssh_cmd = ['ssh', '-o', 'ControlPersist']
+        cmd = subprocess.Popen(ssh_cmd, stdout=subprocess.PIPE,
+                               stderr=subprocess.PIPE)
+        out, err = cmd.communicate()
+        err = err.decode('utf-8')
+        if 'Bad configuration option' in err or 'Usage:' in err:
+            raise ControlPersistUnsupportedException
+        return True
+
+    def _connect(self, password=''):
+        """
+        Using ControlPersist, create the initial connection to the node.
+
+        This will generate an OpenSSH ControlPersist socket within the tmp
+        directory created or specified for sos-collector to use.
+
+        At most, we will wait 30 seconds for a connection. This involves a 15
+        second wait for the initial connection attempt, and a subsequent 15
+        second wait for a response when we supply a password.
+
+        Since we connect to nodes in parallel (using the --threads value), this
+        means that the time between 'Connecting to nodes...' and 'Beginning
+        collection of sosreports' that users see can be up to an amount of time
+        equal to 30*(num_nodes/threads) seconds.
+
+        :returns:   True if the session is successfully opened, otherwise
+                    raises a connection-related exception
+        """
+        try:
+            self._check_for_control_persist()
+        except ControlPersistUnsupportedException:
+            self.log_error("OpenSSH ControlPersist is not locally supported. "
+                           "Please update your OpenSSH installation.")
+            raise
+        self.log_info('Opening SSH session to create control socket')
+        self.control_path = ("%s/.sos-collector-%s" % (self.tmpdir,
+                                                       self.address))
+        self.ssh_cmd = ''
+        connected = False
+        ssh_key = ''
+        ssh_port = ''
+        if self.opts.ssh_port != 22:
+            ssh_port = "-p%s " % self.opts.ssh_port
+        if self.opts.ssh_key:
+            ssh_key = "-i%s" % self.opts.ssh_key
+
+        cmd = ("ssh %s %s -oControlPersist=600 -oControlMaster=auto "
+               "-oStrictHostKeyChecking=no -oControlPath=%s %s@%s "
+               "\"echo Connected\"" % (ssh_key,
+                                       ssh_port,
+                                       self.control_path,
+                                       self.opts.ssh_user,
+                                       self.address))
+        res = pexpect.spawn(cmd, encoding='utf-8')
+
+        connect_expects = [
+            u'Connected',
+            u'password:',
+            u'.*Permission denied.*',
+            u'.* port .*: No route to host',
+            u'.*Could not resolve hostname.*',
+            pexpect.TIMEOUT
+        ]
+
+        index = res.expect(connect_expects, timeout=15)
+
+        if index == 0:
+            connected = True
+        elif index == 1:
+            if password:
+                pass_expects = [
+                    u'Connected',
+                    u'Permission denied, please try again.',
+                    pexpect.TIMEOUT
+                ]
+                res.sendline(password)
+                pass_index = res.expect(pass_expects, timeout=15)
+                if pass_index == 0:
+                    connected = True
+                elif pass_index == 1:
+                    # Note that we do not get an exitstatus here, so matching
+                    # this line means an invalid password will be reported for
+                    # both invalid passwords and invalid user names
+                    raise InvalidPasswordException
+                elif pass_index == 2:
+                    raise TimeoutPasswordAuthException
+            else:
+                raise PasswordRequestException
+        elif index == 2:
+            raise AuthPermissionDeniedException
+        elif index == 3:
+            raise ConnectionException(self.address, self.opts.ssh_port)
+        elif index == 4:
+            raise ConnectionException(self.address)
+        elif index == 5:
+            raise ConnectionTimeoutException
+        else:
+            raise Exception("Unknown error, client returned %s" % res.before)
+        if connected:
+            if not os.path.exists(self.control_path):
+                raise ControlSocketMissingException
+            self.log_debug("Successfully created control socket at %s"
+                           % self.control_path)
+            return True
+        return False
+
+    def _disconnect(self):
+        if os.path.exists(self.control_path):
+            try:
+                os.remove(self.control_path)
+                return True
+            except Exception as err:
+                self.log_debug("Could not disconnect properly: %s" % err)
+                return False
+        self.log_debug("Control socket not present when attempting to "
+                       "terminate session")
+
+    @property
+    def connected(self):
+        """Check if the SSH control socket exists
+
+        The control socket is automatically removed by the SSH daemon if the
+        time since the last connection to the node exceeds the timeout set by
+        the ControlPersist option. This can happen for us if we are
+        collecting from a large number of nodes and the timeout expires
+        before we start collection.
+        """
+        return os.path.exists(self.control_path)
+
+    @property
+    def remote_exec(self):
+        if not self.ssh_cmd:
+            self.ssh_cmd = "ssh -oControlPath=%s %s@%s" % (
+                self.control_path, self.opts.ssh_user, self.address
+            )
+        return self.ssh_cmd
+
+    def _retrieve_file(self, fname, dest):
+        cmd = "/usr/bin/scp -oControlPath=%s %s@%s:%s %s" % (
+            self.control_path, self.opts.ssh_user, self.address, fname, dest
+        )
+        res = sos_get_command_output(cmd)
+        return res['status'] == 0
+
+# vim: set et ts=4 sw=4 :
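
Tying the above together, a rough usage sketch follows. This is illustrative
only: it assumes the public connect()/disconnect() wrappers that the
RemoteTransport base class provides around _connect()/_disconnect(), and a
`commons` dict as assembled by SoSCollector; the hostname and password are
placeholders.

    # Illustrative usage only; 'commons' is the shared dict SoSCollector
    # builds (opts, tmpdir, etc.) and passes to each transport.
    from sos.collector.transports.control_persist import SSHControlPersist

    transport = SSHControlPersist('node1.example.com', commons)
    if transport.connect(password='hunter2'):
        # each call reuses the control socket via remote_exec, avoiding
        # a fresh SSH handshake per command
        res = transport.run_command('hostname', timeout=10)
        print(res['status'], res['output'])
        transport.retrieve_file('/etc/os-release', '/tmp/os-release')
        transport.disconnect()
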
diff --git a/sos/collector/transports/local.py b/sos/collector/transports/local.py
new file mode 100644
index 00000000..a4897f19
--- /dev/null
+++ b/sos/collector/transports/local.py
@@ -0,0 +1,49 @@
+# Copyright Red Hat 2021, Jake Hunsaker <jhunsake@redhat.com>
+
+# This file is part of the sos project: https://github.com/sosreport/sos
+#
+# This copyrighted material is made available to anyone wishing to use,
+# modify, copy, or redistribute it subject to the terms and conditions of
+# version 2 of the GNU General Public License.
+#
+# See the LICENSE file in the source distribution for further information.
+
+import os
+import shutil
+
+from sos.collector.transports import RemoteTransport
+
+
+class LocalTransport(RemoteTransport):
+    """A 'transport' to represent a local node. This allows us to more easily
+    extend SoSNode() without scattering 'if local' or similar checks in
+    more places than we actually need them
+    """
+
+    name = 'local_node'
+
+    def _connect(self, password):
+        return True
+
+    def _disconnect(self):
+        return True
+
+    @property
+    def connected(self):
+        return True
+
+    def _retrieve_file(self, fname, dest):
+        self.log_debug("Moving %s to %s" % (fname, dest))
+        shutil.copy(fname, dest)
+        return True
+
+    def _format_cmd_for_exec(self, cmd):
+        return cmd
+
+    def _read_file(self, fname):
+        if os.path.exists(fname):
+            with open(fname, 'r') as rfile:
+                return rfile.read()
+        self.log_debug("No such file: %s" % fname)
+        return ''
+
+# vim: set et ts=4 sw=4 :
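
The payoff of the pseudo-transport is that callers need no special casing for
the local node; under the same assumptions as the previous sketch, the same
call pattern behaves sensibly when everything is local.

    # Illustrative only: LocalTransport satisfies the same contract, so
    # SoSNode can drive local and remote nodes identically.
    from sos.collector.transports.local import LocalTransport

    transport = LocalTransport('localhost', commons)  # commons as above
    transport.connect(password='')                # trivially succeeds
    print(transport.read_file('/etc/hostname'))   # plain filesystem read
    transport.retrieve_file('/var/tmp/report.tar.xz', '/tmp/')  # shutil.copy
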
-- 
2.31.1

From 07d96d52ef69b9f8fe1ef32a1b88089d31c33fe8 Mon Sep 17 00:00:00 2001
From: Jake Hunsaker <jhunsake@redhat.com>
Date: Mon, 27 Sep 2021 12:28:27 -0400
Subject: [PATCH 2/2] [plugins] Update plugins to use new os.path.join wrapper

Updates plugins to use the new `self.path_join()` wrapper for
`os.path.join()` so that these plugins now account for non-/ sysroots
for their collections.

Signed-off-by: Jake Hunsaker <jhunsake@redhat.com>
---
 sos/report/plugins/__init__.py            |  2 +-
 sos/report/plugins/azure.py               |  4 +--
 sos/report/plugins/collectd.py            |  2 +-
 sos/report/plugins/container_log.py       |  2 +-
 sos/report/plugins/corosync.py            |  2 +-
 sos/report/plugins/docker_distribution.py |  5 ++--
 sos/report/plugins/ds.py                  |  3 +--
 sos/report/plugins/elastic.py             |  4 ++-
 sos/report/plugins/etcd.py                |  2 +-
 sos/report/plugins/gluster.py             |  3 ++-
 sos/report/plugins/jars.py                |  2 +-
 sos/report/plugins/kdump.py               |  4 +--
 sos/report/plugins/libvirt.py             |  2 +-
 sos/report/plugins/logs.py                |  8 +++---
 sos/report/plugins/manageiq.py            | 12 ++++-----
 sos/report/plugins/numa.py                |  9 +++----
 sos/report/plugins/openstack_instack.py   |  2 +-
 sos/report/plugins/openstack_nova.py      |  2 +-
 sos/report/plugins/openvswitch.py         | 13 ++++-----
 sos/report/plugins/origin.py              | 28 +++++++++++---------
 sos/report/plugins/ovirt.py               |  2 +-
 sos/report/plugins/ovirt_engine_backup.py |  5 ++--
 sos/report/plugins/ovn_central.py         | 26 +++++++++---------
 sos/report/plugins/ovn_host.py            |  4 +--
 sos/report/plugins/pacemaker.py           |  4 +--
 sos/report/plugins/pcp.py                 | 32 +++++++++++------------
 sos/report/plugins/postfix.py             |  2 +-
 sos/report/plugins/postgresql.py          |  2 +-
 sos/report/plugins/powerpc.py             |  2 +-
 sos/report/plugins/processor.py           |  3 +--
 sos/report/plugins/python.py              |  4 +--
 sos/report/plugins/sar.py                 |  5 ++--
 sos/report/plugins/sos_extras.py          |  2 +-
 sos/report/plugins/ssh.py                 |  7 +++--
 sos/report/plugins/unpackaged.py          |  4 +--
 sos/report/plugins/watchdog.py            | 13 +++++----
 sos/report/plugins/yum.py                 |  2 +-
 37 files changed, 115 insertions(+), 115 deletions(-)
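
For context, `self.path_join()` re-roots paths under the plugin's sysroot,
which a bare `os.path.join()` does not do. A minimal sketch of the wrapper's
intent follows; the real implementation lives in
sos/report/plugins/__init__.py and may differ in detail.

    import os

    def path_join(self, path, *p):
        """Join path components, rooting absolute paths under self.sysroot
        so collections work against non-/ sysroots, e.g. a host filesystem
        mounted at /host inside a container."""
        if not path.startswith(self.sysroot):
            path = os.path.join(self.sysroot, path.lstrip(os.sep))
        return os.path.join(path, *p)

    # With sysroot '/host':
    #   self.path_join('/etc/fstab')      -> '/host/etc/fstab'
    #   self.path_join('/var/log', 'sa')  -> '/host/var/log/sa'
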

diff --git a/sos/report/plugins/__init__.py b/sos/report/plugins/__init__.py
index 1f84bca4..ec138f83 100644
--- a/sos/report/plugins/__init__.py
+++ b/sos/report/plugins/__init__.py
@@ -2897,7 +2897,7 @@ class Plugin():
         try:
             cmd_line_paths = glob.glob(cmd_line_glob)
             for path in cmd_line_paths:
-                f = open(path, 'r')
+                f = open(self.path_join(path), 'r')
                 cmd_line = f.read().strip()
                 if process in cmd_line:
                     status = True
diff --git a/sos/report/plugins/azure.py b/sos/report/plugins/azure.py
index 45971a61..90999b3f 100644
--- a/sos/report/plugins/azure.py
+++ b/sos/report/plugins/azure.py
@@ -8,8 +8,8 @@
 #
 # See the LICENSE file in the source distribution for further information.
 
-import os
 from sos.report.plugins import Plugin, UbuntuPlugin, RedHatPlugin
+import os
 
 
 class Azure(Plugin, UbuntuPlugin):
@@ -38,7 +38,7 @@ class Azure(Plugin, UbuntuPlugin):
 
         for path, subdirs, files in os.walk("/var/log/azure"):
             for name in files:
-                self.add_copy_spec(os.path.join(path, name), sizelimit=limit)
+                self.add_copy_spec(self.path_join(path, name), sizelimit=limit)
 
         self.add_cmd_output((
             'curl -s -H Metadata:true '
diff --git a/sos/report/plugins/collectd.py b/sos/report/plugins/collectd.py
index 80d4b00a..8584adf9 100644
--- a/sos/report/plugins/collectd.py
+++ b/sos/report/plugins/collectd.py
@@ -33,7 +33,7 @@ class Collectd(Plugin, IndependentPlugin):
 
         p = re.compile('^LoadPlugin.*')
         try:
-            with open("/etc/collectd.conf") as f:
+            with open(self.path_join("/etc/collectd.conf"), 'r') as f:
                 for line in f:
                     if p.match(line):
                         self.add_alert("Active Plugin found: %s" %
diff --git a/sos/report/plugins/container_log.py b/sos/report/plugins/container_log.py
index 14e0b7d8..e8dedad2 100644
--- a/sos/report/plugins/container_log.py
+++ b/sos/report/plugins/container_log.py
@@ -29,6 +29,6 @@ class ContainerLog(Plugin, IndependentPlugin):
         """Collect *.log files from subdirs of passed root path
         """
         for dirName, _, _ in os.walk(root):
-            self.add_copy_spec(os.path.join(dirName, '*.log'))
+            self.add_copy_spec(self.path_join(dirName, '*.log'))
 
 # vim: set et ts=4 sw=4 :
diff --git a/sos/report/plugins/corosync.py b/sos/report/plugins/corosync.py
index d74086e3..10e096c6 100644
--- a/sos/report/plugins/corosync.py
+++ b/sos/report/plugins/corosync.py
@@ -47,7 +47,7 @@ class Corosync(Plugin):
         # (it isnt precise but sufficient)
         pattern = r'^\s*(logging.)?logfile:\s*(\S+)$'
         try:
-            with open("/etc/corosync/corosync.conf") as f:
+            with open(self.path_join("/etc/corosync/corosync.conf"), 'r') as f:
                 for line in f:
                     if re.match(pattern, line):
                         self.add_copy_spec(re.search(pattern, line).group(2))
diff --git a/sos/report/plugins/docker_distribution.py b/sos/report/plugins/docker_distribution.py
index 84222ff7..e760f252 100644
--- a/sos/report/plugins/docker_distribution.py
+++ b/sos/report/plugins/docker_distribution.py
@@ -19,8 +19,9 @@ class DockerDistribution(Plugin):
     def setup(self):
         self.add_copy_spec('/etc/docker-distribution/')
         self.add_journal('docker-distribution')
-        if self.path_exists('/etc/docker-distribution/registry/config.yml'):
-            with open('/etc/docker-distribution/registry/config.yml') as f:
+        conf = self.path_join('/etc/docker-distribution/registry/config.yml')
+        if self.path_exists(conf):
+            with open(conf) as f:
                 for line in f:
                     if 'rootdirectory' in line:
                         loc = line.split()[1]
diff --git a/sos/report/plugins/ds.py b/sos/report/plugins/ds.py
index addf49e1..43feb21e 100644
--- a/sos/report/plugins/ds.py
+++ b/sos/report/plugins/ds.py
@@ -11,7 +11,6 @@
 # See the LICENSE file in the source distribution for further information.
 
 from sos.report.plugins import Plugin, RedHatPlugin
-import os
 
 
 class DirectoryServer(Plugin, RedHatPlugin):
@@ -47,7 +46,7 @@ class DirectoryServer(Plugin, RedHatPlugin):
         try:
             for d in self.listdir("/etc/dirsrv"):
                 if d[0:5] == 'slapd':
-                    certpath = os.path.join("/etc/dirsrv", d)
+                    certpath = self.path_join("/etc/dirsrv", d)
                     self.add_cmd_output("certutil -L -d %s" % certpath)
                     self.add_cmd_output("dsctl %s healthcheck" % d)
         except OSError:
diff --git a/sos/report/plugins/elastic.py b/sos/report/plugins/elastic.py
index ad9a06ff..da2662bc 100644
--- a/sos/report/plugins/elastic.py
+++ b/sos/report/plugins/elastic.py
@@ -39,7 +39,9 @@ class Elastic(Plugin, IndependentPlugin):
         return hostname, port
 
     def setup(self):
-        els_config_file = "/etc/elasticsearch/elasticsearch.yml"
+        els_config_file = self.path_join(
+            "/etc/elasticsearch/elasticsearch.yml"
+        )
         self.add_copy_spec(els_config_file)
 
         if self.get_option("all_logs"):
diff --git a/sos/report/plugins/etcd.py b/sos/report/plugins/etcd.py
index fd4f67eb..fe017e9f 100644
--- a/sos/report/plugins/etcd.py
+++ b/sos/report/plugins/etcd.py
@@ -62,7 +62,7 @@ class etcd(Plugin, RedHatPlugin):
 
     def get_etcd_url(self):
         try:
-            with open('/etc/etcd/etcd.conf', 'r') as ef:
+            with open(self.path_join('/etc/etcd/etcd.conf'), 'r') as ef:
                 for line in ef:
                     if line.startswith('ETCD_LISTEN_CLIENT_URLS'):
                         return line.split('=')[1].replace('"', '').strip()
diff --git a/sos/report/plugins/gluster.py b/sos/report/plugins/gluster.py
index a44ffeb7..e518e3d3 100644
--- a/sos/report/plugins/gluster.py
+++ b/sos/report/plugins/gluster.py
@@ -35,9 +35,10 @@ class Gluster(Plugin, RedHatPlugin):
         ]
         for statedump_file in statedump_entries:
             statedumps_present = statedumps_present+1
+            _spath = self.path_join(name_dir, statedump_file)
             ret = -1
             while ret == -1:
-                with open(name_dir + '/' + statedump_file, 'r') as sfile:
+                with open(_spath, 'r') as sfile:
                     last_line = sfile.readlines()[-1]
                     ret = string.count(last_line, 'DUMP_END_TIME')
 
diff --git a/sos/report/plugins/jars.py b/sos/report/plugins/jars.py
index 0d3cf37e..4b98684e 100644
--- a/sos/report/plugins/jars.py
+++ b/sos/report/plugins/jars.py
@@ -63,7 +63,7 @@ class Jars(Plugin, RedHatPlugin):
         for location in locations:
             for dirpath, _, filenames in os.walk(location):
                 for filename in filenames:
-                    path = os.path.join(dirpath, filename)
+                    path = self.path_join(dirpath, filename)
                     if Jars.is_jar(path):
                         jar_paths.append(path)
 
diff --git a/sos/report/plugins/kdump.py b/sos/report/plugins/kdump.py
index 757c2736..66565664 100644
--- a/sos/report/plugins/kdump.py
+++ b/sos/report/plugins/kdump.py
@@ -40,7 +40,7 @@ class RedHatKDump(KDump, RedHatPlugin):
     packages = ('kexec-tools',)
 
     def fstab_parse_fs(self, device):
-        with open('/etc/fstab', 'r') as fp:
+        with open(self.path_join('/etc/fstab'), 'r') as fp:
             for line in fp:
                 if line.startswith((device)):
                     return line.split()[1].rstrip('/')
@@ -50,7 +50,7 @@ class RedHatKDump(KDump, RedHatPlugin):
         fs = ""
         path = "/var/crash"
 
-        with open('/etc/kdump.conf', 'r') as fp:
+        with open(self.path_join('/etc/kdump.conf'), 'r') as fp:
             for line in fp:
                 if line.startswith("path"):
                     path = line.split()[1]
diff --git a/sos/report/plugins/libvirt.py b/sos/report/plugins/libvirt.py
index be8120ff..5caa5802 100644
--- a/sos/report/plugins/libvirt.py
+++ b/sos/report/plugins/libvirt.py
@@ -55,7 +55,7 @@ class Libvirt(Plugin, IndependentPlugin):
         else:
             self.add_copy_spec("/var/log/libvirt")
 
-        if self.path_exists(self.join_sysroot(libvirt_keytab)):
+        if self.path_exists(self.path_join(libvirt_keytab)):
             self.add_cmd_output("klist -ket %s" % libvirt_keytab)
 
         self.add_cmd_output("ls -lR /var/lib/libvirt/qemu")
diff --git a/sos/report/plugins/logs.py b/sos/report/plugins/logs.py
index ee6bb98d..606e574a 100644
--- a/sos/report/plugins/logs.py
+++ b/sos/report/plugins/logs.py
@@ -24,15 +24,15 @@ class Logs(Plugin, IndependentPlugin):
         since = self.get_option("since")
 
         if self.path_exists('/etc/rsyslog.conf'):
-            with open('/etc/rsyslog.conf', 'r') as conf:
+            with open(self.path_join('/etc/rsyslog.conf'), 'r') as conf:
                 for line in conf.readlines():
                     if line.startswith('$IncludeConfig'):
                         confs += glob.glob(line.split()[1])
 
         for conf in confs:
-            if not self.path_exists(conf):
+            if not self.path_exists(self.path_join(conf)):
                 continue
-            config = self.join_sysroot(conf)
+            config = self.path_join(conf)
             logs += self.do_regex_find_all(r"^\S+\s+(-?\/.*$)\s+", config)
 
         for i in logs:
@@ -60,7 +60,7 @@ class Logs(Plugin, IndependentPlugin):
         # - there is some data present, either persistent or runtime only
         # - systemd-journald service exists
         # otherwise fallback to collecting few well known logfiles directly
-        journal = any([self.path_exists(p + "/log/journal/")
+        journal = any([self.path_exists(self.path_join(p, "log/journal/"))
                       for p in ["/var", "/run"]])
         if journal and self.is_service("systemd-journald"):
             self.add_journal(since=since, tags='journal_full', priority=100)
diff --git a/sos/report/plugins/manageiq.py b/sos/report/plugins/manageiq.py
index 27ad6ef4..e20c4a2a 100644
--- a/sos/report/plugins/manageiq.py
+++ b/sos/report/plugins/manageiq.py
@@ -58,7 +58,7 @@ class ManageIQ(Plugin, RedHatPlugin):
     # Log files to collect from miq_dir/log/
     miq_log_dir = os.path.join(miq_dir, "log")
 
-    miq_main_log_files = [
+    miq_main_logs = [
         'ansible_tower.log',
         'top_output.log',
         'evm.log',
@@ -81,16 +81,16 @@ class ManageIQ(Plugin, RedHatPlugin):
         self.add_copy_spec(list(self.files))
 
         self.add_copy_spec([
-            os.path.join(self.miq_conf_dir, x) for x in self.miq_conf_files
+            self.path_join(self.miq_conf_dir, x) for x in self.miq_conf_files
         ])
 
         # Collect main log files without size limit.
         self.add_copy_spec([
-            os.path.join(self.miq_log_dir, x) for x in self.miq_main_log_files
+            self.path_join(self.miq_log_dir, x) for x in self.miq_main_logs
         ], sizelimit=0)
 
         self.add_copy_spec([
-            os.path.join(self.miq_log_dir, x) for x in self.miq_log_files
+            self.path_join(self.miq_log_dir, x) for x in self.miq_log_files
         ])
 
         self.add_copy_spec([
@@ -101,8 +101,8 @@ class ManageIQ(Plugin, RedHatPlugin):
         if environ.get("APPLIANCE_PG_DATA"):
             pg_dir = environ.get("APPLIANCE_PG_DATA")
             self.add_copy_spec([
-                    os.path.join(pg_dir, 'pg_log'),
-                    os.path.join(pg_dir, 'postgresql.conf')
+                    self.path_join(pg_dir, 'pg_log'),
+                    self.path_join(pg_dir, 'postgresql.conf')
             ])
 
 # vim: set et ts=4 sw=4 :
diff --git a/sos/report/plugins/numa.py b/sos/report/plugins/numa.py
index 0faef8d2..9094baef 100644
--- a/sos/report/plugins/numa.py
+++ b/sos/report/plugins/numa.py
@@ -9,7 +9,6 @@
 # See the LICENSE file in the source distribution for further information.
 
 from sos.report.plugins import Plugin, IndependentPlugin
-import os.path
 
 
 class Numa(Plugin, IndependentPlugin):
@@ -42,10 +41,10 @@ class Numa(Plugin, IndependentPlugin):
         ])
 
         self.add_copy_spec([
-            os.path.join(numa_path, "node*/meminfo"),
-            os.path.join(numa_path, "node*/cpulist"),
-            os.path.join(numa_path, "node*/distance"),
-            os.path.join(numa_path, "node*/hugepages/hugepages-*/*")
+            self.path_join(numa_path, "node*/meminfo"),
+            self.path_join(numa_path, "node*/cpulist"),
+            self.path_join(numa_path, "node*/distance"),
+            self.path_join(numa_path, "node*/hugepages/hugepages-*/*")
         ])
 
 # vim: set et ts=4 sw=4 :
diff --git a/sos/report/plugins/openstack_instack.py b/sos/report/plugins/openstack_instack.py
index 7c56c162..5b4f7d41 100644
--- a/sos/report/plugins/openstack_instack.py
+++ b/sos/report/plugins/openstack_instack.py
@@ -68,7 +68,7 @@ class OpenStackInstack(Plugin):
                 p = uc_config.get(opt)
                 if p:
                     if not os.path.isabs(p):
-                        p = os.path.join('/home/stack', p)
+                        p = self.path_join('/home/stack', p)
                     self.add_copy_spec(p)
         except Exception:
             pass
diff --git a/sos/report/plugins/openstack_nova.py b/sos/report/plugins/openstack_nova.py
index 53210c48..f840081e 100644
--- a/sos/report/plugins/openstack_nova.py
+++ b/sos/report/plugins/openstack_nova.py
@@ -103,7 +103,7 @@ class OpenStackNova(Plugin):
                 "nova-scheduler.log*"
             ]
             for novalog in novalogs:
-                self.add_copy_spec(os.path.join(novadir, novalog))
+                self.add_copy_spec(self.path_join(novadir, novalog))
 
         self.add_copy_spec([
             "/etc/nova/",
diff --git a/sos/report/plugins/openvswitch.py b/sos/report/plugins/openvswitch.py
index 003596c6..179d1532 100644
--- a/sos/report/plugins/openvswitch.py
+++ b/sos/report/plugins/openvswitch.py
@@ -10,7 +10,6 @@
 
 from sos.report.plugins import Plugin, RedHatPlugin, DebianPlugin, UbuntuPlugin
 
-from os.path import join as path_join
 from os import environ
 
 import re
@@ -65,7 +64,9 @@ class OpenVSwitch(Plugin):
             log_dirs.append(environ.get('OVS_LOGDIR'))
 
         if not all_logs:
-            self.add_copy_spec([path_join(ld, '*.log') for ld in log_dirs])
+            self.add_copy_spec([
+                self.path_join(ld, '*.log') for ld in log_dirs
+            ])
         else:
             self.add_copy_spec(log_dirs)
 
@@ -76,13 +77,13 @@ class OpenVSwitch(Plugin):
         ])
 
         self.add_copy_spec([
-            path_join('/usr/local/etc/openvswitch', 'conf.db'),
-            path_join('/etc/openvswitch', 'conf.db'),
-            path_join('/var/lib/openvswitch', 'conf.db'),
+            self.path_join('/usr/local/etc/openvswitch', 'conf.db'),
+            self.path_join('/etc/openvswitch', 'conf.db'),
+            self.path_join('/var/lib/openvswitch', 'conf.db'),
         ])
         ovs_dbdir = environ.get('OVS_DBDIR')
         if ovs_dbdir:
-            self.add_copy_spec(path_join(ovs_dbdir, 'conf.db'))
+            self.add_copy_spec(self.path_join(ovs_dbdir, 'conf.db'))
 
         self.add_cmd_output([
             # The '-t 5' adds an upper bound on how long to wait to connect
diff --git a/sos/report/plugins/origin.py b/sos/report/plugins/origin.py
index f9cc32c1..7df9c019 100644
--- a/sos/report/plugins/origin.py
+++ b/sos/report/plugins/origin.py
@@ -69,20 +69,21 @@ class OpenShiftOrigin(Plugin):
 
     def is_static_etcd(self):
         """Determine if we are on a node running etcd"""
-        return self.path_exists(os.path.join(self.static_pod_dir, "etcd.yaml"))
+        return self.path_exists(self.path_join(self.static_pod_dir,
+                                               "etcd.yaml"))
 
     def is_static_pod_compatible(self):
         """Determine if a node is running static pods"""
         return self.path_exists(self.static_pod_dir)
 
     def setup(self):
-        bstrap_node_cfg = os.path.join(self.node_base_dir,
-                                       "bootstrap-" + self.node_cfg_file)
-        bstrap_kubeconfig = os.path.join(self.node_base_dir,
-                                         "bootstrap.kubeconfig")
-        node_certs = os.path.join(self.node_base_dir, "certs", "*")
-        node_client_ca = os.path.join(self.node_base_dir, "client-ca.crt")
-        admin_cfg = os.path.join(self.master_base_dir, "admin.kubeconfig")
+        bstrap_node_cfg = self.path_join(self.node_base_dir,
+                                         "bootstrap-" + self.node_cfg_file)
+        bstrap_kubeconfig = self.path_join(self.node_base_dir,
+                                           "bootstrap.kubeconfig")
+        node_certs = self.path_join(self.node_base_dir, "certs", "*")
+        node_client_ca = self.path_join(self.node_base_dir, "client-ca.crt")
+        admin_cfg = self.path_join(self.master_base_dir, "admin.kubeconfig")
         oc_cmd_admin = "%s --config=%s" % ("oc", admin_cfg)
         static_pod_logs_cmd = "master-logs"
 
@@ -92,11 +93,12 @@ class OpenShiftOrigin(Plugin):
             self.add_copy_spec([
                 self.master_cfg,
                 self.master_env,
-                os.path.join(self.master_base_dir, "*.crt"),
+                self.path_join(self.master_base_dir, "*.crt"),
             ])
 
             if self.is_static_pod_compatible():
-                self.add_copy_spec(os.path.join(self.static_pod_dir, "*.yaml"))
+                self.add_copy_spec(self.path_join(self.static_pod_dir,
+                                                  "*.yaml"))
                 self.add_cmd_output([
                     "%s api api" % static_pod_logs_cmd,
                     "%s controllers controllers" % static_pod_logs_cmd,
@@ -177,9 +179,9 @@ class OpenShiftOrigin(Plugin):
                 node_client_ca,
                 bstrap_node_cfg,
                 bstrap_kubeconfig,
-                os.path.join(self.node_base_dir, "*.crt"),
-                os.path.join(self.node_base_dir, "resolv.conf"),
-                os.path.join(self.node_base_dir, "node-dnsmasq.conf"),
+                self.path_join(self.node_base_dir, "*.crt"),
+                self.path_join(self.node_base_dir, "resolv.conf"),
+                self.path_join(self.node_base_dir, "node-dnsmasq.conf"),
             ])
 
             self.add_journal(units="atomic-openshift-node")
diff --git a/sos/report/plugins/ovirt.py b/sos/report/plugins/ovirt.py
index 1de606be..09647bf1 100644
--- a/sos/report/plugins/ovirt.py
+++ b/sos/report/plugins/ovirt.py
@@ -216,7 +216,7 @@ class Ovirt(Plugin, RedHatPlugin):
             "isouploader.conf"
         ]
         for conf_file in passwd_files:
-            conf_path = os.path.join("/etc/ovirt-engine", conf_file)
+            conf_path = self.path_join("/etc/ovirt-engine", conf_file)
             self.do_file_sub(
                 conf_path,
                 r"passwd=(.*)",
diff --git a/sos/report/plugins/ovirt_engine_backup.py b/sos/report/plugins/ovirt_engine_backup.py
index 676e419e..7fb6a5c7 100644
--- a/sos/report/plugins/ovirt_engine_backup.py
+++ b/sos/report/plugins/ovirt_engine_backup.py
@@ -8,7 +8,6 @@
 #
 # See the LICENSE file in the source distribution for further information.
 
-import os
 from sos.report.plugins import (Plugin, RedHatPlugin)
 from datetime import datetime
 
@@ -29,11 +28,11 @@ class oVirtEngineBackup(Plugin, RedHatPlugin):
 
     def setup(self):
         now = datetime.now().strftime("%Y%m%d%H%M%S")
-        backup_filename = os.path.join(
+        backup_filename = self.path_join(
             self.get_option("backupdir"),
             "engine-db-backup-%s.tar.gz" % (now)
         )
-        log_filename = os.path.join(
+        log_filename = self.path_join(
             self.get_option("backupdir"),
             "engine-db-backup-%s.log" % (now)
         )
diff --git a/sos/report/plugins/ovn_central.py b/sos/report/plugins/ovn_central.py
index d6647aad..914eda60 100644
--- a/sos/report/plugins/ovn_central.py
+++ b/sos/report/plugins/ovn_central.py
@@ -42,7 +42,7 @@ class OVNCentral(Plugin):
                 return
         else:
             try:
-                with open(filename, 'r') as f:
+                with open(self.path_join(filename), 'r') as f:
                     try:
                         db = json.load(f)
                     except Exception:
@@ -71,13 +71,13 @@ class OVNCentral(Plugin):
         ovs_rundir = os.environ.get('OVS_RUNDIR')
         for pidfile in ['ovnnb_db.pid', 'ovnsb_db.pid', 'ovn-northd.pid']:
             self.add_copy_spec([
-                os.path.join('/var/lib/openvswitch/ovn', pidfile),
-                os.path.join('/usr/local/var/run/openvswitch', pidfile),
-                os.path.join('/run/openvswitch/', pidfile),
+                self.path_join('/var/lib/openvswitch/ovn', pidfile),
+                self.path_join('/usr/local/var/run/openvswitch', pidfile),
+                self.path_join('/run/openvswitch/', pidfile),
             ])
 
             if ovs_rundir:
-                self.add_copy_spec(os.path.join(ovs_rundir, pidfile))
+                self.add_copy_spec(self.path_join(ovs_rundir, pidfile))
 
         if self.get_option("all_logs"):
             self.add_copy_spec("/var/log/ovn/")
@@ -104,7 +104,7 @@ class OVNCentral(Plugin):
 
         schema_dir = '/usr/share/openvswitch'
 
-        nb_tables = self.get_tables_from_schema(os.path.join(
+        nb_tables = self.get_tables_from_schema(self.path_join(
             schema_dir, 'ovn-nb.ovsschema'))
 
         self.add_database_output(nb_tables, nbctl_cmds, 'ovn-nbctl')
@@ -116,7 +116,7 @@ class OVNCentral(Plugin):
               format(self.ovn_sbdb_sock_path),
               "output": "Leader: self"}
         if self.test_predicate(self, pred=SoSPredicate(self, cmd_outputs=co)):
-            sb_tables = self.get_tables_from_schema(os.path.join(
+            sb_tables = self.get_tables_from_schema(self.path_join(
                 schema_dir, 'ovn-sb.ovsschema'), ['Logical_Flow'])
             self.add_database_output(sb_tables, sbctl_cmds, 'ovn-sbctl')
             cmds += sbctl_cmds
@@ -134,14 +134,14 @@ class OVNCentral(Plugin):
         ovs_dbdir = os.environ.get('OVS_DBDIR')
         for dbfile in ['ovnnb_db.db', 'ovnsb_db.db']:
             self.add_copy_spec([
-                os.path.join('/var/lib/openvswitch/ovn', dbfile),
-                os.path.join('/usr/local/etc/openvswitch', dbfile),
-                os.path.join('/etc/openvswitch', dbfile),
-                os.path.join('/var/lib/openvswitch', dbfile),
-                os.path.join('/var/lib/ovn/etc', dbfile),
+                self.path_join('/var/lib/openvswitch/ovn', dbfile),
+                self.path_join('/usr/local/etc/openvswitch', dbfile),
+                self.path_join('/etc/openvswitch', dbfile),
+                self.path_join('/var/lib/openvswitch', dbfile),
+                self.path_join('/var/lib/ovn/etc', dbfile)
             ])
             if ovs_dbdir:
-                self.add_copy_spec(os.path.join(ovs_dbdir, dbfile))
+                self.add_copy_spec(self.path_join(ovs_dbdir, dbfile))
 
         self.add_journal(units="ovn-northd")
 
diff --git a/sos/report/plugins/ovn_host.py b/sos/report/plugins/ovn_host.py
index 3742c49f..78604a15 100644
--- a/sos/report/plugins/ovn_host.py
+++ b/sos/report/plugins/ovn_host.py
@@ -35,7 +35,7 @@ class OVNHost(Plugin):
         else:
             self.add_copy_spec("/var/log/ovn/*.log")
 
-        self.add_copy_spec([os.path.join(pp, pidfile) for pp in pid_paths])
+        self.add_copy_spec([self.path_join(pp, pidfile) for pp in pid_paths])
 
         self.add_copy_spec('/etc/sysconfig/ovn-controller')
 
@@ -49,7 +49,7 @@ class OVNHost(Plugin):
 
     def check_enabled(self):
         return (any([self.path_isfile(
-            os.path.join(pp, pidfile)) for pp in pid_paths]) or
+            self.path_join(pp, pidfile)) for pp in pid_paths]) or
             super(OVNHost, self).check_enabled())
 
 
diff --git a/sos/report/plugins/pacemaker.py b/sos/report/plugins/pacemaker.py
index 497807ff..6ce80881 100644
--- a/sos/report/plugins/pacemaker.py
+++ b/sos/report/plugins/pacemaker.py
@@ -129,7 +129,7 @@ class Pacemaker(Plugin):
 
 class DebianPacemaker(Pacemaker, DebianPlugin, UbuntuPlugin):
     def setup(self):
-        self.envfile = "/etc/default/pacemaker"
+        self.envfile = self.path_join("/etc/default/pacemaker")
         self.setup_crm_shell()
         self.setup_pcs()
         super(DebianPacemaker, self).setup()
@@ -141,7 +141,7 @@ class DebianPacemaker(Pacemaker, DebianPlugin, UbuntuPlugin):
 
 class RedHatPacemaker(Pacemaker, RedHatPlugin):
     def setup(self):
-        self.envfile = "/etc/sysconfig/pacemaker"
+        self.envfile = self.path_join("/etc/sysconfig/pacemaker")
         self.setup_pcs()
         self.add_copy_spec("/etc/sysconfig/sbd")
         super(RedHatPacemaker, self).setup()
diff --git a/sos/report/plugins/pcp.py b/sos/report/plugins/pcp.py
index 9707d7a9..ad902332 100644
--- a/sos/report/plugins/pcp.py
+++ b/sos/report/plugins/pcp.py
@@ -41,7 +41,7 @@ class Pcp(Plugin, RedHatPlugin, DebianPlugin):
         total_size = 0
         for dirpath, dirnames, filenames in os.walk(path):
             for f in filenames:
-                fp = os.path.join(dirpath, f)
+                fp = self.path_join(dirpath, f)
                 total_size += os.path.getsize(fp)
         return total_size
 
@@ -86,7 +86,7 @@ class Pcp(Plugin, RedHatPlugin, DebianPlugin):
         # unconditionally. Obviously if someone messes up their /etc/pcp.conf
         # in a ridiculous way (i.e. setting PCP_SYSCONF_DIR to '/') this will
         # break badly.
-        var_conf_dir = os.path.join(self.pcp_var_dir, 'config')
+        var_conf_dir = self.path_join(self.pcp_var_dir, 'config')
         self.add_copy_spec([
             self.pcp_sysconf_dir,
             self.pcp_conffile,
@@ -98,10 +98,10 @@ class Pcp(Plugin, RedHatPlugin, DebianPlugin):
         # rpms. It does not make up for a lot of size but it contains many
         # files
         self.add_forbidden_path([
-            os.path.join(var_conf_dir, 'pmchart'),
-            os.path.join(var_conf_dir, 'pmlogconf'),
-            os.path.join(var_conf_dir, 'pmieconf'),
-            os.path.join(var_conf_dir, 'pmlogrewrite')
+            self.path_join(var_conf_dir, 'pmchart'),
+            self.path_join(var_conf_dir, 'pmlogconf'),
+            self.path_join(var_conf_dir, 'pmieconf'),
+            self.path_join(var_conf_dir, 'pmlogrewrite')
         ])
 
         # Take PCP_LOG_DIR/pmlogger/`hostname` + PCP_LOG_DIR/pmmgr/`hostname`
@@ -121,13 +121,13 @@ class Pcp(Plugin, RedHatPlugin, DebianPlugin):
         # we would collect everything
         if self.pcp_hostname != '':
             # collect pmmgr logs up to 'pmmgrlogs' size limit
-            path = os.path.join(self.pcp_log_dir, 'pmmgr',
-                                self.pcp_hostname, '*')
+            path = self.path_join(self.pcp_log_dir, 'pmmgr',
+                                  self.pcp_hostname, '*')
             self.add_copy_spec(path, sizelimit=self.sizelimit, tailit=False)
             # collect newest pmlogger logs up to 'pmloggerfiles' count
             files_collected = 0
-            path = os.path.join(self.pcp_log_dir, 'pmlogger',
-                                self.pcp_hostname, '*')
+            path = self.path_join(self.pcp_log_dir, 'pmlogger',
+                                  self.pcp_hostname, '*')
             pmlogger_ls = self.exec_cmd("ls -t1 %s" % path)
             if pmlogger_ls['status'] == 0:
                 for line in pmlogger_ls['output'].splitlines():
@@ -138,15 +138,15 @@ class Pcp(Plugin, RedHatPlugin, DebianPlugin):
 
         self.add_copy_spec([
             # Collect PCP_LOG_DIR/pmcd and PCP_LOG_DIR/NOTICES
-            os.path.join(self.pcp_log_dir, 'pmcd'),
-            os.path.join(self.pcp_log_dir, 'NOTICES*'),
+            self.path_join(self.pcp_log_dir, 'pmcd'),
+            self.path_join(self.pcp_log_dir, 'NOTICES*'),
             # Collect PCP_VAR_DIR/pmns
-            os.path.join(self.pcp_var_dir, 'pmns'),
+            self.path_join(self.pcp_var_dir, 'pmns'),
             # Also collect any other log and config files
             # (as suggested by fche)
-            os.path.join(self.pcp_log_dir, '*/*.log*'),
-            os.path.join(self.pcp_log_dir, '*/*/*.log*'),
-            os.path.join(self.pcp_log_dir, '*/*/config*')
+            self.path_join(self.pcp_log_dir, '*/*.log*'),
+            self.path_join(self.pcp_log_dir, '*/*/*.log*'),
+            self.path_join(self.pcp_log_dir, '*/*/config*')
         ])
 
         # Collect a summary for the current day
diff --git a/sos/report/plugins/postfix.py b/sos/report/plugins/postfix.py
index 8f584430..3ca0c4ad 100644
--- a/sos/report/plugins/postfix.py
+++ b/sos/report/plugins/postfix.py
@@ -41,7 +41,7 @@ class Postfix(Plugin):
         ]
         fp = []
         try:
-            with open('/etc/postfix/main.cf', 'r') as cffile:
+            with open(self.path_join('/etc/postfix/main.cf'), 'r') as cffile:
                 for line in cffile.readlines():
                     # ignore comments and take the first word after '='
                     if line.startswith('#'):
diff --git a/sos/report/plugins/postgresql.py b/sos/report/plugins/postgresql.py
index bec0b019..00824db7 100644
--- a/sos/report/plugins/postgresql.py
+++ b/sos/report/plugins/postgresql.py
@@ -124,7 +124,7 @@ class RedHatPostgreSQL(PostgreSQL, SCLPlugin):
 
             # copy PG_VERSION and postmaster.opts
             for f in ["PG_VERSION", "postmaster.opts"]:
-                self.add_copy_spec(os.path.join(_dir, "data", f))
+                self.add_copy_spec(self.path_join(_dir, "data", f))
 
 
 class DebianPostgreSQL(PostgreSQL, DebianPlugin, UbuntuPlugin):
diff --git a/sos/report/plugins/powerpc.py b/sos/report/plugins/powerpc.py
index 4fb4f87c..50f88650 100644
--- a/sos/report/plugins/powerpc.py
+++ b/sos/report/plugins/powerpc.py
@@ -22,7 +22,7 @@ class PowerPC(Plugin, IndependentPlugin):
 
     def setup(self):
         try:
-            with open('/proc/cpuinfo', 'r') as fp:
+            with open(self.path_join('/proc/cpuinfo'), 'r') as fp:
                 contents = fp.read()
                 ispSeries = "pSeries" in contents
                 isPowerNV = "PowerNV" in contents
diff --git a/sos/report/plugins/processor.py b/sos/report/plugins/processor.py
index 2df2dc9a..c3d8930c 100644
--- a/sos/report/plugins/processor.py
+++ b/sos/report/plugins/processor.py
@@ -7,7 +7,6 @@
 # See the LICENSE file in the source distribution for further information.
 
 from sos.report.plugins import Plugin, IndependentPlugin
-import os
 
 
 class Processor(Plugin, IndependentPlugin):
@@ -41,7 +40,7 @@ class Processor(Plugin, IndependentPlugin):
         # cumulative directory size exceeds 25MB or even 100MB.
         cdirs = self.listdir('/sys/devices/system/cpu')
         self.add_copy_spec([
-            os.path.join('/sys/devices/system/cpu', cdir) for cdir in cdirs
+            self.path_join('/sys/devices/system/cpu', cdir) for cdir in cdirs
         ])
 
         self.add_cmd_output([
diff --git a/sos/report/plugins/python.py b/sos/report/plugins/python.py
index e2ab39ab..a8ec0cd8 100644
--- a/sos/report/plugins/python.py
+++ b/sos/report/plugins/python.py
@@ -68,9 +68,9 @@ class RedHatPython(Python, RedHatPlugin):
             ]
 
             for py_path in py_paths:
-                for root, _, files in os.walk(py_path):
+                for root, _, files in os.walk(self.path_join(py_path)):
                     for file_ in files:
-                        filepath = os.path.join(root, file_)
+                        filepath = self.path_join(root, file_)
                         if filepath.endswith('.py'):
                             try:
                                 with open(filepath, 'rb') as f:
diff --git a/sos/report/plugins/sar.py b/sos/report/plugins/sar.py
index 669f5d7b..b60005b1 100644
--- a/sos/report/plugins/sar.py
+++ b/sos/report/plugins/sar.py
@@ -7,7 +7,6 @@
 # See the LICENSE file in the source distribution for further information.
 
 from sos.report.plugins import Plugin, RedHatPlugin, DebianPlugin, UbuntuPlugin
-import os
 import re
 
 
@@ -24,7 +23,7 @@ class Sar(Plugin,):
                     "", False)]
 
     def setup(self):
-        self.add_copy_spec(os.path.join(self.sa_path, '*'),
+        self.add_copy_spec(self.path_join(self.sa_path, '*'),
                            sizelimit=0 if self.get_option("all_sar") else None,
                            tailit=False)
 
@@ -44,7 +43,7 @@ class Sar(Plugin,):
         # as option for sadc
         for fname in dir_list:
             if sa_regex.match(fname):
-                sa_data_path = os.path.join(self.sa_path, fname)
+                sa_data_path = self.path_join(self.sa_path, fname)
                 sar_filename = 'sar' + fname[2:]
                 if sar_filename not in dir_list:
                     sar_cmd = 'sh -c "sar -A -f %s"' % sa_data_path
diff --git a/sos/report/plugins/sos_extras.py b/sos/report/plugins/sos_extras.py
index ffde4138..55bc4dc0 100644
--- a/sos/report/plugins/sos_extras.py
+++ b/sos/report/plugins/sos_extras.py
@@ -58,7 +58,7 @@ class SosExtras(Plugin, IndependentPlugin):
 
         for path, dirlist, filelist in os.walk(self.extras_dir):
             for f in filelist:
-                _file = os.path.join(path, f)
+                _file = self.path_join(path, f)
                 self._log_warn("Collecting data from extras file %s" % _file)
                 try:
                     for line in open(_file).read().splitlines():
diff --git a/sos/report/plugins/ssh.py b/sos/report/plugins/ssh.py
index 971cda8b..9ac9dec0 100644
--- a/sos/report/plugins/ssh.py
+++ b/sos/report/plugins/ssh.py
@@ -42,7 +41,7 @@ class Ssh(Plugin, IndependentPlugin):
         try:
             for sshcfg in sshcfgs:
                 tag = sshcfg.split('/')[-1]
-                with open(sshcfg, 'r') as cfgfile:
+                with open(self.path_join(sshcfg), 'r') as cfgfile:
                     for line in cfgfile:
                         # skip empty lines and comments
                         if len(line.split()) == 0 or line.startswith('#'):
diff --git a/sos/report/plugins/unpackaged.py b/sos/report/plugins/unpackaged.py
index 9205e53f..772b1d1f 100644
--- a/sos/report/plugins/unpackaged.py
+++ b/sos/report/plugins/unpackaged.py
@@ -40,7 +40,7 @@ class Unpackaged(Plugin, RedHatPlugin):
                     for e in exclude:
                         dirs[:] = [d for d in dirs if d not in e]
                 for name in files:
-                    path = os.path.join(root, name)
+                    path = self.path_join(root, name)
                     try:
                         if stat.S_ISLNK(os.lstat(path).st_mode):
                             path = Path(path).resolve()
@@ -49,7 +49,7 @@ class Unpackaged(Plugin, RedHatPlugin):
                     file_list.append(os.path.realpath(path))
                 for name in dirs:
                     file_list.append(os.path.realpath(
-                                     os.path.join(root, name)))
+                                     self.path_join(root, name)))
 
             return file_list
 
diff --git a/sos/report/plugins/watchdog.py b/sos/report/plugins/watchdog.py
index 1bf3f4cb..bf2dc9cb 100644
--- a/sos/report/plugins/watchdog.py
+++ b/sos/report/plugins/watchdog.py
@@ -11,7 +11,6 @@
 from sos.report.plugins import Plugin, RedHatPlugin
 
 from glob import glob
-import os
 
 
 class Watchdog(Plugin, RedHatPlugin):
@@ -56,8 +55,8 @@ class Watchdog(Plugin, RedHatPlugin):
             Collect configuration files, custom executables for test-binary
             and repair-binary, and stdout/stderr logs.
         """
-        conf_file = self.get_option('conf_file')
-        log_dir = '/var/log/watchdog'
+        conf_file = self.path_join(self.get_option('conf_file'))
+        log_dir = self.path_join('/var/log/watchdog')
 
         # Get service configuration and sysconfig files
         self.add_copy_spec([
@@ -80,15 +79,15 @@ class Watchdog(Plugin, RedHatPlugin):
             self._log_warn("Could not read %s: %s" % (conf_file, ex))
 
         if self.get_option('all_logs'):
-            log_files = glob(os.path.join(log_dir, '*'))
+            log_files = glob(self.path_join(log_dir, '*'))
         else:
-            log_files = (glob(os.path.join(log_dir, '*.stdout')) +
-                         glob(os.path.join(log_dir, '*.stderr')))
+            log_files = (glob(self.path_join(log_dir, '*.stdout')) +
+                         glob(self.path_join(log_dir, '*.stderr')))
 
         self.add_copy_spec(log_files)
 
         # Get output of "wdctl <device>" for each /dev/watchdog*
-        for dev in glob('/dev/watchdog*'):
+        for dev in glob(self.path_join('/dev/watchdog*')):
             self.add_cmd_output("wdctl %s" % dev)
 
 # vim: set et ts=4 sw=4 :
diff --git a/sos/report/plugins/yum.py b/sos/report/plugins/yum.py
index 148464cb..e5256642 100644
--- a/sos/report/plugins/yum.py
+++ b/sos/report/plugins/yum.py
@@ -61,7 +61,7 @@ class Yum(Plugin, RedHatPlugin):
                 if not p.endswith(".py"):
                     continue
                 plugins = plugins + " " if len(plugins) else ""
-                plugins = plugins + os.path.join(YUM_PLUGIN_PATH, p)
+                plugins = plugins + self.path_join(YUM_PLUGIN_PATH, p)
             if len(plugins):
                 self.add_cmd_output("rpm -qf %s" % plugins,
                                     suggest_filename="plugin-packages")
-- 
2.31.1

From f4af5efdc79aefe1aa685c36d095925bae14dc4a Mon Sep 17 00:00:00 2001
From: Jake Hunsaker <jhunsake@redhat.com>
Date: Tue, 28 Sep 2021 13:00:17 -0400
Subject: [PATCH 1/4] [collect] Add --transport option and allow clusters to
 set transport type

Adds a new `--transport` option for users to be able to specify the type
of transport to use when connecting to nodes. The default value of
`auto` will defer to the cluster profile to set the transport type,
which will continue to default to OpenSSH's ControlPersist feature.

Clusters may override the new `set_transport_type()` method to change
the default transport used.

If `--transport` is anything besides `auto`, then the cluster profile
will not be deferred to when choosing a transport for each remote node.

Signed-off-by: Jake Hunsaker <jhunsake@redhat.com>
---
 man/en/sos-collect.1               | 15 +++++++++++++++
 sos/collector/__init__.py          |  6 ++++++
 sos/collector/clusters/__init__.py | 10 ++++++++++
 sos/collector/exceptions.py        | 13 ++++++++++++-
 sos/collector/sosnode.py           | 16 +++++++++++++++-
 5 files changed, 58 insertions(+), 2 deletions(-)

diff --git a/man/en/sos-collect.1 b/man/en/sos-collect.1
index e930023e..8ad4fe5e 100644
--- a/man/en/sos-collect.1
+++ b/man/en/sos-collect.1
@@ -43,6 +43,7 @@ sos collect \- Collect sosreports from multiple (cluster) nodes
     [\-\-sos-cmd SOS_CMD]
     [\-t|\-\-threads THREADS]
     [\-\-timeout TIMEOUT]
+    [\-\-transport TRANSPORT]
     [\-\-tmp\-dir TMP_DIR]
     [\-v|\-\-verbose]
     [\-\-verify]
@@ -350,6 +351,20 @@ Note that sosreports are collected in parallel, so you can approximate the total
 runtime of sos collect via timeout*(number of nodes/jobs).
 
 Default is 180 seconds.
+.TP
+\fB\-\-transport\fR TRANSPORT
+Specify the type of remote transport to use to manage connections to remote nodes.
+
+\fBsos collect\fR uses locally installed binaries to connect to and interact with remote
+nodes, rather than implementing those connections itself. By default, OpenSSH's ControlPersist
+feature is preferred, however certain cluster types may have preferences of their own for how
+remote sessions should be established.
+
+The types of transports supported are currently as follows:
+
+    \fBauto\fR                  Allow the cluster type to determine the transport used
+    \fBcontrol_persist\fR       Use OpenSSH's ControlPersist feature. This is the default behavior
+
 .TP
 \fB\-\-tmp\-dir\fR TMP_DIR
 Specify a temporary directory to save sos archives to. By default one will be created in
diff --git a/sos/collector/__init__.py b/sos/collector/__init__.py
index da912655..fecfe6aa 100644
--- a/sos/collector/__init__.py
+++ b/sos/collector/__init__.py
@@ -98,6 +98,7 @@ class SoSCollector(SoSComponent):
         'ssh_port': 22,
         'ssh_user': 'root',
         'timeout': 600,
+        'transport': 'auto',
         'verify': False,
         'usernames': [],
         'upload': False,
@@ -378,6 +379,8 @@ class SoSCollector(SoSComponent):
                                  help='Specify an SSH user. Default root')
         collect_grp.add_argument('--timeout', type=int, required=False,
                                  help='Timeout for sosreport on each node.')
+        collect_grp.add_argument('--transport', default='auto', type=str,
+                                 help='Remote connection transport to use')
         collect_grp.add_argument("--upload", action="store_true",
                                  default=False,
                                  help="Upload archive to a policy-default "
@@ -813,6 +813,8 @@ class SoSCollector(SoSComponent):
         self.collect_md.add_field('cluster_type', self.cluster_type)
         if self.cluster:
             self.master.cluster = self.cluster
+            if self.opts.transport == 'auto':
+                self.opts.transport = self.cluster.set_transport_type()
             self.cluster.setup()
             if self.cluster.cluster_ssh_key:
                 if not self.opts.ssh_key:
@@ -1041,6 +1046,7 @@ class SoSCollector(SoSComponent):
             else:
                 client.disconnect()
         except Exception:
+            # all exception logging is handled within SoSNode
             pass
 
     def intro(self):
diff --git a/sos/collector/clusters/__init__.py b/sos/collector/clusters/__init__.py
index 64ac2a44..cf1e7a0b 100644
--- a/sos/collector/clusters/__init__.py
+++ b/sos/collector/clusters/__init__.py
@@ -149,6 +149,16 @@ class Cluster():
         """
         pass
 
+    def set_transport_type(self):
+        """The default connection type used by sos collect is to leverage the
+        local system's SSH installation using ControlPersist, however certain
+        cluster types may want to use something else.
+
+        Override this in a specific cluster profile to set the ``transport``
+        option according to what type of transport should be used.
+        """
+        return 'control_persist'
+
     def set_master_options(self, node):
         """If there is a need to set specific options in the sos command being
         run on the cluster's master nodes, override this method in the cluster
diff --git a/sos/collector/exceptions.py b/sos/collector/exceptions.py
index 1e44768b..2bb07e7b 100644
--- a/sos/collector/exceptions.py
+++ b/sos/collector/exceptions.py
@@ -94,6 +94,16 @@ class UnsupportedHostException(Exception):
         super(UnsupportedHostException, self).__init__(message)
 
 
+class InvalidTransportException(Exception):
+    """Raised when a transport is requested but it does not exist or is
+    not supported locally"""
+
+    def __init__(self, transport=None):
+        message = ("Connection failed: unknown or unsupported transport %s"
+                   % transport if transport else '')
+        super(InvalidTransportException, self).__init__(message)
+
+
 __all__ = [
     'AuthPermissionDeniedException',
     'CommandTimeoutException',
@@ -104,5 +114,6 @@ __all__ = [
     'InvalidPasswordException',
     'PasswordRequestException',
     'TimeoutPasswordAuthException',
-    'UnsupportedHostException'
+    'UnsupportedHostException',
+    'InvalidTransportException'
 ]
diff --git a/sos/collector/sosnode.py b/sos/collector/sosnode.py
index f79bd5ff..5c5c7201 100644
--- a/sos/collector/sosnode.py
+++ b/sos/collector/sosnode.py
@@ -22,7 +22,13 @@ from sos.collector.transports.control_persist import SSHControlPersist
 from sos.collector.transports.local import LocalTransport
 from sos.collector.exceptions import (CommandTimeoutException,
                                       ConnectionException,
-                                      UnsupportedHostException)
+                                      UnsupportedHostException,
+                                      InvalidTransportException)
+
+TRANSPORTS = {
+    'local': LocalTransport,
+    'control_persist': SSHControlPersist,
+}
 
 
 class SosNode():
@@ -107,6 +113,14 @@ class SosNode():
         if self.address in ['localhost', '127.0.0.1']:
             self.local = True
             return LocalTransport(self.address, commons)
+        elif self.opts.transport in TRANSPORTS.keys():
+            return TRANSPORTS[self.opts.transport](self.address, commons)
+        elif self.opts.transport != 'auto':
+            self.log_error(
+                "Connection failed: unknown or unsupported transport %s"
+                % self.opts.transport
+            )
+            raise InvalidTransportException(self.opts.transport)
         return SSHControlPersist(self.address, commons)
 
     def _fmt_msg(self, msg):
-- 
2.31.1

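To illustrate the new hook added above: when '--transport' is left at its
default of 'auto', the loaded cluster profile chooses the transport by
overriding set_transport_type(). A minimal sketch follows; the profile name
is hypothetical, and the return value must be a key registered in sosnode's
TRANSPORTS dict:

    from sos.collector.clusters import Cluster

    class mycluster(Cluster):
        """Hypothetical profile that pins the transport explicitly"""

        def set_transport_type(self):
            # 'control_persist' and 'local' are the keys registered
            # as of this patch
            return 'control_persist'

Users may also bypass profile selection entirely, e.g. with
'sos collect --transport control_persist'.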

From dbc49345384404600f45d68b8d3c6541b2a26480 Mon Sep 17 00:00:00 2001
From: Jake Hunsaker <jhunsake@redhat.com>
Date: Thu, 30 Sep 2021 10:38:18 -0400
Subject: [PATCH 2/4] [transports] Add 'oc' as a transport option for remote
 nodes

This commit adds a new transport for `sos collect` by leveraging a
locally available `oc` binary that has been properly configured for
access to an OCP cluster.

This transport will allow users to use `sos collect` to collect reports
from an OCP cluster without directly connecting to any of the nodes
involved. We do this by using the `oc` binary to first launch a pod on
the target node(s) and then exec our discovery commands and the eventual
`sos report` command in that pod. This in turn depends on a functional
API for the `oc` binary to communicate with. In the event that `oc` is
not __locally__ available or is not properly configured, we will fall
back to the current default of using SSH ControlPersist to directly
connect to the nodes. Otherwise, the OCP cluster profile will
automatically attempt to use this new transport.
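In rough terms, once a debug pod is running on a node, every command is
wrapped in an 'oc exec' call against that pod and chrooted to the node's
filesystem. A minimal sketch of that wrapping, with simplified names
mirroring the transport code below:

    def wrap_for_pod(project, pod_name, cmd):
        # chroot to /host so the command sees the node's root
        # filesystem rather than the debug container's
        return ("oc -n %s exec --request-timeout=0 %s -- chroot /host %s"
                % (project, pod_name, cmd))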
---
 man/en/sos-collect.1                 |   1 +
 sos/collector/clusters/ocp.py        |  14 ++
 sos/collector/sosnode.py             |   8 +-
 sos/collector/transports/__init__.py |  20 ++-
 sos/collector/transports/oc.py       | 220 +++++++++++++++++++++++++++
 5 files changed, 257 insertions(+), 6 deletions(-)
 create mode 100644 sos/collector/transports/oc.py

diff --git a/man/en/sos-collect.1 b/man/en/sos-collect.1
index 8ad4fe5e..a1f6c10e 100644
--- a/man/en/sos-collect.1
+++ b/man/en/sos-collect.1
@@ -364,6 +364,7 @@ The types of transports supported are currently as follows:
 
     \fBauto\fR                  Allow the cluster type to determine the transport used
     \fBcontrol_persist\fR       Use OpenSSH's ControlPersist feature. This is the default behavior
+    \fBoc\fR                    Use a \fBlocally\fR configured \fBoc\fR binary to deploy collection pods on OCP nodes
 
 .TP
 \fB\-\-tmp\-dir\fR TMP_DIR
diff --git a/sos/collector/clusters/ocp.py b/sos/collector/clusters/ocp.py
index ad97587f..a9357dbf 100644
--- a/sos/collector/clusters/ocp.py
+++ b/sos/collector/clusters/ocp.py
@@ -12,6 +12,7 @@ import os
 
 from pipes import quote
 from sos.collector.clusters import Cluster
+from sos.utilities import is_executable
 
 
 class ocp(Cluster):
@@ -83,6 +84,19 @@ class ocp(Cluster):
                     nodes[_node[0]][column] = _node[idx[column]]
         return nodes
 
+    def set_transport_type(self):
+        if is_executable('oc'):
+            return 'oc'
+        self.log_info("Local installation of 'oc' not found or is not "
+                      "correctly configured. Will use ControlPersist")
+        self.ui_log.warn(
+            "Preferred transport 'oc' not available, will fallback to SSH."
+        )
+        if not self.opts.batch:
+            input("Press ENTER to continue connecting with SSH, or Ctrl+C to"
+                  "abort.")
+        return 'control_persist'
+
     def get_nodes(self):
         nodes = []
         self.node_dict = {}
diff --git a/sos/collector/sosnode.py b/sos/collector/sosnode.py
index 5c5c7201..8a9dbd7a 100644
--- a/sos/collector/sosnode.py
+++ b/sos/collector/sosnode.py
@@ -20,6 +20,7 @@ from sos.policies import load
 from sos.policies.init_systems import InitSystem
 from sos.collector.transports.control_persist import SSHControlPersist
 from sos.collector.transports.local import LocalTransport
+from sos.collector.transports.oc import OCTransport
 from sos.collector.exceptions import (CommandTimeoutException,
                                       ConnectionException,
                                       UnsupportedHostException,
@@ -28,6 +29,7 @@ from sos.collector.exceptions import (CommandTimeoutException,
 TRANSPORTS = {
     'local': LocalTransport,
     'control_persist': SSHControlPersist,
+    'oc': OCTransport
 }
 
 
@@ -421,13 +423,11 @@ class SosNode():
         if 'atomic' in cmd:
             get_pty = True
 
-        if get_pty:
-            cmd = "/bin/bash -c %s" % quote(cmd)
-
         if env:
             _cmd_env = self.env_vars
             env = _cmd_env.update(env)
-        return self._transport.run_command(cmd, timeout, need_root, env)
+        return self._transport.run_command(cmd, timeout, need_root, env,
+                                           get_pty)
 
     def sosreport(self):
         """Run an sos report on the node, then collect it"""
diff --git a/sos/collector/transports/__init__.py b/sos/collector/transports/__init__.py
index 5be7dc6d..7bffee62 100644
--- a/sos/collector/transports/__init__.py
+++ b/sos/collector/transports/__init__.py
@@ -144,7 +144,8 @@ class RemoteTransport():
         raise NotImplementedError("Transport %s does not define disconnect"
                                   % self.name)
 
-    def run_command(self, cmd, timeout=180, need_root=False, env=None):
+    def run_command(self, cmd, timeout=180, need_root=False, env=None,
+                    get_pty=False):
         """Run a command on the node, returning its output and exit code.
         This should return the exit code of the command being executed, not the
         exit code of whatever mechanism the transport uses to execute that
@@ -165,10 +166,15 @@ class RemoteTransport():
         :param env:         Specify env vars to be passed to the ``cmd``
         :type env:          ``dict``
 
+        :param get_pty:     Does ``cmd`` require execution with a pty?
+        :type get_pty:      ``bool``
+
         :returns:           Output of ``cmd`` and the exit code
         :rtype:             ``dict`` with keys ``output`` and ``status``
         """
         self.log_debug('Running command %s' % cmd)
+        if get_pty:
+            cmd = "/bin/bash -c %s" % quote(cmd)
         # currently we only use/support the use of pexpect for handling the
         # execution of these commands, as opposed to directly invoking
         # subprocess.Popen() in conjunction with tools like sshpass.
@@ -212,6 +218,13 @@ class RemoteTransport():
         :type env:      ``dict``
         """
         cmd = self._format_cmd_for_exec(cmd)
+
+        # if for any reason env is empty, set it to None as otherwise
+        # pexpect interprets this to mean "run this command with no env vars of
+        # any kind"
+        if not env:
+            env = None
+
         result = pexpect.spawn(cmd, encoding='utf-8', env=env)
 
         _expects = [pexpect.EOF, pexpect.TIMEOUT]
@@ -268,6 +281,9 @@ class RemoteTransport():
         _out = self.run_command('hostname')
         if _out['status'] == 0:
             self._hostname = _out['output'].strip()
+
+        if not self._hostname:
+            self._hostname = self.address
         self.log_info("Hostname set to %s" % self._hostname)
         return self._hostname
 
@@ -302,7 +318,7 @@ class RemoteTransport():
         return self._read_file(fname)
 
     def _read_file(self, fname):
-        res = self.run_command("cat %s" % fname, timeout=5)
+        res = self.run_command("cat %s" % fname, timeout=10)
         if res['status'] == 0:
             return res['output']
         else:
diff --git a/sos/collector/transports/oc.py b/sos/collector/transports/oc.py
new file mode 100644
index 00000000..649037b9
--- /dev/null
+++ b/sos/collector/transports/oc.py
@@ -0,0 +1,220 @@
+# Copyright Red Hat 2021, Jake Hunsaker <jhunsake@redhat.com>
+
+# This file is part of the sos project: https://github.com/sosreport/sos
+#
+# This copyrighted material is made available to anyone wishing to use,
+# modify, copy, or redistribute it subject to the terms and conditions of
+# version 2 of the GNU General Public License.
+#
+# See the LICENSE file in the source distribution for further information.
+
+import json
+import tempfile
+import os
+
+from sos.collector.transports import RemoteTransport
+from sos.utilities import (is_executable, sos_get_command_output,
+                           SoSTimeoutError)
+
+
+class OCTransport(RemoteTransport):
+    """This transport leverages the execution of commands via a locally
+    available and configured ``oc`` binary for OCPv4 environments.
+
+    OCPv4 clusters generally discourage the use of SSH, so this transport may
+    be used to remove our use of SSH in favor of the environment provided
+    method of connecting to nodes and executing commands via debug pods.
+
+    Note that this approach will generate multiple debug pods over the course
+    of our execution.
+    """
+
+    name = 'oc'
+    project = 'sos-collect-tmp'
+
+    def run_oc(self, cmd, **kwargs):
+        """Format and run a command with `oc` in the project defined for our
+        execution
+        """
+        return sos_get_command_output(
+            "oc -n sos-collect-tmp %s" % cmd,
+            **kwargs
+        )
+
+    @property
+    def connected(self):
+        up = self.run_oc(
+            "wait --timeout=0s --for=condition=ready pod/%s" % self.pod_name
+        )
+        return up['status'] == 0
+
+    def get_node_pod_config(self):
+        """Based on our template for the debug container, add the node-specific
+        items so that we can deploy one of these on each node we're collecting
+        from
+        """
+        return {
+            "kind": "Pod",
+            "apiVersion": "v1",
+            "metadata": {
+                "name": "%s-sos-collector" % self.address.split('.')[0],
+                "namespace": "sos-collect-tmp"
+            },
+            "priorityClassName": "system-cluster-critical",
+            "spec": {
+                "volumes": [
+                    {
+                        "name": "host",
+                        "hostPath": {
+                            "path": "/",
+                            "type": "Directory"
+                        }
+                    },
+                    {
+                        "name": "run",
+                        "hostPath": {
+                            "path": "/run",
+                            "type": "Directory"
+                        }
+                    },
+                    {
+                        "name": "varlog",
+                        "hostPath": {
+                            "path": "/var/log",
+                            "type": "Directory"
+                        }
+                    },
+                    {
+                        "name": "machine-id",
+                        "hostPath": {
+                            "path": "/etc/machine-id",
+                            "type": "File"
+                        }
+                    }
+                ],
+                "containers": [
+                    {
+                        "name": "sos-collector-tmp",
+                        "image": "registry.redhat.io/rhel8/support-tools",
+                        "command": [
+                            "/bin/bash"
+                        ],
+                        "env": [
+                            {
+                                "name": "HOST",
+                                "value": "/host"
+                            }
+                        ],
+                        "resources": {},
+                        "volumeMounts": [
+                            {
+                                "name": "host",
+                                "mountPath": "/host"
+                            },
+                            {
+                                "name": "run",
+                                "mountPath": "/run"
+                            },
+                            {
+                                "name": "varlog",
+                                "mountPath": "/var/log"
+                            },
+                            {
+                                "name": "machine-id",
+                                "mountPath": "/etc/machine-id"
+                            }
+                        ],
+                        "securityContext": {
+                            "privileged": True,
+                            "runAsUser": 0
+                        },
+                        "stdin": True,
+                        "stdinOnce": True,
+                        "tty": True
+                    }
+                ],
+                "restartPolicy": "Never",
+                "nodeName": self.address,
+                "hostNetwork": True,
+                "hostPID": True,
+                "hostIPC": True
+            }
+        }
+
+    def _connect(self, password):
+        # the oc binary must be _locally_ available for this to work
+        if not is_executable('oc'):
+            return False
+
+        # deploy the debug container we'll exec into
+        podconf = self.get_node_pod_config()
+        self.pod_name = podconf['metadata']['name']
+        fd, self.pod_tmp_conf = tempfile.mkstemp(dir=self.tmpdir)
+        with open(fd, 'w') as cfile:
+            json.dump(podconf, cfile)
+        self.log_debug("Starting sos collector container '%s'" % self.pod_name)
+        # this specifically does not need to run with a project definition
+        out = sos_get_command_output(
+            "oc create -f %s" % self.pod_tmp_conf
+        )
+        if (out['status'] != 0 or "pod/%s created" % self.pod_name not in
+                out['output']):
+            self.log_error("Unable to deploy sos collect pod")
+            self.log_debug("Debug pod deployment failed: %s" % out['output'])
+            return False
+        self.log_debug("Pod '%s' successfully deployed, waiting for pod to "
+                       "enter ready state" % self.pod_name)
+
+        # wait for the pod to report as running
+        try:
+            up = self.run_oc("wait --for=condition=Ready pod/%s --timeout=30s"
+                             % self.pod_name,
+                             # timeout is for local safety, not oc
+                             timeout=40)
+            if not up['status'] == 0:
+                self.log_error("Pod not available after 30 seconds")
+                return False
+        except SoSTimeoutError:
+            self.log_error("Timeout while polling for pod readiness")
+            return False
+        except Exception as err:
+            self.log_error("Error while waiting for pod to be ready: %s"
+                           % err)
+            return False
+
+        return True
+
+    def _format_cmd_for_exec(self, cmd):
+        if cmd.startswith('oc'):
+            return ("oc -n %s exec --request-timeout=0 %s -- chroot /host %s"
+                    % (self.project, self.pod_name, cmd))
+        return super(OCTransport, self)._format_cmd_for_exec(cmd)
+
+    def run_command(self, cmd, timeout=180, need_root=False, env=None,
+                    get_pty=False):
+        # debug pod setup is slow, extend all timeouts to account for this
+        if timeout:
+            timeout += 10
+
+        # since we always execute within a bash shell, force disable get_pty
+        # to avoid double-quoting
+        return super(OCTransport, self).run_command(cmd, timeout, need_root,
+                                                    env, False)
+
+    def _disconnect(self):
+        os.unlink(self.pod_tmp_conf)
+        removed = self.run_oc("delete pod %s" % self.pod_name)
+        if "deleted" not in removed['output']:
+            self.log_debug("Calling delete on pod '%s' failed: %s"
+                           % (self.pod_name, removed))
+            return False
+        return True
+
+    @property
+    def remote_exec(self):
+        return ("oc -n %s exec --request-timeout=0 %s -- /bin/bash -c"
+                % (self.project, self.pod_name))
+
+    def _retrieve_file(self, fname, dest):
+        cmd = self.run_oc("cp %s:%s %s" % (self.pod_name, fname, dest))
+        return cmd['status'] == 0
-- 
2.31.1


From 460494c4296db1a7529b44fe8f6597544c917c02 Mon Sep 17 00:00:00 2001
From: Jake Hunsaker <jhunsake@redhat.com>
Date: Mon, 11 Oct 2021 11:50:44 -0400
Subject: [PATCH 3/4] [ocp] Create temporary project and restrict default node
 list to masters

Adds explicit setup of a new project to use in the `ocp` cluster and
adds better handling of cluster setup generally, which the `ocp` cluster
is the first to make use of.

Included in this change is a correction to
`Cluster.exec_primary_cmd()`'s use of `get_pty`, which is now determined
by whether the primary node is the local node.

Additionally, based on feedback from the OCP engineering team, node
lists are now restricted to masters by default.
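
As a usage note, the default role filter can be overridden at runtime
via the cluster option referenced in the warning added below, e.g.:

    sos collect --cluster-type ocp -c ocp.role=master:worker

The role names here are illustrative; any roles reported by the cluster
may be given, colon-delimited.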

Signed-off-by: Jake Hunsaker <jhunsake@redhat.com>
---
 sos/collector/__init__.py          |  5 ++++
 sos/collector/clusters/__init__.py | 13 +++++++-
 sos/collector/clusters/ocp.py      | 48 ++++++++++++++++++++++++++++--
 sos/collector/transports/oc.py     |  4 +--
 4 files changed, 64 insertions(+), 6 deletions(-)

diff --git a/sos/collector/__init__.py b/sos/collector/__init__.py
index fecfe6aa..a76f8a79 100644
--- a/sos/collector/__init__.py
+++ b/sos/collector/__init__.py
@@ -850,6 +850,7 @@ class SoSCollector(SoSComponent):
                       "CTRL-C to quit\n")
                 self.ui_log.info("")
             except KeyboardInterrupt:
+                self.cluster.cleanup()
                 self.exit("Exiting on user cancel", 130)
 
     def configure_sos_cmd(self):
@@ -1098,6 +1099,7 @@ this utility or remote systems that it connects to.
         self.archive.makedirs('sos_logs', 0o755)
 
         self.collect()
+        self.cluster.cleanup()
         self.cleanup()
 
     def collect(self):
@@ -1156,9 +1158,11 @@ this utility or remote systems that it connects to.
             pool.shutdown(wait=True)
         except KeyboardInterrupt:
             self.log_error('Exiting on user cancel\n')
+            self.cluster.cleanup()
             os._exit(130)
         except Exception as err:
             self.log_error('Could not connect to nodes: %s' % err)
+            self.cluster.cleanup()
             os._exit(1)
 
         if hasattr(self.cluster, 'run_extra_cmd'):
@@ -1199,6 +1199,7 @@ this utility or remote systems that it connects to.
             arc_name = self.create_cluster_archive()
         else:
             msg = 'No sosreports were collected, nothing to archive...'
+            self.cluster.cleanup()
             self.exit(msg, 1)
 
         if self.opts.upload and self.policy.get_upload_url():
diff --git a/sos/collector/clusters/__init__.py b/sos/collector/clusters/__init__.py
index cf1e7a0b..2a4665a1 100644
--- a/sos/collector/clusters/__init__.py
+++ b/sos/collector/clusters/__init__.py
@@ -192,7 +192,8 @@ class Cluster():
         :returns: The output and status of `cmd`
         :rtype: ``dict``
         """
-        res = self.master.run_command(cmd, get_pty=True, need_root=need_root)
+        pty = self.master.local is False
+        res = self.master.run_command(cmd, get_pty=pty, need_root=need_root)
         if res['output']:
             res['output'] = res['output'].replace('Password:', '')
         return res
@@ -223,6 +224,16 @@ class Cluster():
                 return True
         return False
 
+    def cleanup(self):
+        """
+        This may be overridden by cluster profiles.
+
+        Perform any necessary cleanup steps required by the cluster profile.
+        This helps ensure that sos does not make lasting changes to the
+        environment in which we are running.
+        """
+        pass
+
     def get_nodes(self):
         """
         This MUST be overridden by a cluster profile subclassing this class
diff --git a/sos/collector/clusters/ocp.py b/sos/collector/clusters/ocp.py
index a9357dbf..92da4e6e 100644
--- a/sos/collector/clusters/ocp.py
+++ b/sos/collector/clusters/ocp.py
@@ -23,10 +23,12 @@ class ocp(Cluster):
 
     api_collect_enabled = False
     token = None
+    project = 'sos-collect-tmp'
+    oc_cluster_admin = None
 
     option_list = [
         ('label', '', 'Colon delimited list of labels to select nodes with'),
-        ('role', '', 'Colon delimited list of roles to select nodes with'),
+        ('role', 'master', 'Colon delimited list of roles to filter on'),
         ('kubeconfig', '', 'Path to the kubeconfig file'),
         ('token', '', 'Service account token to use for oc authorization')
     ]
@@ -58,6 +58,42 @@ class ocp(Cluster):
         _who = self.fmt_oc_cmd('whoami')
         return self.exec_master_cmd(_who)['status'] == 0
 
+    def setup(self):
+        """Create the project that we will be executing in for any nodes'
+        collection via a container image
+        """
+        if not self.set_transport_type() == 'oc':
+            return
+
+        out = self.exec_master_cmd(self.fmt_oc_cmd("auth can-i '*' '*'"))
+        self.oc_cluster_admin = out['status'] == 0
+        if not self.oc_cluster_admin:
+            self.log_debug("Check for cluster-admin privileges returned false,"
+                           " cannot create project in OCP cluster")
+            raise Exception("Insufficient permissions to create temporary "
+                            "collection project.\nAborting...")
+
+        self.log_info("Creating new temporary project '%s'" % self.project)
+        ret = self.exec_master_cmd("oc new-project %s" % self.project)
+        if ret['status'] == 0:
+            return True
+
+        self.log_debug("Failed to create project: %s" % ret['output'])
+        raise Exception("Failed to create temporary project for collection. "
+                        "\nAborting...")
+
+    def cleanup(self):
+        """Remove the project we created to execute within
+        """
+        if self.project:
+            ret = self.exec_master_cmd("oc delete project %s" % self.project)
+            if not ret['status'] == 0:
+                self.log_error("Error deleting temporary project: %s"
+                               % ret['output'])
+            # don't leave the config on a non-existing project
+            self.exec_master_cmd("oc project default")
+        return True
+
     def _build_dict(self, nodelist):
         """From the output of get_nodes(), construct an easier-to-reference
         dict of nodes that will be used in determining labels, master status,
@@ -85,10 +123,10 @@ class ocp(Cluster):
         return nodes
 
     def set_transport_type(self):
-        if is_executable('oc'):
+        if is_executable('oc') or self.opts.transport == 'oc':
             return 'oc'
         self.log_info("Local installation of 'oc' not found or is not "
-                      "correctly configured. Will use ControlPersist")
+                      "correctly configured. Will use ControlPersist.")
         self.ui_log.warn(
             "Preferred transport 'oc' not available, will fallback to SSH."
         )
@@ -106,6 +144,10 @@ class ocp(Cluster):
             cmd += " -l %s" % quote(labels)
         res = self.exec_master_cmd(self.fmt_oc_cmd(cmd))
         if res['status'] == 0:
+            if self.get_option('role') == 'master':
+                self.log_warn("NOTE: By default, only master nodes are listed."
+                              "\nTo collect from all/more nodes, override the "
+                              "role option with '-c ocp.role=role1:role2'")
             roles = [r for r in self.get_option('role').split(':')]
             self.node_dict = self._build_dict(res['output'].splitlines())
             for node in self.node_dict:
diff --git a/sos/collector/transports/oc.py b/sos/collector/transports/oc.py
index 649037b9..de044ccb 100644
--- a/sos/collector/transports/oc.py
+++ b/sos/collector/transports/oc.py
@@ -37,7 +37,7 @@ class OCTransport(RemoteTransport):
         execution
         """
         return sos_get_command_output(
-            "oc -n sos-collect-tmp %s" % cmd,
+            "oc -n %s %s" % (self.project, cmd),
             **kwargs
         )
 
@@ -58,7 +58,7 @@ class OCTransport(RemoteTransport):
             "apiVersion": "v1",
             "metadata": {
                 "name": "%s-sos-collector" % self.address.split('.')[0],
-                "namespace": "sos-collect-tmp"
+                "namespace": self.project
             },
             "priorityClassName": "system-cluster-critical",
             "spec": {
-- 
2.31.1


From 1bc0e9fe32491e764e622368bfe216f97bf32620 Mon Sep 17 00:00:00 2001
From: Jake Hunsaker <jhunsake@redhat.com>
Date: Mon, 4 Oct 2021 15:12:04 -0400
Subject: [PATCH 4/4] [sosnode] Fix typo and small logic break

Fixes a typo in setting the non-primary node options from the ocp
profile against the sosnode object. Second, fixes a small break in
manifest checksum handling that was discovered during edge case testing
of the `oc` transport.

Signed-off-by: Jake Hunsaker <jhunsake@redhat.com>
---
 sos/collector/clusters/ocp.py | 4 ++--
 sos/collector/sosnode.py      | 4 +++-
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/sos/collector/clusters/ocp.py b/sos/collector/clusters/ocp.py
index 92da4e6e..22a7289a 100644
--- a/sos/collector/clusters/ocp.py
+++ b/sos/collector/clusters/ocp.py
@@ -183,7 +183,7 @@ class ocp(Cluster):
         if self.api_collect_enabled:
             # a primary has already been enabled for API collection, disable
             # it among others
-            node.plugin_options.append('openshift.no-oc=on')
+            node.plugopts.append('openshift.no-oc=on')
         else:
             _oc_cmd = 'oc'
             if node.host.containerized:
@@ -223,6 +223,6 @@ class ocp(Cluster):
 
     def set_node_options(self, node):
         # don't attempt OC API collections on non-primary nodes
-        node.plugin_options.append('openshift.no-oc=on')
+        node.plugopts.append('openshift.no-oc=on')
 
 # vim: set et ts=4 sw=4 :
diff --git a/sos/collector/sosnode.py b/sos/collector/sosnode.py
index 8a9dbd7a..ab7f23cc 100644
--- a/sos/collector/sosnode.py
+++ b/sos/collector/sosnode.py
@@ -714,7 +714,7 @@ class SosNode():
                     elif line.startswith("The checksum is: "):
                         checksum = line.split()[3]
 
-                if checksum is not None:
+                if checksum:
                     self.manifest.add_field('checksum', checksum)
                     if len(checksum) == 32:
                         self.manifest.add_field('checksum_type', 'md5')
@@ -722,6 +722,8 @@ class SosNode():
                         self.manifest.add_field('checksum_type', 'sha256')
                     else:
                         self.manifest.add_field('checksum_type', 'unknown')
+                else:
+                    self.manifest.add_field('checksum_type', 'unknown')
             else:
                 err = self.determine_sos_error(res['status'], res['output'])
                 self.log_debug("Error running sos report. rc = %s msg = %s"
-- 
2.31.1

From a7ffacd855fcf2e9a136c6946744cbc99bc91272 Mon Sep 17 00:00:00 2001
From: Jake Hunsaker <jhunsake@redhat.com>
Date: Tue, 19 Oct 2021 14:31:37 -0400
Subject: [PATCH] [Plugin] Wrap add_service_status to add_cmd_output

The changes to allow writing command output to a file highlighted a
shortcoming in add_service_status - by wrapping to `_add_cmd_output()`
instead of `add_cmd_output()`, we are not applying the default values
for kwarg parameters and thus potentially causing undesired behavior
during the actual collection.

Fix this by wrapping to `add_cmd_output()`, which will then in turn call
`_add_cmd_output()`.
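
A simplified sketch of the relationship, with heavily abbreviated
signatures (the real methods accept many more parameters):

    def add_cmd_output(self, cmds, timeout=None, priority=10, **kwargs):
        # public entry point: normalize input and apply default kwarg
        # values before delegating to the internal helper
        if isinstance(cmds, str):
            cmds = [cmds]
        for cmd in cmds:
            self._add_cmd_output(cmd=cmd, timeout=timeout,
                                 priority=priority, **kwargs)

Calling `_add_cmd_output()` directly bypasses that defaulting layer,
which is what `add_service_status()` had been doing.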

Signed-off-by: Jake Hunsaker <jhunsake@redhat.com>
---
 sos/report/plugins/__init__.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/sos/report/plugins/__init__.py b/sos/report/plugins/__init__.py
index 38529a9d..98f163ab 100644
--- a/sos/report/plugins/__init__.py
+++ b/sos/report/plugins/__init__.py
@@ -2493,7 +2493,7 @@ class Plugin():
         :param services: Service name(s) to collect statuses for
         :type services: ``str`` or a ``list`` of strings

-        :param kwargs:   Optional arguments to pass to _add_cmd_output
+        :param kwargs:   Optional arguments to pass to add_cmd_output
                          (timeout, predicate, suggest_filename,..)

         """
@@ -2508,7 +2508,7 @@ class Plugin():
             return

         for service in services:
-            self._add_cmd_output(cmd="%s %s" % (query, service), **kwargs)
+            self.add_cmd_output("%s %s" % (query, service), **kwargs)

     def add_journal(self, units=None, boot=None, since=None, until=None,
                     lines=None, allfields=False, output=None,
-- 
2.27.0

From 38a0533de3dd2613eefcc4865a2916e225e3ceed Mon Sep 17 00:00:00 2001
From: Pavel Moravec <pmoravec@redhat.com>
Date: Tue, 9 Nov 2021 19:34:25 +0100
Subject: [PATCH] [presets] Optimise OCP preset for hundreds of network
 namespaces

Sos report on OCP clusters with hundreds of namespaces times out in the
networking plugin, as it collects >10 commands for each namespace.

Use a balanced approach:
- increase networking.timeout
- limit the number of namespaces to traverse
- disable per-namespace ethtool collection

to ensure sos report finishes successfully in a reasonable time while
collecting a reasonable amount of data.
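
For reference, the resulting OCP preset is roughly equivalent to
invoking sos manually as:

    sos report --all-logs --verify -k networking.timeout=600 \
        -k networking.ethtool_namespaces=False -k networking.namespaces=200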

Resolves: #2754

Signed-off-by: Pavel Moravec <pmoravec@redhat.com>
---
 sos/presets/redhat/__init__.py | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/sos/presets/redhat/__init__.py b/sos/presets/redhat/__init__.py
index e6d63611..865c9b6b 100644
--- a/sos/presets/redhat/__init__.py
+++ b/sos/presets/redhat/__init__.py
@@ -29,11 +29,15 @@ RHEL_DESC = RHEL_RELEASE_STR
 
 RHOSP = "rhosp"
 RHOSP_DESC = "Red Hat OpenStack Platform"
+RHOSP_OPTS = SoSOptions(plugopts=[
+                             'process.lsof=off',
+                             'networking.ethtool_namespaces=False',
+                             'networking.namespaces=200'])
 
 RHOCP = "ocp"
 RHOCP_DESC = "OpenShift Container Platform by Red Hat"
-RHOSP_OPTS = SoSOptions(plugopts=[
-                             'process.lsof=off',
+RHOCP_OPTS = SoSOptions(all_logs=True, verify=True, plugopts=[
+                             'networking.timeout=600',
                              'networking.ethtool_namespaces=False',
                              'networking.namespaces=200'])
 
@@ -62,7 +66,7 @@ RHEL_PRESETS = {
     RHEL: PresetDefaults(name=RHEL, desc=RHEL_DESC),
     RHOSP: PresetDefaults(name=RHOSP, desc=RHOSP_DESC, opts=RHOSP_OPTS),
     RHOCP: PresetDefaults(name=RHOCP, desc=RHOCP_DESC, note=NOTE_SIZE_TIME,
-                          opts=_opts_all_logs_verify),
+                          opts=RHOCP_OPTS),
     RH_CFME: PresetDefaults(name=RH_CFME, desc=RH_CFME_DESC, note=NOTE_TIME,
                             opts=_opts_verify),
     RH_SATELLITE: PresetDefaults(name=RH_SATELLITE, desc=RH_SATELLITE_DESC,
-- 
2.31.1

From 97b93c7f8755d04bdeb4f93759c20dcb787f2046 Mon Sep 17 00:00:00 2001
From: Jake Hunsaker <jhunsake@redhat.com>
Date: Tue, 2 Nov 2021 11:34:13 -0400
Subject: [PATCH] [Plugin] Rework get_container_logs to be more useful

`get_container_logs()` is now `add_container_logs()` to align it better
with our more common `add_*` methods for plugin collections.

Additionally, it has been extended to accept either a single string or a
list of strings like the other methods, and plugin authors may now
specify either specific container names or regexes.
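
A hypothetical plugin call using the reworked method, mixing a literal
name and a regex while also querying non-running containers:

    self.add_container_logs(['rabbitmq', 'opencontrail.*'], get_all=True)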

Signed-off-by: Jake Hunsaker <jhunsake@redhat.com>
---
 sos/report/plugins/__init__.py | 22 +++++++++++++++++-----
 sos/report/plugins/rabbitmq.py |  2 +-
 2 files changed, 18 insertions(+), 6 deletions(-)

diff --git a/sos/report/plugins/__init__.py b/sos/report/plugins/__init__.py
index 08eee118..4b0e4fd5 100644
--- a/sos/report/plugins/__init__.py
+++ b/sos/report/plugins/__init__.py
@@ -2366,20 +2366,32 @@ class Plugin():
             return _runtime.volumes
         return []
 
-    def get_container_logs(self, container, **kwargs):
-        """Helper to get the ``logs`` output for a given container
+    def add_container_logs(self, containers, get_all=False, **kwargs):
+        """Helper to get the ``logs`` output for a given container or list
+        of container names and/or regexes.
 
         Supports passthru of add_cmd_output() options
 
-        :param container:   The name of the container to retrieve logs from
-        :type container: ``str``
+        :param containers:   The name(s) of the container(s) to retrieve logs
+                             from; each may be a literal name or a regex
+        :type containers:    ``str`` or ``list`` of strings
+
+        :param get_all:     Should non-running containers also be queried?
+                            Default: False
+        :type get_all:      ``bool``
 
         :param kwargs:      Any kwargs supported by ``add_cmd_output()`` are
                             supported here
         """
         _runtime = self._get_container_runtime()
         if _runtime is not None:
-            self.add_cmd_output(_runtime.get_logs_command(container), **kwargs)
+            if isinstance(containers, str):
+                containers = [containers]
+            for container in containers:
+                _cons = self.get_all_containers_by_regex(container, get_all)
+                for _con in _cons:
+                    cmd = _runtime.get_logs_command(_con[1])
+                    self.add_cmd_output(cmd, **kwargs)
 
     def fmt_container_cmd(self, container, cmd, quotecmd=False):
         """Format a command to be executed by the loaded ``ContainerRuntime``
diff --git a/sos/report/plugins/rabbitmq.py b/sos/report/plugins/rabbitmq.py
index e84b52da..1bfa741f 100644
--- a/sos/report/plugins/rabbitmq.py
+++ b/sos/report/plugins/rabbitmq.py
@@ -32,7 +32,7 @@ class RabbitMQ(Plugin, IndependentPlugin):
 
         if in_container:
             for container in container_names:
-                self.get_container_logs(container)
+                self.add_container_logs(container)
                 self.add_cmd_output(
                     self.fmt_container_cmd(container, 'rabbitmqctl report'),
                     foreground=True
-- 
2.31.1

From 83bade79da931225f12a1f40e576884bfe7173dd Mon Sep 17 00:00:00 2001
From: Michael Cambria <mcambria@redhat.com>
Date: Fri, 12 Nov 2021 09:07:24 -0500
Subject: [PATCH] Collect "ip route cache" data

Signed-off-by: Michael Cambria <mcambria@redhat.com>
---
 sos/report/plugins/networking.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/sos/report/plugins/networking.py b/sos/report/plugins/networking.py
index f2bdca06..cc93c9e9 100644
--- a/sos/report/plugins/networking.py
+++ b/sos/report/plugins/networking.py
@@ -95,6 +95,8 @@ class Networking(Plugin):
             "networkctl status -a",
             "ip route show table all",
             "ip -6 route show table all",
+            "ip -d route show cache",
+            "ip -d -6 route show cache",
             "ip -4 rule",
             "ip -6 rule",
             "ip -s -d link",
-- 
2.27.0

From 8bf602108f75db10e449eff5e2266c6466504086 Mon Sep 17 00:00:00 2001
From: Nadia Pinaeva <npinaeva@redhat.com>
Date: Thu, 2 Dec 2021 16:30:44 +0100
Subject: [PATCH] [clusters:ocp] fix get_nodes function

Signed-off-by: Nadia Pinaeva <npinaeva@redhat.com>
---
 sos/collector/clusters/ocp.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/sos/collector/clusters/ocp.py b/sos/collector/clusters/ocp.py
index 22a7289a..2ce4e977 100644
--- a/sos/collector/clusters/ocp.py
+++ b/sos/collector/clusters/ocp.py
@@ -150,13 +150,13 @@ class ocp(Cluster):
                               "role option with '-c ocp.role=role1:role2'")
             roles = [r for r in self.get_option('role').split(':')]
             self.node_dict = self._build_dict(res['output'].splitlines())
-            for node in self.node_dict:
+            for node_name, node in self.node_dict.items():
                 if roles:
                     for role in roles:
-                        if role in node:
-                            nodes.append(node)
+                        if role == node['roles']:
+                            nodes.append(node_name)
                 else:
-                    nodes.append(node)
+                    nodes.append(node_name)
         else:
             msg = "'oc' command failed"
             if 'Missing or incomplete' in res['output']:
-- 
2.31.1

From 5d80ac6dc67e12ef00903436c088a1694f9a7dd7 Mon Sep 17 00:00:00 2001
From: Jake Hunsaker <jhunsake@redhat.com>
Date: Wed, 1 Dec 2021 14:13:16 -0500
Subject: [PATCH] [collect] Catch command not found exceptions from pexpect

When running a command that does not exist on the system, catch the
resulting pexpect exception and return the proper status code rather
than allowing an untrapped exception.

Closes: #2768

Signed-off-by: Jake Hunsaker <jhunsake@redhat.com>
---
 sos/collector/transports/__init__.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/sos/collector/transports/__init__.py b/sos/collector/transports/__init__.py
index 7bffee62..33f2f66d 100644
--- a/sos/collector/transports/__init__.py
+++ b/sos/collector/transports/__init__.py
@@ -225,7 +225,11 @@ class RemoteTransport():
         if not env:
             env = None
 
-        result = pexpect.spawn(cmd, encoding='utf-8', env=env)
+        try:
+            result = pexpect.spawn(cmd, encoding='utf-8', env=env)
+        except pexpect.exceptions.ExceptionPexpect as err:
+            self.log_debug(err.value)
+            return {'status': 127, 'output': ''}
 
         _expects = [pexpect.EOF, pexpect.TIMEOUT]
         if need_root and self.opts.ssh_user != 'root':
-- 
2.31.1

From decb5d26c165e664fa879a669f2d80165181f0e1 Mon Sep 17 00:00:00 2001
From: Jake Hunsaker <jhunsake@redhat.com>
Date: Thu, 2 Dec 2021 14:02:17 -0500
Subject: [PATCH] [report,collect] Add option to control default container
 runtime

Adds a new `--container-runtime` option that allows users to control
what default container runtime is used by plugins for container based
collections, effectively overriding policy defaults.

If no runtimes are active, this option is effectively ignored. If,
however, runtimes are active but the requested one is not, raise an
exception to abort collection with an appropriate message to the user.
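
Example invocations (the runtime name here assumes podman is installed
and active):

    # force podman as the default runtime for container collections
    sos report --container-runtime podman

    # disable container-based collections entirely
    sos report --container-runtime none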

Signed-off-by: Jake Hunsaker <jhunsake@redhat.com>
---
 man/en/sos-collect.1      |  6 ++++++
 man/en/sos-report.1       | 19 +++++++++++++++++++
 sos/collector/__init__.py |  4 ++++
 sos/collector/sosnode.py  |  6 ++++++
 sos/report/__init__.py    | 36 ++++++++++++++++++++++++++++++++++++
 5 files changed, 71 insertions(+)

diff --git a/man/en/sos-collect.1 b/man/en/sos-collect.1
index a1f6c10e..9b0a5d7b 100644
--- a/man/en/sos-collect.1
+++ b/man/en/sos-collect.1
@@ -11,6 +11,7 @@ sos collect \- Collect sosreports from multiple (cluster) nodes
     [\-\-chroot CHROOT]
     [\-\-case\-id CASE_ID]
     [\-\-cluster\-type CLUSTER_TYPE]
+    [\-\-container\-runtime RUNTIME]
     [\-e ENABLE_PLUGINS]
     [--encrypt-key KEY]\fR
     [--encrypt-pass PASS]\fR
@@ -113,6 +114,11 @@ Example: \fBsos collect --cluster-type=kubernetes\fR will force the kubernetes p
 to be run, and thus set sosreport options and attempt to determine a list of nodes using
 that profile. 
 .TP
+\fB\-\-container\-runtime\fR RUNTIME
+\fBsos report\fR option. Using this with \fBcollect\fR will pass this option
+through to nodes running sos version 4.3 or later. This option controls the
+default container runtime that plugins will use for collections. See \fBman sos-report\fR.
+.TP
 \fB\-e\fR ENABLE_PLUGINS, \fB\-\-enable\-plugins\fR ENABLE_PLUGINS
 Sosreport option. Use this to enable a plugin that would otherwise not be run.
 
diff --git a/man/en/sos-report.1 b/man/en/sos-report.1
index e8efc8f8..464a77e5 100644
--- a/man/en/sos-report.1
+++ b/man/en/sos-report.1
@@ -19,6 +19,7 @@ sos report \- Collect and package diagnostic and support data
           [--plugin-timeout TIMEOUT]\fR
           [--cmd-timeout TIMEOUT]\fR
           [--namespaces NAMESPACES]\fR
+          [--container-runtime RUNTIME]\fR
           [-s|--sysroot SYSROOT]\fR
           [-c|--chroot {auto|always|never}\fR
           [--tmp-dir directory]\fR
@@ -299,6 +300,24 @@ Use '0' (default) for no limit - all namespaces will be used for collections.
 
 Note that specific plugins may provide a similar `namespaces` plugin option. If
 the plugin option is used, it will override this option.
+.TP
+.B \--container-runtime RUNTIME
+Force the use of the specified RUNTIME as the default runtime that plugins will
+use to collect data from and about containers and container images. By default,
+the setting of \fBauto\fR results in the local policy determining what runtime
+will be the default runtime (in configurations where multiple runtimes are installed
+and active).
+
+If no container runtimes are active, this option is ignored. If there are runtimes
+active, but not one with a name matching RUNTIME, sos will abort.
+
+Setting this to \fBnone\fR, \fBoff\fR, or \fBdisabled\fR will cause plugins to
+\fBNOT\fR leverage any active runtimes for collections. Note that if disabled, plugins
+specifically for runtimes (e.g. the podman or docker plugins) will still collect
+general data about the runtime, but will not inspect existing containers or images.
+
+Default: 'auto' (policy determined)
+.TP
 .B \--case-id NUMBER
 Specify a case identifier to associate with the archive.
 Identifiers may include alphanumeric characters, commas and periods ('.').
diff --git a/sos/collector/__init__.py b/sos/collector/__init__.py
index 42a7731d..3ad703d3 100644
--- a/sos/collector/__init__.py
+++ b/sos/collector/__init__.py
@@ -55,6 +55,7 @@ class SoSCollector(SoSComponent):
         'clean': False,
         'cluster_options': [],
         'cluster_type': None,
+        'container_runtime': 'auto',
         'domains': [],
         'enable_plugins': [],
         'encrypt_key': '',
@@ -268,6 +269,9 @@ class SoSCollector(SoSComponent):
         sos_grp.add_argument('--chroot', default='',
                              choices=['auto', 'always', 'never'],
                              help="chroot executed commands to SYSROOT")
+        sos_grp.add_argument("--container-runtime", default="auto",
+                             help="Default container runtime to use for "
+                                  "collections. 'auto' for policy control.")
         sos_grp.add_argument('-e', '--enable-plugins', action="extend",
                              help='Enable specific plugins for sosreport')
         sos_grp.add_argument('-k', '--plugin-option', '--plugopts',
diff --git a/sos/collector/sosnode.py b/sos/collector/sosnode.py
index ab7f23cc..f5957e17 100644
--- a/sos/collector/sosnode.py
+++ b/sos/collector/sosnode.py
@@ -586,6 +586,12 @@ class SosNode():
                 sos_opts.append('--cmd-timeout=%s'
                                 % quote(str(self.opts.cmd_timeout)))
 
+        if self.check_sos_version('4.3'):
+            if self.opts.container_runtime != 'auto':
+                sos_opts.append(
+                    "--container-runtime=%s" % self.opts.container_runtime
+                )
+
         self.update_cmd_from_cluster()
 
         sos_cmd = sos_cmd.replace(
diff --git a/sos/report/__init__.py b/sos/report/__init__.py
index a6c72778..0daad82f 100644
--- a/sos/report/__init__.py
+++ b/sos/report/__init__.py
@@ -82,6 +82,7 @@ class SoSReport(SoSComponent):
         'case_id': '',
         'chroot': 'auto',
         'clean': False,
+        'container_runtime': 'auto',
         'keep_binary_files': False,
         'desc': '',
         'domains': [],
@@ -187,6 +188,7 @@ class SoSReport(SoSComponent):
             self.tempfile_util.clean()
             self._exit(1)
 
+        self._check_container_runtime()
         self._get_hardware_devices()
         self._get_namespaces()
 
@@ -218,6 +220,9 @@ class SoSReport(SoSComponent):
                                 dest="chroot", default='auto',
                                 help="chroot executed commands to SYSROOT "
                                      "[auto, always, never] (default=auto)")
+        report_grp.add_argument("--container-runtime", default="auto",
+                                help="Default container runtime to use for "
+                                     "collections. 'auto' for policy control.")
         report_grp.add_argument("--desc", "--description", type=str,
                                 action="store", default="",
                                 help="Description for a new preset",)
@@ -373,6 +378,37 @@ class SoSReport(SoSComponent):
         }
         # TODO: enumerate network devices, preferably with devtype info
 
+    def _check_container_runtime(self):
+        """Check the loaded container runtimes, and the policy default runtime
+        (if set), against any requested --container-runtime value. This can be
+        useful for systems that have multiple runtimes, such as RHCOS, but do
+        not have a clearly defined 'default' (or one that is determined based
+        entirely on configuration).
+        """
+        if self.opts.container_runtime != 'auto':
+            crun = self.opts.container_runtime.lower()
+            if crun in ['none', 'off', 'disabled']:
+                self.policy.runtimes = {}
+                self.soslog.info(
+                    "Disabled all container runtimes per user option."
+                )
+            elif not self.policy.runtimes:
+                msg = ("WARNING: No container runtimes are active, ignoring "
+                       "option to set default runtime to '%s'\n" % crun)
+                self.soslog.warn(msg)
+            elif crun not in self.policy.runtimes.keys():
+                valid = ', '.join(p for p in self.policy.runtimes.keys()
+                                  if p != 'default')
+                raise Exception("Cannot use container runtime '%s': no such "
+                                "runtime detected. Available runtimes: %s"
+                                % (crun, valid))
+            else:
+                self.policy.runtimes['default'] = self.policy.runtimes[crun]
+                self.soslog.info(
+                    "Set default container runtime to '%s'"
+                    % self.policy.runtimes['default'].name
+                )
+
     def get_fibre_devs(self):
         """Enumerate a list of fibrechannel devices on this system so that
         plugins can iterate over them
-- 
2.31.1

From 9d4b5af39d76ac99afa40d004fe9888633218356 Mon Sep 17 00:00:00 2001
From: Jake Hunsaker <jhunsake@redhat.com>
Date: Fri, 3 Dec 2021 13:37:09 -0500
Subject: [PATCH 1/2] [Plugin] Add container parameter for add_cmd_output()

Adds a new `container` parameter for `Plugin.add_cmd_output()`, which if
set will format all commands passed to that call for execution in the
specified container.

`Plugin.fmt_container_cmd()` is called for this purpose, and has been
modified so that if the given container does not exist, an empty string
is returned instead, thus preventing execution on the host.
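
A hypothetical plugin snippet using the new parameter; if no container
matching 'rabbitmq' exists, the command is skipped rather than run on
the host:

    self.add_cmd_output('rabbitmqctl report', container='rabbitmq')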

Signed-off-by: Jake Hunsaker <jhunsake@redhat.com>
---
 sos/report/plugins/__init__.py | 16 ++++++++++++++--
 1 file changed, 14 insertions(+), 2 deletions(-)

diff --git a/sos/report/plugins/__init__.py b/sos/report/plugins/__init__.py
index e180ae17..3ff7c191 100644
--- a/sos/report/plugins/__init__.py
+++ b/sos/report/plugins/__init__.py
@@ -1707,7 +1707,7 @@ class Plugin():
                        chroot=True, runat=None, env=None, binary=False,
                        sizelimit=None, pred=None, subdir=None,
                        changes=False, foreground=False, tags=[],
-                       priority=10, cmd_as_tag=False):
+                       priority=10, cmd_as_tag=False, container=None):
         """Run a program or a list of programs and collect the output
 
         Output will be limited to `sizelimit`, collecting the last X amount
@@ -1772,6 +1772,10 @@ class Plugin():
         :param cmd_as_tag: Should the command string be automatically formatted
                            to a tag?
         :type cmd_as_tag: ``bool``
+
+        :param container: Run the specified `cmds` inside a container with this
+                          ID or name
+        :type container:  ``str``
         """
         if isinstance(cmds, str):
             cmds = [cmds]
@@ -1782,6 +1786,14 @@ class Plugin():
         if pred is None:
             pred = self.get_predicate(cmd=True)
         for cmd in cmds:
+            if container:
+                ocmd = cmd
+                cmd = self.fmt_container_cmd(container, cmd)
+                if not cmd:
+                    self._log_debug("Skipping command '%s' as the requested "
+                                    "container '%s' does not exist."
+                                    % (ocmd, container))
+                    continue
             self._add_cmd_output(cmd=cmd, suggest_filename=suggest_filename,
                                  root_symlink=root_symlink, timeout=timeout,
                                  stderr=stderr, chroot=chroot, runat=runat,
@@ -2420,7 +2432,7 @@ class Plugin():
         if self.container_exists(container):
             _runtime = self._get_container_runtime()
             return _runtime.fmt_container_cmd(container, cmd, quotecmd)
-        return cmd
+        return ''
 
     def is_module_loaded(self, module_name):
         """Determine whether specified module is loaded or not
-- 
2.31.1


From 874d2adfbff9e51dc902669af3c4a5083dbc19b1 Mon Sep 17 00:00:00 2001
From: Jake Hunsaker <jhunsake@redhat.com>
Date: Fri, 3 Dec 2021 14:49:43 -0500
Subject: [PATCH 2/2] [plugins] Update existing plugins to use a_c_o container
 parameter

Updates plugins currently calling `fmt_container_cmd()` in their
`add_cmd_output()` calls to instead use the new `container` parameter
and rely on the automatic formatting.

Signed-off-by: Jake Hunsaker <jhunsake@redhat.com>
---
 sos/report/plugins/opencontrail.py        |  3 +--
 sos/report/plugins/openstack_database.py  | 20 ++++++--------------
 sos/report/plugins/openstack_designate.py |  6 ++----
 sos/report/plugins/openstack_ironic.py    |  3 +--
 sos/report/plugins/ovn_central.py         |  7 +++----
 sos/report/plugins/rabbitmq.py            | 11 ++++++-----
 9 files changed, 47 insertions(+), 69 deletions(-)

diff --git a/sos/report/plugins/opencontrail.py b/sos/report/plugins/opencontrail.py
index b368bffe..76c03e21 100644
--- a/sos/report/plugins/opencontrail.py
+++ b/sos/report/plugins/opencontrail.py
@@ -25,8 +25,7 @@ class OpenContrail(Plugin, IndependentPlugin):
             cnames = self.get_containers(get_all=True)
             cnames = [c[1] for c in cnames if 'opencontrail' in c[1]]
             for cntr in cnames:
-                _cmd = self.fmt_container_cmd(cntr, 'contrail-status')
-                self.add_cmd_output(_cmd)
+                self.add_cmd_output('contrail-status', container=cntr)
         else:
             self.add_cmd_output("contrail-status")
 
diff --git a/sos/report/plugins/openstack_database.py b/sos/report/plugins/openstack_database.py
index 1e98fabf..e9f84cf8 100644
--- a/sos/report/plugins/openstack_database.py
+++ b/sos/report/plugins/openstack_database.py
@@ -37,36 +37,28 @@ class OpenStackDatabase(Plugin):
     ]
 
     def setup(self):
-
-        in_container = False
         # determine if we're running databases on the host or in a container
         _db_containers = [
             'galera-bundle-.*',  # overcloud
             'mysql'  # undercloud
         ]
 
+        cname = None
         for container in _db_containers:
             cname = self.get_container_by_name(container)
-            if cname is not None:
-                in_container = True
+            if cname:
                 break
 
-        if in_container:
-            fname = "clustercheck_%s" % cname
-            cmd = self.fmt_container_cmd(cname, 'clustercheck')
-            self.add_cmd_output(cmd, timeout=15, suggest_filename=fname)
-        else:
-            self.add_cmd_output('clustercheck', timeout=15)
+        fname = "clustercheck_%s" % cname if cname else None
+        self.add_cmd_output('clustercheck', container=cname, timeout=15,
+                            suggest_filename=fname)
 
         if self.get_option('dump') or self.get_option('dumpall'):
             db_dump = self.get_mysql_db_string(container=cname)
             db_cmd = "mysqldump --opt %s" % db_dump
 
-            if in_container:
-                db_cmd = self.fmt_container_cmd(cname, db_cmd)
-
             self.add_cmd_output(db_cmd, suggest_filename='mysql_dump.sql',
-                                sizelimit=0)
+                                sizelimit=0, container=cname)
 
     def get_mysql_db_string(self, container=None):
 
diff --git a/sos/report/plugins/openstack_designate.py b/sos/report/plugins/openstack_designate.py
index 0ae991b0..a2ea37ab 100644
--- a/sos/report/plugins/openstack_designate.py
+++ b/sos/report/plugins/openstack_designate.py
@@ -20,12 +20,10 @@ class OpenStackDesignate(Plugin):
 
     def setup(self):
         # collect current pool config
-        pools_cmd = self.fmt_container_cmd(
-            self.get_container_by_name(".*designate_central"),
-            "designate-manage pool generate_file --file /dev/stdout")
 
         self.add_cmd_output(
-            pools_cmd,
+            "designate-manage pool generate_file --file /dev/stdout",
+            container=self.get_container_by_name(".*designate_central"),
             suggest_filename="openstack_designate_current_pools.yaml"
         )
 
diff --git a/sos/report/plugins/openstack_ironic.py b/sos/report/plugins/openstack_ironic.py
index c36fb6b6..49beb2d9 100644
--- a/sos/report/plugins/openstack_ironic.py
+++ b/sos/report/plugins/openstack_ironic.py
@@ -80,8 +80,7 @@ class OpenStackIronic(Plugin):
                                    'ironic_pxe_tftp', 'ironic_neutron_agent',
                                    'ironic_conductor', 'ironic_api']:
                 if self.container_exists('.*' + container_name):
-                    self.add_cmd_output(self.fmt_container_cmd(container_name,
-                                                               'rpm -qa'))
+                    self.add_cmd_output('rpm -qa', container=container_name)
 
         else:
             self.conf_list = [
diff --git a/sos/report/plugins/ovn_central.py b/sos/report/plugins/ovn_central.py
index 914eda60..ddbf288d 100644
--- a/sos/report/plugins/ovn_central.py
+++ b/sos/report/plugins/ovn_central.py
@@ -123,11 +123,10 @@ class OVNCentral(Plugin):
 
         # If OVN is containerized, we need to run the above commands inside
         # the container.
-        cmds = [
-            self.fmt_container_cmd(self._container_name, cmd) for cmd in cmds
-        ]
 
-        self.add_cmd_output(cmds, foreground=True)
+        self.add_cmd_output(
+            cmds, foreground=True, container=self._container_name
+        )
 
         self.add_copy_spec("/etc/sysconfig/ovn-northd")
 
diff --git a/sos/report/plugins/rabbitmq.py b/sos/report/plugins/rabbitmq.py
index 1bfa741f..607802e4 100644
--- a/sos/report/plugins/rabbitmq.py
+++ b/sos/report/plugins/rabbitmq.py
@@ -34,14 +34,15 @@ class RabbitMQ(Plugin, IndependentPlugin):
             for container in container_names:
                 self.add_container_logs(container)
                 self.add_cmd_output(
-                    self.fmt_container_cmd(container, 'rabbitmqctl report'),
+                    'rabbitmqctl report',
+                    container=container,
                     foreground=True
                 )
                 self.add_cmd_output(
-                    self.fmt_container_cmd(
-                        container, "rabbitmqctl eval "
-                        "'rabbit_diagnostics:maybe_stuck().'"),
-                    foreground=True, timeout=10
+                    "rabbitmqctl eval 'rabbit_diagnostics:maybe_stuck().'",
+                    container=container,
+                    foreground=True,
+                    timeout=10
                 )
         else:
             self.add_cmd_output("rabbitmqctl report")
-- 
2.31.1
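
The recurring change in the plugin diffs above is the same API migration
each time; schematically (a sketch using names from the diffs, not a
verbatim excerpt):

    # before: plugins formatted container commands themselves
    cmd = self.fmt_container_cmd(cname, 'clustercheck')
    self.add_cmd_output(cmd, timeout=15)

    # after: pass the container and let add_cmd_output() do the
    # formatting, falling back to plain execution when cname is None
    self.add_cmd_output('clustercheck', container=cname, timeout=15)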

From faa15754f82e9841cd624afe18dc2198644decdf Mon Sep 17 00:00:00 2001
From: Jake Hunsaker <jhunsake@redhat.com>
Date: Wed, 8 Dec 2021 13:51:20 -0500
Subject: [PATCH] [Policy,collect] Prevent remote node policies from setting
 local PATH

This commit fixes an issue where policies loaded for remote nodes when
using `sos collect` would override the PATH setting for the local
policy, which in turn could prevent successful execution of cluster
profile operations.

Related: #2777

Signed-off-by: Jake Hunsaker <jhunsake@redhat.com>
---
 sos/policies/__init__.py         | 8 +++++---
 sos/policies/distros/__init__.py | 6 ++++--
 sos/policies/distros/debian.py   | 3 ++-
 sos/policies/distros/redhat.py   | 6 ++++--
 sos/policies/distros/suse.py     | 3 ++-
 5 files changed, 17 insertions(+), 9 deletions(-)

diff --git a/sos/policies/__init__.py b/sos/policies/__init__.py
index ef9188de..826d03a1 100644
--- a/sos/policies/__init__.py
+++ b/sos/policies/__init__.py
@@ -45,7 +45,7 @@ def load(cache={}, sysroot=None, init=None, probe_runtime=True,
     return cache['policy']
 
 
-class Policy(object):
+class Policy():
     """Policies represent distributions that sos supports, and define the way
     in which sos behaves on those distributions. A policy should define at
     minimum a way to identify the distribution, and a package manager to allow
@@ -111,7 +111,7 @@ any third party.
     presets_path = PRESETS_PATH
     _in_container = False
 
-    def __init__(self, sysroot=None, probe_runtime=True):
+    def __init__(self, sysroot=None, probe_runtime=True, remote_exec=None):
         """Subclasses that choose to override this initializer should call
         super() to ensure that they get the required platform bits attached.
         super(SubClass, self).__init__(). Policies that require runtime
@@ -122,7 +122,9 @@ any third party.
         self.probe_runtime = probe_runtime
         self.package_manager = PackageManager()
         self.valid_subclasses = [IndependentPlugin]
-        self.set_exec_path()
+        self.remote_exec = remote_exec
+        if not self.remote_exec:
+            self.set_exec_path()
         self.sysroot = sysroot
         self.register_presets(GENERIC_PRESETS)
 
diff --git a/sos/policies/distros/__init__.py b/sos/policies/distros/__init__.py
index c69fc1e7..9c91a918 100644
--- a/sos/policies/distros/__init__.py
+++ b/sos/policies/distros/__init__.py
@@ -68,9 +68,11 @@ class LinuxPolicy(Policy):
     container_version_command = None
     container_authfile = None
 
-    def __init__(self, sysroot=None, init=None, probe_runtime=True):
+    def __init__(self, sysroot=None, init=None, probe_runtime=True,
+                 remote_exec=None):
         super(LinuxPolicy, self).__init__(sysroot=sysroot,
-                                          probe_runtime=probe_runtime)
+                                          probe_runtime=probe_runtime,
+                                          remote_exec=remote_exec)
 
         if sysroot:
             self.sysroot = sysroot
diff --git a/sos/policies/distros/debian.py b/sos/policies/distros/debian.py
index 639fd5eb..41f09428 100644
--- a/sos/policies/distros/debian.py
+++ b/sos/policies/distros/debian.py
@@ -26,7 +26,8 @@ class DebianPolicy(LinuxPolicy):
     def __init__(self, sysroot=None, init=None, probe_runtime=True,
                  remote_exec=None):
         super(DebianPolicy, self).__init__(sysroot=sysroot, init=init,
-                                           probe_runtime=probe_runtime)
+                                           probe_runtime=probe_runtime,
+                                           remote_exec=remote_exec)
         self.package_manager = DpkgPackageManager(chroot=self.sysroot,
                                                   remote_exec=remote_exec)
         self.valid_subclasses += [DebianPlugin]
diff --git a/sos/policies/distros/redhat.py b/sos/policies/distros/redhat.py
index 4b14abaf..eb75e15b 100644
--- a/sos/policies/distros/redhat.py
+++ b/sos/policies/distros/redhat.py
@@ -53,7 +53,8 @@ class RedHatPolicy(LinuxPolicy):
     def __init__(self, sysroot=None, init=None, probe_runtime=True,
                  remote_exec=None):
         super(RedHatPolicy, self).__init__(sysroot=sysroot, init=init,
-                                           probe_runtime=probe_runtime)
+                                           probe_runtime=probe_runtime,
+                                           remote_exec=remote_exec)
         self.usrmove = False
 
         self.package_manager = RpmPackageManager(chroot=self.sysroot,
@@ -76,7 +77,8 @@ class RedHatPolicy(LinuxPolicy):
             self.PATH = "/sbin:/bin:/usr/sbin:/usr/bin:/root/bin"
         self.PATH += os.pathsep + "/usr/local/bin"
         self.PATH += os.pathsep + "/usr/local/sbin"
-        self.set_exec_path()
+        if not self.remote_exec:
+            self.set_exec_path()
         self.load_presets()
 
     @classmethod
diff --git a/sos/policies/distros/suse.py b/sos/policies/distros/suse.py
index 1c1feff5..b9d4a3b1 100644
--- a/sos/policies/distros/suse.py
+++ b/sos/policies/distros/suse.py
@@ -25,7 +25,8 @@ class SuSEPolicy(LinuxPolicy):
     def __init__(self, sysroot=None, init=None, probe_runtime=True,
                  remote_exec=None):
         super(SuSEPolicy, self).__init__(sysroot=sysroot, init=init,
-                                         probe_runtime=probe_runtime)
+                                         probe_runtime=probe_runtime,
+                                         remote_exec=remote_exec)
         self.valid_subclasses += [SuSEPlugin, RedHatPlugin]
 
         self.usrmove = False
-- 
2.31.1

From d4383fec5f8a80121aa4f5a37575b37988c51663 Mon Sep 17 00:00:00 2001
From: Nadia Pinaeva <npinaeva@redhat.com>
Date: Wed, 1 Dec 2021 12:23:34 +0100
Subject: [PATCH] Add crio runtime and openshift_ovn plugin

The new openshift_ovn plugin collects logs from crio containers. Also
fix the get_container_by_name function to return the container_id and
not the name.

Signed-off-by: Nadia Pinaeva <npinaeva@redhat.com>
---
 sos/policies/distros/__init__.py    |  4 +-
 sos/policies/runtimes/__init__.py   |  2 +-
 sos/policies/runtimes/crio.py       | 79 +++++++++++++++++++++++++++++
 sos/report/plugins/openshift_ovn.py | 41 +++++++++++++++
 4 files changed, 124 insertions(+), 2 deletions(-)
 create mode 100644 sos/policies/runtimes/crio.py
 create mode 100644 sos/report/plugins/openshift_ovn.py

diff --git a/sos/policies/distros/__init__.py b/sos/policies/distros/__init__.py
index 9c91a918..7acc7e49 100644
--- a/sos/policies/distros/__init__.py
+++ b/sos/policies/distros/__init__.py
@@ -17,6 +17,7 @@ from sos import _sos as _
 from sos.policies import Policy
 from sos.policies.init_systems import InitSystem
 from sos.policies.init_systems.systemd import SystemdInit
+from sos.policies.runtimes.crio import CrioContainerRuntime
 from sos.policies.runtimes.podman import PodmanContainerRuntime
 from sos.policies.runtimes.docker import DockerContainerRuntime
 
@@ -92,7 +93,8 @@ class LinuxPolicy(Policy):
         if self.probe_runtime:
             _crun = [
                 PodmanContainerRuntime(policy=self),
-                DockerContainerRuntime(policy=self)
+                DockerContainerRuntime(policy=self),
+                CrioContainerRuntime(policy=self)
             ]
             for runtime in _crun:
                 if runtime.check_is_active():
diff --git a/sos/policies/runtimes/__init__.py b/sos/policies/runtimes/__init__.py
index 2e60ad23..4e9a45c1 100644
--- a/sos/policies/runtimes/__init__.py
+++ b/sos/policies/runtimes/__init__.py
@@ -100,7 +100,7 @@ class ContainerRuntime():
             return None
         for c in self.containers:
             if re.match(name, c[1]):
-                return c[1]
+                return c[0]
         return None
 
     def get_images(self):
diff --git a/sos/policies/runtimes/crio.py b/sos/policies/runtimes/crio.py
new file mode 100644
index 00000000..980c3ea1
--- /dev/null
+++ b/sos/policies/runtimes/crio.py
@@ -0,0 +1,79 @@
+# Copyright (C) 2021 Red Hat, Inc., Nadia Pinaeva <npinaeva@redhat.com>
+
+# This file is part of the sos project: https://github.com/sosreport/sos
+#
+# This copyrighted material is made available to anyone wishing to use,
+# modify, copy, or redistribute it subject to the terms and conditions of
+# version 2 of the GNU General Public License.
+#
+# See the LICENSE file in the source distribution for further information.
+
+from sos.policies.runtimes import ContainerRuntime
+from sos.utilities import sos_get_command_output
+from pipes import quote
+
+
+class CrioContainerRuntime(ContainerRuntime):
+    """Runtime class to use for systems running crio"""
+
+    name = 'crio'
+    binary = 'crictl'
+
+    def get_containers(self, get_all=False):
+        """Get a list of containers present on the system.
+
+        :param get_all: If set, include stopped containers as well
+        :type get_all: ``bool``
+        """
+        containers = []
+        _cmd = "%s ps %s" % (self.binary, '-a' if get_all else '')
+        if self.active:
+            out = sos_get_command_output(_cmd, chroot=self.policy.sysroot)
+            if out['status'] == 0:
+                for ent in out['output'].splitlines()[1:]:
+                    ent = ent.split()
+                    # takes the form (container_id, container_name)
+                    containers.append((ent[0], ent[-3]))
+        return containers
+
+    def get_images(self):
+        """Get a list of images present on the system
+
+        :returns: A list of 2-tuples containing (image_name, image_id)
+        :rtype: ``list``
+        """
+        images = []
+        if self.active:
+            out = sos_get_command_output("%s images" % self.binary,
+                                         chroot=self.policy.sysroot)
+            if out['status'] == 0:
+                for ent in out['output'].splitlines():
+                    ent = ent.split()
+                    # takes the form (image_name, image_id)
+                    images.append((ent[0] + ':' + ent[1], ent[2]))
+        return images
+
+    def fmt_container_cmd(self, container, cmd, quotecmd):
+        """Format a command to run inside a container using the runtime
+
+        :param container: The name or ID of the container in which to run
+        :type container: ``str``
+
+        :param cmd: The command to run inside `container`
+        :type cmd: ``str``
+
+        :param quotecmd: Whether the cmd should be quoted.
+        :type quotecmd: ``bool``
+
+        :returns: Formatted string to run `cmd` inside `container`
+        :rtype: ``str``
+        """
+        if quotecmd:
+            quoted_cmd = quote(cmd)
+        else:
+            quoted_cmd = cmd
+        container_id = self.get_container_by_name(container)
+        return "%s %s %s" % (self.run_cmd, container_id,
+                             quoted_cmd) if container_id is not None else ''
+
+# vim: set et ts=4 sw=4 :
diff --git a/sos/report/plugins/openshift_ovn.py b/sos/report/plugins/openshift_ovn.py
new file mode 100644
index 00000000..168f1dd3
--- /dev/null
+++ b/sos/report/plugins/openshift_ovn.py
@@ -0,0 +1,41 @@
+# Copyright (C) 2021 Nadia Pinaeva <npinaeva@redhat.com>
+
+# This file is part of the sos project: https://github.com/sosreport/sos
+#
+# This copyrighted material is made available to anyone wishing to use,
+# modify, copy, or redistribute it subject to the terms and conditions of
+# version 2 of the GNU General Public License.
+#
+# See the LICENSE file in the source distribution for further information.
+
+from sos.report.plugins import Plugin, RedHatPlugin
+
+
+class OpenshiftOVN(Plugin, RedHatPlugin):
+    """This plugin is used to collect OCP 4.x OVN logs.
+    """
+    short_desc = 'Openshift OVN'
+    plugin_name = "openshift_ovn"
+    containers = ('ovnkube-master', 'ovn-ipsec')
+    profiles = ('openshift',)
+
+    def setup(self):
+        self.add_copy_spec([
+            "/var/lib/ovn/etc/ovnnb_db.db",
+            "/var/lib/ovn/etc/ovnsb_db.db",
+            "/var/lib/openvswitch/etc/keys",
+            "/var/log/openvswitch/libreswan.log",
+            "/var/log/openvswitch/ovs-monitor-ipsec.log"
+        ])
+
+        self.add_cmd_output([
+            'ovn-appctl -t /var/run/ovn/ovnnb_db.ctl ' +
+            'cluster/status OVN_Northbound',
+            'ovn-appctl -t /var/run/ovn/ovnsb_db.ctl ' +
+            'cluster/status OVN_Southbound'],
+            container='ovnkube-master')
+        self.add_cmd_output([
+            'ovs-appctl -t ovs-monitor-ipsec tunnels/show',
+            'ipsec status',
+            'certutil -L -d sql:/etc/ipsec.d'],
+            container='ovn-ipsec')
-- 
2.31.1
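
The get_container_by_name() fix matters because runtimes like crictl
address containers by ID, not name. A minimal standalone sketch of the
corrected behavior (data shapes assumed from the diff above):

    import re

    # (container_id, container_name) tuples, as built by get_containers()
    containers = [("3f1b2c4d5e6a", "ovnkube-master")]

    def get_container_by_name(name):
        for c in containers:
            if re.match(name, c[1]):   # match against the name...
                return c[0]            # ...but return the ID
        return None

    print(get_container_by_name(".*ovnkube"))  # -> '3f1b2c4d5e6a'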

From 17218ca17e49cb8491c688095b56503d041c1ae9 Mon Sep 17 00:00:00 2001
From: Jake Hunsaker <jhunsake@redhat.com>
Date: Thu, 9 Dec 2021 15:07:23 -0500
Subject: [PATCH 1/3] [ocp] Skip project setup whenever oc transport is not
 used

Fixes a corner case where we would still attempt to create a new project
within the OCP cluster even if we weren't using the `oc` transport.

Signed-off-by: Jake Hunsaker <jhunsake@redhat.com>
---
 sos/collector/clusters/ocp.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/sos/collector/clusters/ocp.py b/sos/collector/clusters/ocp.py
index 2ce4e977..56f8cc47 100644
--- a/sos/collector/clusters/ocp.py
+++ b/sos/collector/clusters/ocp.py
@@ -123,7 +123,9 @@ class ocp(Cluster):
         return nodes
 
     def set_transport_type(self):
-        if is_executable('oc') or self.opts.transport == 'oc':
+        if self.opts.transport != 'auto':
+            return self.opts.transport
+        if is_executable('oc'):
             return 'oc'
         self.log_info("Local installation of 'oc' not found or is not "
                       "correctly configured. Will use ControlPersist.")
-- 
2.31.1
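
The resulting selection order, as a standalone sketch (the function
signature is illustrative, not the actual cluster API):

    def pick_transport(opts_transport, oc_available):
        if opts_transport != 'auto':    # an explicit choice always wins
            return opts_transport
        if oc_available:                # otherwise prefer 'oc' if usable
            return 'oc'
        return 'control_persist'        # final fallback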


From 9faabdc3df08516a91c1adb3326bbf43db155f71 Mon Sep 17 00:00:00 2001
From: Jake Hunsaker <jhunsake@redhat.com>
Date: Thu, 9 Dec 2021 16:04:39 -0500
Subject: [PATCH 2/3] [crio] Put inspect output in the containers subdir

Given the environments where crio is run, having `crictl inspect` output
in the main plugin directory can be a bit overwhelming. As such, put
this output into a `containers` subdir, and nest container log output in
a `containers/logs/` subdir.

Signed-off-by: Jake Hunsaker <jhunsake@redhat.com>
---
 sos/report/plugins/crio.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/sos/report/plugins/crio.py b/sos/report/plugins/crio.py
index cb2c9796..56cf64a7 100644
--- a/sos/report/plugins/crio.py
+++ b/sos/report/plugins/crio.py
@@ -79,10 +79,11 @@ class CRIO(Plugin, RedHatPlugin, UbuntuPlugin):
         pods = self._get_crio_list(pod_cmd)
 
         for container in containers:
-            self.add_cmd_output("crictl inspect %s" % container)
+            self.add_cmd_output("crictl inspect %s" % container,
+                                subdir="containers")
             if self.get_option('logs'):
                 self.add_cmd_output("crictl logs -t %s" % container,
-                                    subdir="containers", priority=100)
+                                    subdir="containers/logs", priority=100)
 
         for image in images:
             self.add_cmd_output("crictl inspecti %s" % image, subdir="images")
-- 
2.31.1


From 9118562c47fb521da3eeeed1a8746d45aaa769e7 Mon Sep 17 00:00:00 2001
From: Jake Hunsaker <jhunsake@redhat.com>
Date: Thu, 9 Dec 2021 16:06:06 -0500
Subject: [PATCH 3/3] [networking] Put namespaced commands into subdirs

Where networking namespaces are used, there tend to be large numbers of
them in play. This in turn results in sos running and collecting very
large numbers of namespaced commands.

To aid in consumability, place these collections under a subdir for the
namespace under another "namespaces" subdir within the plugin directory.

Signed-off-by: Jake Hunsaker <jhunsake@redhat.com>
---
 sos/report/plugins/networking.py | 27 ++++++++++++---------------
 1 file changed, 12 insertions(+), 15 deletions(-)

diff --git a/sos/report/plugins/networking.py b/sos/report/plugins/networking.py
index 80e24abb..bcb5e6ae 100644
--- a/sos/report/plugins/networking.py
+++ b/sos/report/plugins/networking.py
@@ -198,6 +198,7 @@ class Networking(Plugin):
                                   pred=SoSPredicate(self, cmd_outputs=co6))
                                   else None)
         for namespace in namespaces:
+            _subdir = "namespaces/%s" % namespace
             ns_cmd_prefix = cmd_prefix + namespace + " "
             self.add_cmd_output([
                 ns_cmd_prefix + "ip address show",
@@ -213,29 +214,27 @@ class Networking(Plugin):
                 ns_cmd_prefix + "netstat -s",
                 ns_cmd_prefix + "netstat %s -agn" % self.ns_wide,
                 ns_cmd_prefix + "nstat -zas",
-            ], priority=50)
+            ], priority=50, subdir=_subdir)
             self.add_cmd_output([ns_cmd_prefix + "iptables-save"],
                                 pred=iptables_with_nft,
+                                subdir=_subdir,
                                 priority=50)
             self.add_cmd_output([ns_cmd_prefix + "ip6tables-save"],
                                 pred=ip6tables_with_nft,
+                                subdir=_subdir,
                                 priority=50)
 
             ss_cmd = ns_cmd_prefix + "ss -peaonmi"
             # --allow-system-changes is handled directly in predicate
             # evaluation, so plugin code does not need to separately
             # check for it
-            self.add_cmd_output(ss_cmd, pred=ss_pred)
-
-        # Collect ethtool commands only when ethtool_namespaces
-        # is set to true.
-        if self.get_option("ethtool_namespaces"):
-            # Devices that exist in a namespace use less ethtool
-            # parameters. Run this per namespace.
-            for namespace in self.get_network_namespaces(
-                                self.get_option("namespace_pattern"),
-                                self.get_option("namespaces")):
-                ns_cmd_prefix = cmd_prefix + namespace + " "
+            self.add_cmd_output(ss_cmd, pred=ss_pred, subdir=_subdir)
+
+            # Collect ethtool commands only when ethtool_namespaces
+            # is set to true.
+            if self.get_option("ethtool_namespaces"):
+                # Devices that exist in a namespace use less ethtool
+                # parameters. Run this per namespace.
                 netns_netdev_list = self.exec_cmd(
                     ns_cmd_prefix + "ls -1 /sys/class/net/"
                 )
@@ -250,9 +249,7 @@ class Networking(Plugin):
                         ns_cmd_prefix + "ethtool -i " + eth,
                         ns_cmd_prefix + "ethtool -k " + eth,
                         ns_cmd_prefix + "ethtool -S " + eth
-                    ], priority=50)
-
-        return
+                    ], priority=50, subdir=_subdir)
 
 
 class RedHatNetworking(Networking, RedHatPlugin):
-- 
2.31.1
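
With this change, each namespaced collection lands under its own
directory; illustratively (the namespace and file names below are
examples only):

    sos_commands/networking/namespaces/qrouter-f00/ip_address_show
    sos_commands/networking/namespaces/qrouter-f00/iptables-save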

From 4bf5f9143c962c839c1d27217ba74127551a5c00 Mon Sep 17 00:00:00 2001
From: Jake Hunsaker <jhunsake@redhat.com>
Date: Fri, 17 Dec 2021 11:10:15 -0500
Subject: [PATCH] [transport] Detect retrieval failures and automatically retry

If a particular attempt to retrieve a remote file fails, we should
automatically retry that collection up to a certain point. This makes
`sos collect` more resilient when collecting sos report archives.

This necessitates a change in how we handle the SoSNode flow for failed
sos report retrievals, and as such also contains minor fixes to the
transports to ensure that we do not incorrectly hit exceptions in error
handling that were not previously reachable with how we exited the
SoSNode retrieval flow.

Closes: #2777

Signed-off-by: Jake Hunsaker <jhunsake@redhat.com>
---
 sos/collector/__init__.py            |  5 +++--
 sos/collector/clusters/ocp.py        |  1 +
 sos/collector/sosnode.py             | 17 ++++++++++-------
 sos/collector/transports/__init__.py | 15 ++++++++++++++-
 sos/collector/transports/local.py    |  1 +
 sos/collector/transports/oc.py       |  3 ++-
 6 files changed, 31 insertions(+), 11 deletions(-)

diff --git a/sos/collector/__init__.py b/sos/collector/__init__.py
index b825d8fc..a25e794e 100644
--- a/sos/collector/__init__.py
+++ b/sos/collector/__init__.py
@@ -1221,8 +1221,9 @@ this utility or remote systems that it connects to.
     def close_all_connections(self):
         """Close all sessions for nodes"""
         for client in self.client_list:
-            self.log_debug('Closing connection to %s' % client.address)
-            client.disconnect()
+            if client.connected:
+                self.log_debug('Closing connection to %s' % client.address)
+                client.disconnect()
 
     def create_cluster_archive(self):
         """Calls for creation of tar archive then cleans up the temporary
diff --git a/sos/collector/clusters/ocp.py b/sos/collector/clusters/ocp.py
index 56f8cc47..ae93ad58 100644
--- a/sos/collector/clusters/ocp.py
+++ b/sos/collector/clusters/ocp.py
@@ -92,6 +92,7 @@ class ocp(Cluster):
                                % ret['output'])
             # don't leave the config on a non-existing project
             self.exec_master_cmd("oc project default")
+            self.project = None
         return True
 
     def _build_dict(self, nodelist):
diff --git a/sos/collector/sosnode.py b/sos/collector/sosnode.py
index 1341e39f..925f2790 100644
--- a/sos/collector/sosnode.py
+++ b/sos/collector/sosnode.py
@@ -751,12 +751,11 @@ class SosNode():
             if self.file_exists(path):
                 self.log_info("Copying remote %s to local %s" %
                               (path, destdir))
-                self._transport.retrieve_file(path, dest)
+                return self._transport.retrieve_file(path, dest)
             else:
                 self.log_debug("Attempting to copy remote file %s, but it "
                                "does not exist on filesystem" % path)
                 return False
-            return True
         except Exception as err:
             self.log_debug("Failed to retrieve %s: %s" % (path, err))
             return False
@@ -793,16 +792,20 @@ class SosNode():
                 except Exception:
                     self.log_error('Failed to make archive readable')
                     return False
-            self.soslog.info('Retrieving sos report from %s' % self.address)
+            self.log_info('Retrieving sos report from %s' % self.address)
             self.ui_msg('Retrieving sos report...')
-            ret = self.retrieve_file(self.sos_path)
+            try:
+                ret = self.retrieve_file(self.sos_path)
+            except Exception as err:
+                self.log_error(err)
+                return False
             if ret:
                 self.ui_msg('Successfully collected sos report')
                 self.file_list.append(self.sos_path.split('/')[-1])
+                return True
             else:
-                self.log_error('Failed to retrieve sos report')
-                raise SystemExit
-            return True
+                self.ui_msg('Failed to retrieve sos report')
+                return False
         else:
             # sos sometimes fails but still returns a 0 exit code
             if self.stderr.read():
diff --git a/sos/collector/transports/__init__.py b/sos/collector/transports/__init__.py
index 33f2f66d..dcdebdde 100644
--- a/sos/collector/transports/__init__.py
+++ b/sos/collector/transports/__init__.py
@@ -303,7 +303,20 @@ class RemoteTransport():
         :returns:   True if file was successfully copied from remote, or False
         :rtype:     ``bool``
         """
-        return self._retrieve_file(fname, dest)
+        attempts = 0
+        try:
+            while attempts < 5:
+                attempts += 1
+                ret = self._retrieve_file(fname, dest)
+                if ret:
+                    return True
+                self.log_info("File retrieval attempt %s failed" % attempts)
+            self.log_info("File retrieval failed after 5 attempts")
+            return False
+        except Exception as err:
+            self.log_error("Exception encountered during retrieval attempt %s "
+                           "for %s: %s" % (attempts, fname, err))
+            raise err
 
     def _retrieve_file(self, fname, dest):
         raise NotImplementedError("Transport %s does not support file copying"
diff --git a/sos/collector/transports/local.py b/sos/collector/transports/local.py
index a4897f19..2996d524 100644
--- a/sos/collector/transports/local.py
+++ b/sos/collector/transports/local.py
@@ -35,6 +35,7 @@ class LocalTransport(RemoteTransport):
     def _retrieve_file(self, fname, dest):
         self.log_debug("Moving %s to %s" % (fname, dest))
         shutil.copy(fname, dest)
+        return True
 
     def _format_cmd_for_exec(self, cmd):
         return cmd
diff --git a/sos/collector/transports/oc.py b/sos/collector/transports/oc.py
index de044ccb..720dd61d 100644
--- a/sos/collector/transports/oc.py
+++ b/sos/collector/transports/oc.py
@@ -202,7 +202,8 @@ class OCTransport(RemoteTransport):
                                                     env, False)
 
     def _disconnect(self):
-        os.unlink(self.pod_tmp_conf)
+        if os.path.exists(self.pod_tmp_conf):
+            os.unlink(self.pod_tmp_conf)
         removed = self.run_oc("delete pod %s" % self.pod_name)
         if "deleted" not in removed['output']:
             self.log_debug("Calling delete on pod '%s' failed: %s"
-- 
2.31.1
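
The retry logic added to RemoteTransport.retrieve_file() boils down to
a bounded loop; a minimal standalone sketch (names are illustrative):

    def retrieve_with_retry(retrieve, fname, dest, max_attempts=5):
        for attempt in range(1, max_attempts + 1):
            if retrieve(fname, dest):
                return True
            print("File retrieval attempt %s failed" % attempt)
        return False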

From 304c9ef6c1015f1ebe1a8d569c3e16bada4d23f1 Mon Sep 17 00:00:00 2001
From: Nadia Pinaeva <npinaeva@redhat.com>
Date: Tue, 4 Jan 2022 16:37:09 +0100
Subject: [PATCH] Add cluster cleanup for all exit() calls

Signed-off-by: Nadia Pinaeva <npinaeva@redhat.com>
---
 sos/collector/__init__.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/sos/collector/__init__.py b/sos/collector/__init__.py
index a25e794e1..ffd63bc63 100644
--- a/sos/collector/__init__.py
+++ b/sos/collector/__init__.py
@@ -443,6 +443,7 @@ def add_parser_options(cls, parser):
 
     def exit(self, msg, error=1):
         """Used to safely terminate if sos-collector encounters an error"""
+        self.cluster.cleanup()
         self.log_error(msg)
         try:
             self.close_all_connections()
@@ -858,8 +858,9 @@ class SoSCollector(SoSComponent):
                       "CTRL-C to quit\n")
                 self.ui_log.info("")
             except KeyboardInterrupt:
-                self.cluster.cleanup()
                 self.exit("Exiting on user cancel", 130)
+            except Exception as e:
+                self.exit(repr(e), 1)
 
     def configure_sos_cmd(self):
         """Configures the sosreport command that is run on the nodes"""
@@ -1185,7 +1185,6 @@ def collect(self):
             arc_name = self.create_cluster_archive()
         else:
             msg = 'No sosreports were collected, nothing to archive...'
-            self.cluster.cleanup()
             self.exit(msg, 1)
 
         if self.opts.upload and self.policy.get_upload_url():
From 2c3a647817dfbac36be3768acf6026e91d1a6e8f Mon Sep 17 00:00:00 2001
From: Jake Hunsaker <jhunsake@redhat.com>
Date: Tue, 21 Dec 2021 14:20:19 -0500
Subject: [PATCH] [options] Allow spaces in --keywords values in sos.conf

The `--keywords` option supports spaces so that phrases, not just
single words, can be obfuscated. This however breaks if a phrase is
added to the config file *before* a run that passes the phrase via the
cmdline option, due to the safeguards we have for all other values,
which do not support spaces.

Add a check in our flow for updating options from the config file to not
replace illegal spaces if we're checking the `keywords` option, for
which spaces are legal.

Signed-off-by: Jake Hunsaker <jhunsake@redhat.com>
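
As an illustration, a phrase configured in sos.conf (the section name
and value here are hypothetical) should now survive with its spaces
intact:

    [clean]
    keywords = some secret phrase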
---
 sos/options.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/sos/options.py b/sos/options.py
index 7bea3ffc1..4846a5096 100644
--- a/sos/options.py
+++ b/sos/options.py
@@ -200,7 +200,10 @@ def _update_from_section(section, config):
                         odict[rename_opts[key]] = odict.pop(key)
                 # set the values according to the config file
                 for key, val in odict.items():
-                    if isinstance(val, str):
+                    # most option values do not tolerate spaces, special
+                    # exception however for --keywords which we do want to
+                    # support phrases, and thus spaces, for
+                    if isinstance(val, str) and key != 'keywords':
                         val = val.replace(' ', '')
                     if key not in self.arg_defaults:
                         # read an option that is not loaded by the current
From f912fc9e31b406a24b7a9c012e12cda920632051 Mon Sep 17 00:00:00 2001
From: Pavel Moravec <pmoravec@redhat.com>
Date: Mon, 10 Jan 2022 14:13:42 +0100
Subject: [PATCH] [collect] Deal with a None sos_version properly

If the collector cluster hits an error during init, sos_version is
None, which LooseVersion cannot compare properly and so raises the
exception

'LooseVersion' object has no attribute 'version'

Related: #2822

Signed-off-by: Pavel Moravec <pmoravec@redhat.com>
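
For reference, the failure mode is reproducible with distutils alone
(a minimal demonstration, not sos code):

    from distutils.version import LooseVersion
    LooseVersion(None) >= '4.2'
    # AttributeError: 'LooseVersion' object has no attribute 'version'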
---
 sos/collector/sosnode.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/sos/collector/sosnode.py b/sos/collector/sosnode.py
index 925f27909..7bbe0cd1f 100644
--- a/sos/collector/sosnode.py
+++ b/sos/collector/sosnode.py
@@ -382,7 +382,8 @@ def check_sos_version(self, ver):
         given ver. This means that if the installed version is greater than
         ver, this will still return True
         """
-        return LooseVersion(self.sos_info['version']) >= ver
+        return self.sos_info['version'] is not None and \
+            LooseVersion(self.sos_info['version']) >= ver
 
     def is_installed(self, pkg):
         """Checks if a given package is installed on the node"""
From 0c67e8ebaeef17dac3b5b9e42a59b4e673e4403b Mon Sep 17 00:00:00 2001
From: Pavel Moravec <pmoravec@redhat.com>
Date: Mon, 10 Jan 2022 14:17:13 +0100
Subject: [PATCH] [collector] Cleanup cluster only if defined

In case cluster init fails, self.cluster = None and its cleanup
must be skipped.

Resolves: #2822

Signed-off-by: Pavel Moravec <pmoravec@redhat.com>
---
 sos/collector/__init__.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/sos/collector/__init__.py b/sos/collector/__init__.py
index ffd63bc63..3e22bca3e 100644
--- a/sos/collector/__init__.py
+++ b/sos/collector/__init__.py
@@ -443,7 +443,8 @@ def add_parser_options(cls, parser):
 
     def exit(self, msg, error=1):
         """Used to safely terminate if sos-collector encounters an error"""
-        self.cluster.cleanup()
+        if self.cluster:
+            self.cluster.cleanup()
         self.log_error(msg)
         try:
             self.close_all_connections()
From ef27a6ee6737c23b3beda1437768a91679024697 Mon Sep 17 00:00:00 2001
From: Nadia Pinaeva <npinaeva@redhat.com>
Date: Fri, 3 Dec 2021 15:41:35 +0100
Subject: [PATCH] Add journal logs for NetworkManager plugin

Signed-off-by: Nadia Pinaeva <npinaeva@redhat.com>
---
 sos/report/plugins/networkmanager.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/sos/report/plugins/networkmanager.py b/sos/report/plugins/networkmanager.py
index 30f99a1140..3aca0c7460 100644
--- a/sos/report/plugins/networkmanager.py
+++ b/sos/report/plugins/networkmanager.py
@@ -25,6 +25,8 @@ def setup(self):
             "/etc/NetworkManager/dispatcher.d"
         ])
 
+        self.add_journal(units="NetworkManager")
+
         # There are some incompatible changes in nmcli since
         # the release of NetworkManager >= 0.9.9. In addition,
         # NetworkManager >= 0.9.9 will use the long names of
From 9eb60f0bb6ea36f9c1cf099c1fd20cf3938b4b26 Mon Sep 17 00:00:00 2001
From: Jake Hunsaker <jhunsake@redhat.com>
Date: Mon, 17 Jan 2022 11:11:24 -0500
Subject: [PATCH] [clean] Ignore empty items for obfuscation better

This commit fixes a couple of edge cases where an empty item (e.g. an
empty string '') was not being properly ignored, which in turn caused
failures in writing both obfuscations and replacement files.

This should no longer be possible.

Signed-off-by: Jake Hunsaker <jhunsake@redhat.com>
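
The intended behavior, sketched below as a simplified stand-in for
SoSMap.get() (not the real implementation): empty inputs short-circuit
and are handed back unchanged instead of being added to the dataset.

    def get(mapping, item):
        if not item:           # '' and None are never obfuscated/stored
            return item
        return mapping.setdefault(item, "obfuscated%d" % len(mapping))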
---
 sos/cleaner/mappings/__init__.py       | 5 ++++-
 sos/cleaner/mappings/username_map.py   | 2 +-
 sos/cleaner/parsers/username_parser.py | 2 +-
 3 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/sos/cleaner/mappings/__init__.py b/sos/cleaner/mappings/__init__.py
index 5cf5c8b2d..48171a052 100644
--- a/sos/cleaner/mappings/__init__.py
+++ b/sos/cleaner/mappings/__init__.py
@@ -49,6 +49,8 @@ def add(self, item):
             :param item:        The plaintext object to obfuscate
         """
         with self.lock:
+            if not item:
+                return item
             self.dataset[item] = self.sanitize_item(item)
             return self.dataset[item]
 
@@ -67,7 +69,8 @@ def get(self, item):
         """Retrieve an item's obfuscated counterpart from the map. If the item
         does not yet exist in the map, add it by generating one on the fly
         """
-        if self.ignore_item(item) or self.item_in_dataset_values(item):
+        if (not item or self.ignore_item(item) or
+                self.item_in_dataset_values(item)):
             return item
         if item not in self.dataset:
             return self.add(item)
diff --git a/sos/cleaner/mappings/username_map.py b/sos/cleaner/mappings/username_map.py
index 7ecccd7bc..ed6dc0912 100644
--- a/sos/cleaner/mappings/username_map.py
+++ b/sos/cleaner/mappings/username_map.py
@@ -24,7 +24,7 @@ class SoSUsernameMap(SoSMap):
 
     def load_names_from_options(self, opt_names):
         for name in opt_names:
-            if name not in self.dataset.keys():
+            if name and name not in self.dataset.keys():
                 self.add(name)
 
     def sanitize_item(self, username):
diff --git a/sos/cleaner/parsers/username_parser.py b/sos/cleaner/parsers/username_parser.py
index 49640f7fd..2853c860f 100644
--- a/sos/cleaner/parsers/username_parser.py
+++ b/sos/cleaner/parsers/username_parser.py
@@ -55,7 +55,7 @@ def load_usernames_into_map(self, content):
                 user = line.split()[0]
             except Exception:
                 continue
-            if user.lower() in self.skip_list:
+            if not user or user.lower() in self.skip_list:
                 continue
             users.add(user)
         for each in users:
From ed618678fd3d07e68e1a430eb7d225a9701332e0 Mon Sep 17 00:00:00 2001
From: Jake Hunsaker <jhunsake@redhat.com>
Date: Thu, 13 Jan 2022 13:52:34 -0500
Subject: [PATCH] [clean,parsers] Build regex lists for static items only once

For parsers such as the username and keyword parsers, we don't discover
new items through parsing archives - these parsers use static lists
determined before we begin the actual obfuscation process.

As such, we can build a list of regexes for these static items once, and
then reference those regexes during execution, rather than rebuilding
the regex for each of these items for every obfuscation.

For use cases where hundreds of items, e.g. hundreds of usernames, are
being obfuscated, this results in a significant performance increase.
Individual per-file gains are minor - fractions of a second - however
these gains build up over the course of the hundreds to thousands of
files a typical archive can be expected to contain.

Signed-off-by: Jake Hunsaker <jhunsake@redhat.com>
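
The optimization, in miniature (names and inputs are illustrative):

    import re

    users = ['alice', 'bob']
    lines = ['alice logged in', 'bob exited']

    # build each regex exactly once...
    regexes = {u: re.compile(u, re.I) for u in users}

    # ...then reuse them on every line, instead of recompiling per line
    for line in lines:
        for user, reg in regexes.items():
            line, _ = reg.subn('obfuscated_' + user, line)
        print(line)   # e.g. 'obfuscated_alice logged in'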
---
 sos/cleaner/__init__.py                |  9 +++++++++
 sos/cleaner/parsers/__init__.py        | 10 ++++++++++
 sos/cleaner/parsers/keyword_parser.py  | 15 ++++++++++-----
 sos/cleaner/parsers/username_parser.py | 14 ++++++++------
 tests/unittests/cleaner_tests.py       |  1 +
 5 files changed, 38 insertions(+), 11 deletions(-)

diff --git a/sos/cleaner/__init__.py b/sos/cleaner/__init__.py
index 5686e2131..b76bef644 100644
--- a/sos/cleaner/__init__.py
+++ b/sos/cleaner/__init__.py
@@ -294,6 +294,7 @@ def execute(self):
         # we have at least one valid target to obfuscate
         self.completed_reports = []
         self.preload_all_archives_into_maps()
+        self.generate_parser_item_regexes()
         self.obfuscate_report_paths()
 
         if not self.completed_reports:
@@ -498,6 +499,14 @@ def _replace_obfuscated_archives(self):
             shutil.move(archive.final_archive_path, dest)
             archive.final_archive_path = dest_name
 
+    def generate_parser_item_regexes(self):
+        """For the parsers that use prebuilt lists of items, generate those
+        regexes now since all the parsers should be preloaded by the archive(s)
+        as well as being handed cmdline options and mapping file configuration.
+        """
+        for parser in self.parsers:
+            parser.generate_item_regexes()
+
     def preload_all_archives_into_maps(self):
         """Before doing the actual obfuscation, if we have multiple archives
         to obfuscate then we need to preload each of them into the mappings
diff --git a/sos/cleaner/parsers/__init__.py b/sos/cleaner/parsers/__init__.py
index e62fd9384..6def863a6 100644
--- a/sos/cleaner/parsers/__init__.py
+++ b/sos/cleaner/parsers/__init__.py
@@ -46,9 +46,19 @@ class SoSCleanerParser():
     map_file_key = 'unset'
 
     def __init__(self, config={}):
+        self.regexes = {}
         if self.map_file_key in config:
             self.mapping.conf_update(config[self.map_file_key])
 
+    def generate_item_regexes(self):
+        """Generate regexes for items the parser will be searching for
+        repeatedly without needing to generate them for every file and/or line
+        we process
+
+        Not used by all parsers.
+        """
+        pass
+
     def parse_line(self, line):
         """This will be called for every line in every file we process, so that
         every parser has a chance to scrub everything.
diff --git a/sos/cleaner/parsers/keyword_parser.py b/sos/cleaner/parsers/keyword_parser.py
index 694c6073a..362a1929e 100644
--- a/sos/cleaner/parsers/keyword_parser.py
+++ b/sos/cleaner/parsers/keyword_parser.py
@@ -9,6 +9,7 @@
 # See the LICENSE file in the source distribution for further information.
 
 import os
+import re
 
 from sos.cleaner.parsers import SoSCleanerParser
 from sos.cleaner.mappings.keyword_map import SoSKeywordMap
@@ -33,16 +34,20 @@ def __init__(self, config, keywords=None, keyword_file=None):
                     # pre-generate an obfuscation mapping for each keyword
                     # this is necessary for cases where filenames are being
                     # obfuscated before or instead of file content
-                    self.mapping.get(keyword)
+                    self.mapping.get(keyword.lower())
                     self.user_keywords.append(keyword)
         if keyword_file and os.path.exists(keyword_file):
             with open(keyword_file, 'r') as kwf:
                 self.user_keywords.extend(kwf.read().splitlines())
 
+    def generate_item_regexes(self):
+        for kw in self.user_keywords:
+            self.regexes[kw] = re.compile(kw, re.I)
+
     def parse_line(self, line):
         count = 0
-        for keyword in sorted(self.user_keywords, reverse=True):
-            if keyword in line:
-                line = line.replace(keyword, self.mapping.get(keyword))
-                count += 1
+        for kwrd, reg in sorted(self.regexes.items(), key=len, reverse=True):
+            if reg.search(line):
+                line, _count = reg.subn(self.mapping.get(kwrd.lower()), line)
+                count += _count
         return line, count
diff --git a/sos/cleaner/parsers/username_parser.py b/sos/cleaner/parsers/username_parser.py
index 3208a6557..49640f7fd 100644
--- a/sos/cleaner/parsers/username_parser.py
+++ b/sos/cleaner/parsers/username_parser.py
@@ -61,12 +61,14 @@ def load_usernames_into_map(self, content):
         for each in users:
             self.mapping.get(each)
 
+    def generate_item_regexes(self):
+        for user in self.mapping.dataset:
+            self.regexes[user] = re.compile(user, re.I)
+
     def parse_line(self, line):
         count = 0
-        for username in sorted(self.mapping.dataset.keys(), reverse=True):
-            _reg = re.compile(username, re.I)
-            if _reg.search(line):
-                line, count = _reg.subn(
-                    self.mapping.get(username.lower()), line
-                )
+        for user, reg in sorted(self.regexes.items(), key=len, reverse=True):
+            if reg.search(line):
+                line, _count = reg.subn(self.mapping.get(user.lower()), line)
+                count += _count
         return line, count
diff --git a/tests/unittests/cleaner_tests.py b/tests/unittests/cleaner_tests.py
index cb20772fd..b59eade9a 100644
--- a/tests/unittests/cleaner_tests.py
+++ b/tests/unittests/cleaner_tests.py
@@ -105,6 +105,7 @@ def setUp(self):
         self.host_parser = SoSHostnameParser(config={}, opt_domains='foobar.com')
         self.kw_parser = SoSKeywordParser(config={}, keywords=['foobar'])
         self.kw_parser_none = SoSKeywordParser(config={})
+        self.kw_parser.generate_item_regexes()
 
     def test_ip_parser_valid_ipv4_line(self):
         line = 'foobar foo 10.0.0.1/24 barfoo bar'
From 134451fe8fc80c67b8714713ab49c55245fd7727 Mon Sep 17 00:00:00 2001
From: Jake Hunsaker <jhunsake@redhat.com>
Date: Thu, 20 Jan 2022 10:21:32 -0500
Subject: [PATCH 1/2] [Plugin] Add support for containers to `add_copy_spec()`

This commit adds the ability for `add_copy_spec()` to copy files from
containers running on the host via a supported container runtime.

`add_copy_spec()` now has a `container` parameter which, if set, will
generate the command needed to copy a file from a container to our temp
directory. This will be collected after host files but before command
collection.

Runtimes have been updated with a `get_copy_command()` method that will
return the command string needed to copy a given file from a given
container to a given path on the host. Note that the `crio` runtime does
not currently provide a copy mechanism like `docker` or `podman`, so
file collections from containers will not be successful on hosts using
that as their default runtime.

Finally, the manifest entries for plugins have been updated with a new
`containers` dict field whose entries are container names that have been
collected from. Those fields are also dicts having the same `files` and
`commands` keys/content as those in the plugin entry directly.

Closes: #2439

Signed-off-by: Jake Hunsaker <jhunsake@redhat.com>
---
 sos/policies/runtimes/__init__.py |  32 ++++++
 sos/policies/runtimes/crio.py     |   3 +
 sos/policies/runtimes/docker.py   |   3 +
 sos/report/plugins/__init__.py    | 180 +++++++++++++++++++++++++-----
 4 files changed, 189 insertions(+), 29 deletions(-)

diff --git a/sos/policies/runtimes/__init__.py b/sos/policies/runtimes/__init__.py
index 4e9a45c1..5ac67354 100644
--- a/sos/policies/runtimes/__init__.py
+++ b/sos/policies/runtimes/__init__.py
@@ -69,6 +69,12 @@ class ContainerRuntime():
             return True
         return False
 
+    def check_can_copy(self):
+        """Check if the runtime supports copying files out of containers and
+        onto the host filesystem
+        """
+        return True
+
     def get_containers(self, get_all=False):
         """Get a list of containers present on the system.
 
@@ -199,5 +205,31 @@ class ContainerRuntime():
         """
         return "%s logs -t %s" % (self.binary, container)
 
+    def get_copy_command(self, container, path, dest, sizelimit=None):
+        """Generate the command string used to copy a file out of a container
+        by way of the runtime.
+
+        :param container:   The name or ID of the container
+        :type container:    ``str``
+
+        :param path:        The path to copy from the container. Note that at
+                            this time, no supported runtime supports globbing
+        :type path:         ``str``
+
+        :param dest:        The destination on the *host* filesystem to write
+                            the file to
+        :type dest:         ``str``
+
+        :param sizelimit:   Limit the collection to the last X bytes of the
+                            file at PATH
+        :type sizelimit:    ``int``
+
+        :returns:   Formatted runtime command to copy a file from a container
+        :rtype:     ``str``
+        """
+        if sizelimit:
+            return "%s %s tail -c %s %s" % (self.run_cmd, container, sizelimit,
+                                            path)
+        return "%s cp %s:%s %s" % (self.binary, container, path, dest)
 
 # vim: set et ts=4 sw=4 :
diff --git a/sos/policies/runtimes/crio.py b/sos/policies/runtimes/crio.py
index 980c3ea1..55082d07 100644
--- a/sos/policies/runtimes/crio.py
+++ b/sos/policies/runtimes/crio.py
@@ -19,6 +19,9 @@ class CrioContainerRuntime(ContainerRuntime):
     name = 'crio'
     binary = 'crictl'
 
+    def check_can_copy(self):
+        return False
+
     def get_containers(self, get_all=False):
         """Get a list of containers present on the system.
 
diff --git a/sos/policies/runtimes/docker.py b/sos/policies/runtimes/docker.py
index e81f580e..c0cc156c 100644
--- a/sos/policies/runtimes/docker.py
+++ b/sos/policies/runtimes/docker.py
@@ -27,4 +27,7 @@ class DockerContainerRuntime(ContainerRuntime):
             return True
         return False
 
+    def check_can_copy(self):
+        return self.check_is_active(sysroot=self.policy.sysroot)
+
 # vim: set et ts=4 sw=4 :
diff --git a/sos/report/plugins/__init__.py b/sos/report/plugins/__init__.py
index ca58c22c..0bdc1632 100644
--- a/sos/report/plugins/__init__.py
+++ b/sos/report/plugins/__init__.py
@@ -487,6 +487,7 @@
         self.commons = commons
         self.forbidden_paths = []
         self.copy_paths = set()
+        self.container_copy_paths = []
         self.copy_strings = []
         self.collect_cmds = []
         self.sysroot = commons['sysroot']
@@ -538,6 +539,7 @@
         self.manifest.add_field('command_timeout', self.cmdtimeout)
         self.manifest.add_list('commands', [])
         self.manifest.add_list('files', [])
+        self.manifest.add_field('containers', {})
 
     def timeout_from_options(self, optname, plugoptname, default_timeout):
         """Returns either the default [plugin|cmd] timeout value, the value as
@@ -1558,7 +1560,7 @@ class Plugin():
                 self.manifest.files.append(manifest_data)
 
     def add_copy_spec(self, copyspecs, sizelimit=None, maxage=None,
-                      tailit=True, pred=None, tags=[]):
+                      tailit=True, pred=None, tags=[], container=None):
         """Add a file, directory, or regex matching filepaths to the archive
 
         :param copyspecs: A file, directory, or regex matching filepaths
@@ -1583,10 +1585,17 @@ class Plugin():
                      for this collection
         :type tags: ``str`` or a ``list`` of strings
 
+        :param container: Container(s) from which this file should be copied
+        :type container: ``str`` or a ``list`` of strings
+
         `copyspecs` will be expanded and/or globbed as appropriate. Specifying
         a directory here will cause the plugin to attempt to collect the entire
         directory, recursively.
 
+        If `container` is specified, `copyspecs` may only be explicit paths,
+        not globs as currently container runtimes do not support glob expansion
+        as part of the copy operation.
+
         Note that `sizelimit` is applied to each `copyspec`, not each file
         individually. For example, a copyspec of
         ``['/etc/foo', '/etc/bar.conf']`` and a `sizelimit` of 25 means that
@@ -1623,28 +1632,79 @@ class Plugin():
         if isinstance(tags, str):
             tags = [tags]
 
+        def get_filename_tag(fname):
+            """Generate a tag to add for a single file copyspec
+
+            This tag will be set to the filename, minus any extensions
+            except '.conf' which will be converted to '_conf'
+            """
+            fname = fname.replace('-', '_')
+            if fname.endswith('.conf'):
+                return fname.replace('.', '_')
+            return fname.split('.')[0]
+
         for copyspec in copyspecs:
             if not (copyspec and len(copyspec)):
                 return False
 
-            if self.use_sysroot():
-                copyspec = self.path_join(copyspec)
-
-            files = self._expand_copy_spec(copyspec)
+            if not container:
+                if self.use_sysroot():
+                    copyspec = self.path_join(copyspec)
+                files = self._expand_copy_spec(copyspec)
+                if len(files) == 0:
+                    continue
+            else:
+                files = [copyspec]
 
-            if len(files) == 0:
-                continue
+            _spec_tags = []
+            if len(files) == 1:
+                _spec_tags = [get_filename_tag(files[0].split('/')[-1])]
 
-            def get_filename_tag(fname):
-                """Generate a tag to add for a single file copyspec
+            _spec_tags.extend(tags)
+            _spec_tags = list(set(_spec_tags))
 
-                This tag will be set to the filename, minus any extensions
-                except '.conf' which will be converted to '_conf'
-                """
-                fname = fname.replace('-', '_')
-                if fname.endswith('.conf'):
-                    return fname.replace('.', '_')
-                return fname.split('.')[0]
+            if container:
+                if isinstance(container, str):
+                    container = [container]
+                for con in container:
+                    if not self.container_exists(con):
+                        continue
+                    _tail = False
+                    if sizelimit:
+                        # to get just the size, stat requires a literal '%s'
+                        # which conflicts with python string formatting
+                        cmd = "stat -c %s " + copyspec
+                        ret = self.exec_cmd(cmd, container=con)
+                        if ret['status'] == 0:
+                            try:
+                                consize = int(ret['output'])
+                                if consize > sizelimit:
+                                    _tail = True
+                            except ValueError:
+                                self._log_info(
+                                    "unable to determine size of '%s' in "
+                                    "container '%s'. Skipping collection."
+                                    % (copyspec, con)
+                                )
+                                continue
+                        else:
+                            self._log_debug(
+                                "stat of '%s' in container '%s' failed, "
+                                "skipping collection: %s"
+                                % (copyspec, con, ret['output'])
+                            )
+                            continue
+                    self.container_copy_paths.append(
+                        (con, copyspec, sizelimit, _tail, _spec_tags)
+                    )
+                    self._log_info(
+                        "added collection of '%s' from container '%s'"
+                        % (copyspec, con)
+                    )
+                # break out of the normal flow here as container file
+                # copies are done via command execution, not raw cp/mv
+                # operations
+                continue
 
             # Files should be sorted in most-recently-modified order, so that
             # we collect the newest data first before reaching the limit.
@@ -1668,12 +1728,6 @@ class Plugin():
                     return False
                 return True
 
-            _spec_tags = []
-            if len(files) == 1:
-                _spec_tags = [get_filename_tag(files[0].split('/')[-1])]
-
-            _spec_tags.extend(tags)
-
             if since or maxage:
                 files = list(filter(lambda f: time_filter(f), files))
 
@@ -1742,13 +1796,14 @@ class Plugin():
                     # should collect the whole file and stop
                     limit_reached = (sizelimit and current_size == sizelimit)
 
-            _spec_tags = list(set(_spec_tags))
-            if self.manifest:
-                self.manifest.files.append({
-                    'specification': copyspec,
-                    'files_copied': _manifest_files,
-                    'tags': _spec_tags
-                })
+            if not container:
+                # container collection manifest additions are handled later
+                if self.manifest:
+                    self.manifest.files.append({
+                        'specification': copyspec,
+                        'files_copied': _manifest_files,
+                        'tags': _spec_tags
+                    })
 
     def add_blockdev_cmd(self, cmds, devices='block', timeout=None,
                          sizelimit=None, chroot=True, runat=None, env=None,
@@ -2460,6 +2515,30 @@ class Plugin():
                                       chdir=runat, binary=binary, env=env,
                                       foreground=foreground, stderr=stderr)
 
+    def _add_container_file_to_manifest(self, container, path, arcpath, tags):
+        """Adds a file collection to the manifest for a particular container
+        and file path.
+
+        :param container:   The name of the container
+        :type container:    ``str``
+
+        :param path:        The filename from the container filesystem
+        :type path:         ``str``
+
+        :param arcpath:     Where in the archive the file is written to
+        :type arcpath:      ``str``
+
+        :param tags:        Metadata tags for this collection
+        :type tags:         ``str`` or ``list`` of strings
+        """
+        if container not in self.manifest.containers:
+            self.manifest.containers[container] = {'files': [], 'commands': []}
+        self.manifest.containers[container]['files'].append({
+            'specification': path,
+            'files_copied': arcpath,
+            'tags': tags
+        })
+
     def _get_container_runtime(self, runtime=None):
         """Based on policy and request by the plugin, return a usable
         ContainerRuntime if one exists
@@ -2842,6 +2921,48 @@ class Plugin():
             self._do_copy_path(path)
         self.generate_copyspec_tags()
 
+    def _collect_container_copy_specs(self):
+        """Copy any requested files from containers here. This is done
+        separately from normal file collection as this requires the use of
+        a container runtime.
+
+        This method will iterate over self.container_copy_paths which is a set
+        of 5-tuples as (container, path, sizelimit, stdout, tags).
+        """
+        if not self.container_copy_paths:
+            return
+        rt = self._get_container_runtime()
+        if not rt:
+            self._log_info("Cannot collect container based files - no runtime "
+                           "is present/active.")
+            return
+        if not rt.check_can_copy():
+            self._log_info("Loaded runtime '%s' does not support copying "
+                           "files from containers. Skipping collections.")
+            return
+        for contup in self.container_copy_paths:
+            con, path, sizelimit, tailit, tags = contup
+            self._log_info("collecting '%s' from container '%s'" % (path, con))
+
+            arcdest = "sos_containers/%s/%s" % (con, path.lstrip('/'))
+            self.archive.check_path(arcdest, P_FILE)
+            dest = self.archive.dest_path(arcdest)
+
+            cpcmd = rt.get_copy_command(
+                con, path, dest, sizelimit=sizelimit if tailit else None
+            )
+            cpret = self.exec_cmd(cpcmd, timeout=10)
+
+            if cpret['status'] == 0:
+                if tailit:
+                    # current runtimes convert files sent to stdout to tar
+                    # archives, with no way to control that
+                    self.archive.add_string(cpret['output'], arcdest)
+                self._add_container_file_to_manifest(con, path, arcdest, tags)
+            else:
+                self._log_info("error copying '%s' from container '%s': %s"
+                               % (path, con, cpret['output']))
+
     def _collect_cmds(self):
         self.collect_cmds.sort(key=lambda x: x.priority)
         for soscmd in self.collect_cmds:
@@ -2875,6 +2996,7 @@ class Plugin():
         """Collect the data for a plugin."""
         start = time()
         self._collect_copy_specs()
+        self._collect_container_copy_specs()
         self._collect_cmds()
         self._collect_strings()
         fields = (self.name(), time() - start)
-- 
2.27.0

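For illustration, a minimal standalone sketch of what the collection code
above does with one queued entry (the container and path names here are
hypothetical):

    # one entry from self.container_copy_paths, a 5-tuple of
    # (container, path, sizelimit, tailit, tags)
    con, path, sizelimit, tailit, tags = (
        'web', '/etc/nginx/nginx.conf', 25, False, ['nginx_conf']
    )
    # destination inside the archive, mirroring _collect_container_copy_specs()
    arcdest = "sos_containers/%s/%s" % (con, path.lstrip('/'))
    assert arcdest == 'sos_containers/web/etc/nginx/nginx.conf'

The loaded runtime supplies the actual copy command via get_copy_command()
(e.g. 'podman cp' when podman is active), and a failed copy is logged
rather than aborting the rest of the plugin's collections.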

From 5114e164e38f6aa09d1efdd30ab5d2e9266272cc Mon Sep 17 00:00:00 2001
From: Jake Hunsaker <jhunsake@redhat.com>
Date: Thu, 20 Jan 2022 11:30:30 -0500
Subject: [PATCH 2/2] [Plugin] Add container-based command collections in
 manifest

Following the previous commit, which added a `containers` key to a Plugin's
manifest entry, we now also record commands executed in containers under
the relevant `containers` entry.

Additionally, we will create a symlink from the relevant `sos_containers/`
path to the collection under `sos_commands/$plugin/`. This should allow
for easier spot analysis of a container by providing a kind of "micro
sos report" under the `sos_containers/` top-level directory for each
container that collections happen from.

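For illustration, a minimal standalone sketch of the relative link this
creates (the archive root, plugin name 'foo', container 'web', and mangled
command filename 'ps_aux' are all hypothetical):

    import os

    archive_root = '/tmp/sosreport-demo'
    outfn = 'sos_commands/foo/ps_aux'        # real output location
    conlnk = 'sos_containers/web/sos_commands/foo/ps_aux'

    os.makedirs(os.path.dirname(os.path.join(archive_root, conlnk)),
                exist_ok=True)
    # four '../' components climb out of
    # sos_containers/<container>/sos_commands/<plugin>/ back to the root
    os.symlink('../../../../%s' % outfn, os.path.join(archive_root, conlnk))

Reading sos_containers/web/sos_commands/foo/ps_aux then yields the same
file as sos_commands/foo/ps_aux.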
Signed-off-by: Jake Hunsaker <jhunsake@redhat.com>
---
 sos/report/plugins/__init__.py | 41 ++++++++++++++++++++++++++++++++--
 1 file changed, 39 insertions(+), 2 deletions(-)

diff --git a/sos/report/plugins/__init__.py b/sos/report/plugins/__init__.py
index 0bdc1632..2988be08 100644
--- a/sos/report/plugins/__init__.py
+++ b/sos/report/plugins/__init__.py
@@ -2019,8 +2019,10 @@ class Plugin():
         if pred is None:
             pred = self.get_predicate(cmd=True)
         for cmd in cmds:
+            container_cmd = None
             if container:
                 ocmd = cmd
+                container_cmd = (ocmd, container)
                 cmd = self.fmt_container_cmd(container, cmd)
                 if not cmd:
                     self._log_debug("Skipping command '%s' as the requested "
@@ -1805,7 +1807,8 @@
                                  env=env, binary=binary, sizelimit=sizelimit,
                                  pred=pred, subdir=subdir, tags=tags,
                                  changes=changes, foreground=foreground,
-                                 priority=priority, cmd_as_tag=cmd_as_tag)
+                                 priority=priority, cmd_as_tag=cmd_as_tag,
+                                 container_cmd=container_cmd)
 
     def add_cmd_tags(self, tagdict):
         """Retroactively add tags to any commands that have been run by this
@@ -2006,7 +2006,8 @@
                             stderr=True, chroot=True, runat=None, env=None,
                             binary=False, sizelimit=None, subdir=None,
                             changes=False, foreground=False, tags=[],
-                            priority=10, cmd_as_tag=False):
+                            priority=10, cmd_as_tag=False,
+                            container_cmd=False):
         """Execute a command and save the output to a file for inclusion in the
         report.
 
@@ -2362,10 +2365,14 @@ class Plugin():
             os.path.join(self.archive.get_archive_path(), outfn) if outfn else
             ''
         )
+
         if self.manifest:
             manifest_cmd['filepath'] = outfn
             manifest_cmd['run_time'] = run_time
             self.manifest.commands.append(manifest_cmd)
+            if container_cmd:
+                self._add_container_cmd_to_manifest(manifest_cmd.copy(),
+                                                    container_cmd)
         return result
 
     def collect_cmd_output(self, cmd, suggest_filename=None,
@@ -2539,6 +2546,36 @@ class Plugin():
             'tags': tags
         })
 
+    def _add_container_cmd_to_manifest(self, manifest, contup):
+        """Adds a command collection to the manifest for a particular container
+        and creates a symlink to that collection from the relevant
+        sos_containers/ location
+
+        :param manifest:    The manifest entry for the command
+        :type manifest:     ``dict``
+
+        :param contup:      A tuple of (original_cmd, container_name)
+        :type contup:       ``tuple``
+        """
+
+        cmd, container = contup
+        if container not in self.manifest.containers:
+            self.manifest.containers[container] = {'files': [], 'commands': []}
+        manifest['exec'] = cmd
+        manifest['command'] = cmd.split(' ')[0]
+        manifest['parameters'] = cmd.split(' ')[1:]
+
+        _cdir = "sos_containers/%s/sos_commands/%s" % (container, self.name())
+        _outloc = "../../../../%s" % manifest['filepath']
+        cmdfn = self._mangle_command(cmd)
+        conlnk = "%s/%s" % (_cdir, cmdfn)
+
+        self.archive.check_path(conlnk, P_FILE)
+        os.symlink(_outloc, self.archive.dest_path(conlnk))
+
+        manifest['filepath'] = conlnk
+        self.manifest.containers[container]['commands'].append(manifest)
+
     def _get_container_runtime(self, runtime=None):
         """Based on policy and request by the plugin, return a usable
         ContainerRuntime if one exists
-- 
2.27.0

From 2ae16e0245e1b01b8547e507abb69c11871a8467 Mon Sep 17 00:00:00 2001
From: Jake Hunsaker <jhunsake@redhat.com>
Date: Mon, 21 Feb 2022 14:37:09 -0500
Subject: [PATCH] [sosnode] Handle downstream versioning for runtime option
 check

First, this adds parsing and formatting for an sos installation's release
version, as reported by the loaded package manager for that node.

Adds a fallback version check for 4.2-13 for RHEL downstreams that
backport the `container-runtime` option into sos-4.2.

Carry this in upstream to account for use cases where the workstation used
to run `collect` may be on a different stream than the one used by the
cluster nodes.

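For illustration, a standalone sketch of the intended normalization and
comparison (the version strings below are hypothetical examples):

    from distutils.version import LooseVersion

    def format_version(ver):
        # normalize 'X.Y[-R[.suffix]]' to 'X.Y.Z-R' so that LooseVersion
        # compares release numbers consistently
        _fver = ver.split('-')[0]
        _rel = ''
        if '-' in ver:
            _rel = '-' + ver.split('-')[-1].split('.')[0]
        if len(_fver.split('.')) == 2:
            _fver += '.0'
        return _fver + _rel

    # a downstream sos-4.2-13.el8 node passes the 4.2-13 fallback check...
    assert (LooseVersion(format_version('4.2-13.el8')) >=
            LooseVersion(format_version('4.2-13')))
    # ...while a stock sos-4.2-1 node does not
    assert not (LooseVersion(format_version('4.2-1')) >=
                LooseVersion(format_version('4.2-13')))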
Signed-off-by: Jake Hunsaker <jhunsake@redhat.com>
---
 sos/collector/sosnode.py | 60 ++++++++++++++++++++++++++++++++++------
 1 file changed, 51 insertions(+), 9 deletions(-)

diff --git a/sos/collector/sosnode.py b/sos/collector/sosnode.py
index 7bbe0cd1..d9b998b0 100644
--- a/sos/collector/sosnode.py
+++ b/sos/collector/sosnode.py
@@ -275,21 +275,34 @@ class SosNode():
     def _load_sos_info(self):
         """Queries the node for information about the installed version of sos
         """
+        ver = None
+        rel = None
         if self.host.container_version_command is None:
             pkg = self.host.package_manager.pkg_version(self.host.sos_pkg_name)
             if pkg is not None:
                 ver = '.'.join(pkg['version'])
-                self.sos_info['version'] = ver
+                if pkg['release']:
+                    rel = pkg['release']
+
         else:
             # use the containerized policy's command
             pkgs = self.run_command(self.host.container_version_command,
                                     use_container=True, need_root=True)
             if pkgs['status'] == 0:
-                ver = pkgs['output'].strip().split('-')[1]
-                if ver:
-                    self.sos_info['version'] = ver
-            else:
-                self.sos_info['version'] = None
+                _, ver, rel = pkgs['output'].strip().split('-')
+
+        if ver:
+            if len(ver.split('.')) == 2:
+                # safeguard against maintenance releases throwing off the
+                # comparison by LooseVersion
+                ver += '.0'
+            try:
+                ver += '-%s' % rel.split('.')[0]
+            except Exception as err:
+                self.log_debug("Unable to fully parse sos release: %s" % err)
+
+        self.sos_info['version'] = ver
+
         if self.sos_info['version']:
             self.log_info('sos version is %s' % self.sos_info['version'])
         else:
@@ -381,9 +394,37 @@ class SosNode():
         """Checks to see if the sos installation on the node is AT LEAST the
         given ver. This means that if the installed version is greater than
         ver, this will still return True
+
+        :param ver: Version number we are trying to verify is installed
+        :type ver:  ``str``
+
+        :returns:   True if installed version is at least ``ver``, else False
+        :rtype:     ``bool``
         """
-        return self.sos_info['version'] is not None and \
-            LooseVersion(self.sos_info['version']) >= ver
+        def _format_version(ver):
+            # format the version we're checking to a standard form of X.Y.Z-R
+            try:
+                _fver = ver.split('-')[0]
+                _rel = ''
+                if '-' in ver:
+                    _rel = '-' + ver.split('-')[-1].split('.')[0]
+                if len(_fver.split('.')) == 2:
+                    _fver += '.0'
+
+                return _fver + _rel
+            except Exception as err:
+                self.log_debug("Unable to format '%s': %s" % (ver, err))
+                return ver
+
+        _ver = _format_version(ver)
+
+        try:
+            _node_ver = LooseVersion(self.sos_info['version'])
+            _test_ver = LooseVersion(_ver)
+            return _node_ver >= _test_ver
+        except Exception as err:
+            self.log_error("Error checking sos version: %s" % err)
+            return False
 
     def is_installed(self, pkg):
         """Checks if a given package is installed on the node"""
@@ -587,7 +628,8 @@ class SosNode():
                 sos_opts.append('--cmd-timeout=%s'
                                 % quote(str(self.opts.cmd_timeout)))
 
-        if self.check_sos_version('4.3'):
+        # handle downstream versions that backported this option
+        if self.check_sos_version('4.3') or self.check_sos_version('4.2-13'):
             if self.opts.container_runtime != 'auto':
                 sos_opts.append(
                     "--container-runtime=%s" % self.opts.container_runtime
-- 
2.34.1