diff --git a/README.debrand b/README.debrand deleted file mode 100644 index 01c46d2..0000000 --- a/README.debrand +++ /dev/null @@ -1,2 +0,0 @@ -Warning: This package was configured for automatic debranding, but the changes -failed to apply. diff --git a/SOURCES/sos-bz2011537-estimate-only-option.patch b/SOURCES/sos-bz2011537-estimate-only-option.patch index 968bdde..c780809 100644 --- a/SOURCES/sos-bz2011537-estimate-only-option.patch +++ b/SOURCES/sos-bz2011537-estimate-only-option.patch @@ -363,3 +363,57 @@ index 10952566..a4c92acc 100644 -- 2.31.1 +From c6a5bbb8d75aadd5c7f76d3f469929aba2cf8060 Mon Sep 17 00:00:00 2001 +From: Pavel Moravec +Date: Wed, 5 Jan 2022 10:33:58 +0100 +Subject: [PATCH] [report] Provide better warning about estimate-mode + +As --estimate-only calculates disk usage based on `stat` data that +differs from outputs of other commands like `du`, enhance the warning +about reliability of the calculated estimation. + +Also add a rule-of-thumb recommendation of real disk space requirements. + +Resolves: #2815 + +Signed-off-by: Pavel Moravec +--- + man/en/sos-report.1 | 10 +++++++--- + sos/report/__init__.py | 3 ++- + 2 files changed, 9 insertions(+), 4 deletions(-) + +diff --git a/man/en/sos-report.1 b/man/en/sos-report.1 +index 464a77e54..e34773986 100644 +--- a/man/en/sos-report.1 ++++ b/man/en/sos-report.1 +@@ -343,9 +343,13 @@ is available at the end. + + Plugins will be collected sequentially, size of collected files and commands outputs + will be calculated and the plugin files will be immediatelly deleted prior execution +-of the next plugin. This still can consume whole free disk space, though. Please note, +-size estimations may not be accurate for highly utilized systems due to changes between +-an estimate and a real execution. ++of the next plugin. This still can consume whole free disk space, though. ++ ++Please note, size estimations may not be accurate for highly utilized systems due to ++changes between an estimate and a real execution. Also some difference between ++estimation (using `stat` command) and other commands used (i.e. `du`). ++ ++A rule of thumb is to reserve at least double the estimation. + .TP + .B \--upload + If specified, attempt to upload the resulting archive to a vendor defined location. +diff --git a/sos/report/__init__.py b/sos/report/__init__.py +index ef61fb344..e0617b45e 100644 +--- a/sos/report/__init__.py ++++ b/sos/report/__init__.py +@@ -1330,7 +1330,8 @@ def final_work(self): + self.ui_log.info("Please note the estimation is relevant to the " + "current options.") + self.ui_log.info("Be aware that the real disk space requirements " +- "might be different.") ++ "might be different. 
A rule of thumb is to " ++ "reserve at least double the estimation.") + self.ui_log.info("") + + # package up and compress the results diff --git a/SOURCES/sos-bz2024893-cleaner-hostnames-improvements.patch b/SOURCES/sos-bz2024893-cleaner-hostnames-improvements.patch index cceb1de..d257fdf 100644 --- a/SOURCES/sos-bz2024893-cleaner-hostnames-improvements.patch +++ b/SOURCES/sos-bz2024893-cleaner-hostnames-improvements.patch @@ -1595,3 +1595,205 @@ index 5cd8e9857..33b0e6c80 100644 ob_domain = self._new_obfuscated_domain(dname) ob_domain = '.'.join([ob_domain, top_domain]) self.dataset['.'.join(domain)] = ob_domain +From f5e1298162a9393ea2d9f5c4df40dfece50f5f88 Mon Sep 17 00:00:00 2001 +From: Jake Hunsaker +Date: Thu, 6 Jan 2022 13:15:15 -0500 +Subject: [PATCH 1/3] [hostname] Fix loading and detection of long base domains + +Our domain matching has up to now assumed that users would be providing +'base' domains such as 'example.com' whereby something like +'foo.bar.example.com' is a subdomain (or host) within that base domain. + +However, the use case exists to provide 'foo.bar.example.com' as the +base domain, without wanting to obfuscate 'example.com' directly. + +This commit fixes our handling of both loading these longer domains and +doing the 'domain is part of a domain we want to obfuscate' check. + +Signed-off-by: Jake Hunsaker +--- + sos/cleaner/mappings/hostname_map.py | 9 ++++++++- + 1 file changed, 8 insertions(+), 1 deletion(-) + +diff --git a/sos/cleaner/mappings/hostname_map.py b/sos/cleaner/mappings/hostname_map.py +index 33b0e6c8..7a7cf6b8 100644 +--- a/sos/cleaner/mappings/hostname_map.py ++++ b/sos/cleaner/mappings/hostname_map.py +@@ -50,10 +50,14 @@ class SoSHostnameMap(SoSMap): + in this parser, we need to re-inject entries from the map_file into + these dicts and not just the underlying 'dataset' dict + """ +- for domain in self.dataset: ++ for domain, ob_pair in self.dataset.items(): + if len(domain.split('.')) == 1: + self.hosts[domain.split('.')[0]] = self.dataset[domain] + else: ++ if ob_pair.startswith('obfuscateddomain'): ++ # directly exact domain matches ++ self._domains[domain] = ob_pair.split('.')[0] ++ continue + # strip the host name and trailing top-level domain so that + # we in inject the domain properly for later string matching + +@@ -102,9 +106,12 @@ class SoSHostnameMap(SoSMap): + and should be obfuscated + """ + host = domain.split('.') ++ no_tld = '.'.join(domain.split('.')[0:-1]) + if len(host) == 1: + # don't block on host's shortname + return host[0] in self.hosts.keys() ++ elif any([no_tld.endswith(_d) for _d in self._domains]): ++ return True + else: + domain = host[0:-1] + for known_domain in self._domains: +-- +2.31.1 + + +From e241cf33a14ecd4e848a5fd857c5d3d7d07fbd71 Mon Sep 17 00:00:00 2001 +From: Jake Hunsaker +Date: Thu, 6 Jan 2022 13:18:44 -0500 +Subject: [PATCH 2/3] [cleaner] Improve parser-specific file skipping + +This commit improves our handling of skipping files on a per-parser +basis, by first filtering the list of parsers that `obfuscate_line()` +will iterate over by the parser's `skip_file` class attr, rather than +relying on higher-level checks. 
+ +Signed-off-by: Jake Hunsaker +--- + sos/cleaner/__init__.py | 17 ++++++++++++++--- + 1 file changed, 14 insertions(+), 3 deletions(-) + +diff --git a/sos/cleaner/__init__.py b/sos/cleaner/__init__.py +index 3f530d44..5686e213 100644 +--- a/sos/cleaner/__init__.py ++++ b/sos/cleaner/__init__.py +@@ -12,6 +12,7 @@ import hashlib + import json + import logging + import os ++import re + import shutil + import tempfile + +@@ -640,10 +641,16 @@ third party. + self.log_debug("Obfuscating %s" % short_name or filename, + caller=arc_name) + tfile = tempfile.NamedTemporaryFile(mode='w', dir=self.tmpdir) ++ _parsers = [ ++ _p for _p in self.parsers if not ++ any([ ++ re.match(p, short_name) for p in _p.skip_files ++ ]) ++ ] + with open(filename, 'r') as fname: + for line in fname: + try: +- line, count = self.obfuscate_line(line) ++ line, count = self.obfuscate_line(line, _parsers) + subs += count + tfile.write(line) + except Exception as err: +@@ -713,7 +720,7 @@ third party. + pass + return string_data + +- def obfuscate_line(self, line): ++ def obfuscate_line(self, line, parsers=None): + """Run a line through each of the obfuscation parsers, keeping a + cumulative total of substitutions done on that particular line. + +@@ -721,6 +728,8 @@ third party. + + :param line str: The raw line as read from the file being + processed ++ :param parsers: A list of parser objects to obfuscate ++ with. If None, use all. + + Returns the fully obfuscated line and the number of substitutions made + """ +@@ -729,7 +738,9 @@ third party. + count = 0 + if not line.strip(): + return line, count +- for parser in self.parsers: ++ if parsers is None: ++ parsers = self.parsers ++ for parser in parsers: + try: + line, _count = parser.parse_line(line) + count += _count +-- +2.31.1 + + +From 96c9a833e77639a853b7d3d6f1df68bbbbe5e9cb Mon Sep 17 00:00:00 2001 +From: Jake Hunsaker +Date: Thu, 6 Jan 2022 13:20:32 -0500 +Subject: [PATCH 3/3] [cleaner] Add skips for known files and usernames + +Adds skips for `/proc/kallsyms` which should never be obfuscated, as +well as any packaging-related log file for the IP parser. Further, do +not obfuscate the `stack` users, as that is a well-known user for many +configurations that, if obfuscated, could result in undesired string +substitutions in normal logging. 
+ +Signed-off-by: Jake Hunsaker +--- + sos/cleaner/archives/__init__.py | 2 ++ + sos/cleaner/parsers/ip_parser.py | 3 ++- + sos/cleaner/parsers/username_parser.py | 1 + + 3 files changed, 5 insertions(+), 1 deletion(-) + +diff --git a/sos/cleaner/archives/__init__.py b/sos/cleaner/archives/__init__.py +index 795c5a78..cbf1f809 100644 +--- a/sos/cleaner/archives/__init__.py ++++ b/sos/cleaner/archives/__init__.py +@@ -43,6 +43,7 @@ class SoSObfuscationArchive(): + type_name = 'undetermined' + description = 'undetermined' + is_nested = False ++ skip_files = [] + prep_files = {} + + def __init__(self, archive_path, tmpdir): +@@ -111,6 +112,7 @@ class SoSObfuscationArchive(): + Returns: list of files and file regexes + """ + return [ ++ 'proc/kallsyms', + 'sosreport-', + 'sys/firmware', + 'sys/fs', +diff --git a/sos/cleaner/parsers/ip_parser.py b/sos/cleaner/parsers/ip_parser.py +index 71d38be8..b007368c 100644 +--- a/sos/cleaner/parsers/ip_parser.py ++++ b/sos/cleaner/parsers/ip_parser.py +@@ -37,7 +37,8 @@ class SoSIPParser(SoSCleanerParser): + 'sos_commands/snappy/snap_list_--all', + 'sos_commands/snappy/snap_--version', + 'sos_commands/vulkan/vulkaninfo', +- 'var/log/.*dnf.*' ++ 'var/log/.*dnf.*', ++ 'var/log/.*packag.*' # get 'packages' and 'packaging' logs + ] + + map_file_key = 'ip_map' +diff --git a/sos/cleaner/parsers/username_parser.py b/sos/cleaner/parsers/username_parser.py +index 229c7de4..3208a655 100644 +--- a/sos/cleaner/parsers/username_parser.py ++++ b/sos/cleaner/parsers/username_parser.py +@@ -32,6 +32,7 @@ class SoSUsernameParser(SoSCleanerParser): + 'nobody', + 'nfsnobody', + 'shutdown', ++ 'stack', + 'reboot', + 'root', + 'ubuntu', +-- +2.31.1 + diff --git a/SOURCES/sos-bz2037350-ocp-backports.patch b/SOURCES/sos-bz2037350-ocp-backports.patch new file mode 100644 index 0000000..a47dc91 --- /dev/null +++ b/SOURCES/sos-bz2037350-ocp-backports.patch @@ -0,0 +1,5650 @@ +From 676dfca09d9c783311a51a1c53fa0f7ecd95bd28 Mon Sep 17 00:00:00 2001 +From: Jake Hunsaker +Date: Fri, 10 Sep 2021 13:38:19 -0400 +Subject: [PATCH] [collect] Abstract transport protocol from SoSNode + +Since its addition to sos, collect has assumed the use of a system +installation of SSH in order to connect to the nodes identified for +collection. However, there may be use cases and desires to use other +transport protocols. + +As such, provide an abstraction for these protocols in the form of the +new `RemoteTransport` class that `SoSNode` will now leverage. So far an +abstraction for the currently used SSH ControlPersist function is +provided, along with a psuedo abstraction for local execution so that +SoSNode does not directly need to make more "if local then foo" checks +than are absolutely necessary. 
+ +Related: #2668 + +Signed-off-by: Jake Hunsaker +--- + setup.py | 4 +- + sos/collector/__init__.py | 54 +-- + sos/collector/clusters/__init__.py | 4 +- + sos/collector/clusters/jbon.py | 2 + + sos/collector/clusters/kubernetes.py | 4 +- + sos/collector/clusters/ocp.py | 6 +- + sos/collector/clusters/ovirt.py | 10 +- + sos/collector/clusters/pacemaker.py | 8 +- + sos/collector/clusters/satellite.py | 4 +- + sos/collector/sosnode.py | 388 +++++--------------- + sos/collector/transports/__init__.py | 317 ++++++++++++++++ + sos/collector/transports/control_persist.py | 199 ++++++++++ + sos/collector/transports/local.py | 49 +++ + 13 files changed, 705 insertions(+), 344 deletions(-) + create mode 100644 sos/collector/transports/__init__.py + create mode 100644 sos/collector/transports/control_persist.py + create mode 100644 sos/collector/transports/local.py + +diff --git a/setup.py b/setup.py +index 7653b59d..25e87a71 100644 +--- a/setup.py ++++ b/setup.py +@@ -101,8 +101,8 @@ setup( + 'sos.policies.distros', 'sos.policies.runtimes', + 'sos.policies.package_managers', 'sos.policies.init_systems', + 'sos.report', 'sos.report.plugins', 'sos.collector', +- 'sos.collector.clusters', 'sos.cleaner', 'sos.cleaner.mappings', +- 'sos.cleaner.parsers', 'sos.cleaner.archives' ++ 'sos.collector.clusters', 'sos.collector.transports', 'sos.cleaner', ++ 'sos.cleaner.mappings', 'sos.cleaner.parsers', 'sos.cleaner.archives' + ], + cmdclass=cmdclass, + command_options=command_options, +diff --git a/sos/collector/__init__.py b/sos/collector/__init__.py +index b2a07f37..da912655 100644 +--- a/sos/collector/__init__.py ++++ b/sos/collector/__init__.py +@@ -17,7 +17,6 @@ import re + import string + import socket + import shutil +-import subprocess + import sys + + from datetime import datetime +@@ -28,7 +27,6 @@ from pipes import quote + from textwrap import fill + from sos.cleaner import SoSCleaner + from sos.collector.sosnode import SosNode +-from sos.collector.exceptions import ControlPersistUnsupportedException + from sos.options import ClusterOption + from sos.component import SoSComponent + from sos import __version__ +@@ -154,7 +152,6 @@ class SoSCollector(SoSComponent): + try: + self.parse_node_strings() + self.parse_cluster_options() +- self._check_for_control_persist() + self.log_debug('Executing %s' % ' '.join(s for s in sys.argv)) + self.log_debug("Found cluster profiles: %s" + % self.clusters.keys()) +@@ -437,33 +434,6 @@ class SoSCollector(SoSComponent): + action='extend', + help='List of usernames to obfuscate') + +- def _check_for_control_persist(self): +- """Checks to see if the local system supported SSH ControlPersist. +- +- ControlPersist allows OpenSSH to keep a single open connection to a +- remote host rather than building a new session each time. This is the +- same feature that Ansible uses in place of paramiko, which we have a +- need to drop in sos-collector. +- +- This check relies on feedback from the ssh binary. The command being +- run should always generate stderr output, but depending on what that +- output reads we can determine if ControlPersist is supported or not. +- +- For our purposes, a host that does not support ControlPersist is not +- able to run sos-collector. +- +- Returns +- True if ControlPersist is supported, else raise Exception. 
+- """ +- ssh_cmd = ['ssh', '-o', 'ControlPersist'] +- cmd = subprocess.Popen(ssh_cmd, stdout=subprocess.PIPE, +- stderr=subprocess.PIPE) +- out, err = cmd.communicate() +- err = err.decode('utf-8') +- if 'Bad configuration option' in err or 'Usage:' in err: +- raise ControlPersistUnsupportedException +- return True +- + def exit(self, msg, error=1): + """Used to safely terminate if sos-collector encounters an error""" + self.log_error(msg) +@@ -455,7 +455,7 @@ class SoSCollector(SoSComponent): + 'cmdlineopts': self.opts, + 'need_sudo': True if self.opts.ssh_user != 'root' else False, + 'tmpdir': self.tmpdir, +- 'hostlen': len(self.opts.master) or len(self.hostname), ++ 'hostlen': max(len(self.opts.primary), len(self.hostname)), + 'policy': self.policy + } + +@@ -1020,9 +1020,10 @@ class SoSCollector(SoSComponent): + self.node_list.append(self.hostname) + self.reduce_node_list() + try: +- self.commons['hostlen'] = len(max(self.node_list, key=len)) ++ _node_max = len(max(self.node_list, key=len)) ++ self.commons['hostlen'] = max(_node_max, self.commons['hostlen']) + except (TypeError, ValueError): +- self.commons['hostlen'] = len(self.opts.master) ++ pass + + def _connect_to_node(self, node): + """Try to connect to the node, and if we can add to the client list to +@@ -1068,7 +1039,7 @@ class SoSCollector(SoSComponent): + client.set_node_manifest(getattr(self.collect_md.nodes, + node[0])) + else: +- client.close_ssh_session() ++ client.disconnect() + except Exception: + pass + +@@ -1077,12 +1048,11 @@ class SoSCollector(SoSComponent): + provided on the command line + """ + disclaimer = ("""\ +-This utility is used to collect sosreports from multiple \ +-nodes simultaneously. It uses OpenSSH's ControlPersist feature \ +-to connect to nodes and run commands remotely. If your system \ +-installation of OpenSSH is older than 5.6, please upgrade. ++This utility is used to collect sos reports from multiple \ ++nodes simultaneously. Remote connections are made and/or maintained \ ++to those nodes via well-known transport protocols such as SSH. + +-An archive of sosreport tarballs collected from the nodes will be \ ++An archive of sos report tarballs collected from the nodes will be \ + generated in %s and may be provided to an appropriate support representative. + + The generated archive may contain data considered sensitive \ +@@ -1230,10 +1200,10 @@ this utility or remote systems that it connects to. 
+ self.log_error("Error running sosreport: %s" % err) + + def close_all_connections(self): +- """Close all ssh sessions for nodes""" ++ """Close all sessions for nodes""" + for client in self.client_list: +- self.log_debug('Closing SSH connection to %s' % client.address) +- client.close_ssh_session() ++ self.log_debug('Closing connection to %s' % client.address) ++ client.disconnect() + + def create_cluster_archive(self): + """Calls for creation of tar archive then cleans up the temporary +diff --git a/sos/collector/clusters/__init__.py b/sos/collector/clusters/__init__.py +index 2b5d7018..64ac2a44 100644 +--- a/sos/collector/clusters/__init__.py ++++ b/sos/collector/clusters/__init__.py +@@ -188,8 +188,8 @@ class Cluster(): + :rtype: ``dict`` + """ + res = self.master.run_command(cmd, get_pty=True, need_root=need_root) +- if res['stdout']: +- res['stdout'] = res['stdout'].replace('Password:', '') ++ if res['output']: ++ res['output'] = res['output'].replace('Password:', '') + return res + + def setup(self): +diff --git a/sos/collector/clusters/jbon.py b/sos/collector/clusters/jbon.py +index 488fbd16..8f083ac6 100644 +--- a/sos/collector/clusters/jbon.py ++++ b/sos/collector/clusters/jbon.py +@@ -28,3 +28,5 @@ class jbon(Cluster): + # This should never be called, but as insurance explicitly never + # allow this to be enabled via the determine_cluster() path + return False ++ ++# vim: set et ts=4 sw=4 : +diff --git a/sos/collector/clusters/kubernetes.py b/sos/collector/clusters/kubernetes.py +index cdbf8861..99f788dc 100644 +--- a/sos/collector/clusters/kubernetes.py ++++ b/sos/collector/clusters/kubernetes.py +@@ -34,7 +34,7 @@ class kubernetes(Cluster): + if res['status'] == 0: + nodes = [] + roles = [x for x in self.get_option('role').split(',') if x] +- for nodeln in res['stdout'].splitlines()[1:]: ++ for nodeln in res['output'].splitlines()[1:]: + node = nodeln.split() + if not roles: + nodes.append(node[0]) +@@ -44,3 +44,5 @@ class kubernetes(Cluster): + return nodes + else: + raise Exception('Node enumeration did not return usable output') ++ ++# vim: set et ts=4 sw=4 : +diff --git a/sos/collector/clusters/ocp.py b/sos/collector/clusters/ocp.py +index 5479417d..ad97587f 100644 +--- a/sos/collector/clusters/ocp.py ++++ b/sos/collector/clusters/ocp.py +@@ -93,7 +93,7 @@ class ocp(Cluster): + res = self.exec_master_cmd(self.fmt_oc_cmd(cmd)) + if res['status'] == 0: + roles = [r for r in self.get_option('role').split(':')] +- self.node_dict = self._build_dict(res['stdout'].splitlines()) ++ self.node_dict = self._build_dict(res['output'].splitlines()) + for node in self.node_dict: + if roles: + for role in roles: +@@ -103,7 +103,7 @@ class ocp(Cluster): + nodes.append(node) + else: + msg = "'oc' command failed" +- if 'Missing or incomplete' in res['stdout']: ++ if 'Missing or incomplete' in res['output']: + msg = ("'oc' failed due to missing kubeconfig on master node." 
+ " Specify one via '-c ocp.kubeconfig='") + raise Exception(msg) +@@ -168,3 +168,5 @@ class ocp(Cluster): + def set_node_options(self, node): + # don't attempt OC API collections on non-primary nodes + node.plugin_options.append('openshift.no-oc=on') ++ ++# vim: set et ts=4 sw=4 : +diff --git a/sos/collector/clusters/ovirt.py b/sos/collector/clusters/ovirt.py +index 079a122e..bd2d0c74 100644 +--- a/sos/collector/clusters/ovirt.py ++++ b/sos/collector/clusters/ovirt.py +@@ -98,7 +98,7 @@ class ovirt(Cluster): + return [] + res = self._run_db_query(self.dbquery) + if res['status'] == 0: +- nodes = res['stdout'].splitlines()[2:-1] ++ nodes = res['output'].splitlines()[2:-1] + return [n.split('(')[0].strip() for n in nodes] + else: + raise Exception('database query failed, return code: %s' +@@ -114,7 +114,7 @@ class ovirt(Cluster): + engconf = '/etc/ovirt-engine/engine.conf.d/10-setup-database.conf' + res = self.exec_primary_cmd('cat %s' % engconf, need_root=True) + if res['status'] == 0: +- config = res['stdout'].splitlines() ++ config = res['output'].splitlines() + for line in config: + try: + k = str(line.split('=')[0]) +@@ -141,7 +141,7 @@ class ovirt(Cluster): + '--batch -o postgresql {}' + ).format(self.conf['ENGINE_DB_PASSWORD'], sos_opt) + db_sos = self.exec_primary_cmd(cmd, need_root=True) +- for line in db_sos['stdout'].splitlines(): ++ for line in db_sos['output'].splitlines(): + if fnmatch.fnmatch(line, '*sosreport-*tar*'): + _pg_dump = line.strip() + self.master.manifest.add_field('postgresql_dump', +@@ -180,5 +180,7 @@ class rhhi_virt(rhv): + ret = self._run_db_query('SELECT count(server_id) FROM gluster_server') + if ret['status'] == 0: + # if there are any entries in this table, RHHI-V is in use +- return ret['stdout'].splitlines()[2].strip() != '0' ++ return ret['output'].splitlines()[2].strip() != '0' + return False ++ ++# vim: set et ts=4 sw=4 : +diff --git a/sos/collector/clusters/pacemaker.py b/sos/collector/clusters/pacemaker.py +index 034f3f3e..55024314 100644 +--- a/sos/collector/clusters/pacemaker.py ++++ b/sos/collector/clusters/pacemaker.py +@@ -27,7 +27,7 @@ class pacemaker(Cluster): + self.log_error('Cluster status could not be determined. Is the ' + 'cluster running on this node?') + return [] +- if 'node names do not match' in self.res['stdout']: ++ if 'node names do not match' in self.res['output']: + self.log_warn('Warning: node name mismatch reported. 
Attempts to ' + 'connect to some nodes may fail.\n') + return self.parse_pcs_output() +@@ -41,17 +41,19 @@ class pacemaker(Cluster): + return nodes + + def get_online_nodes(self): +- for line in self.res['stdout'].splitlines(): ++ for line in self.res['output'].splitlines(): + if line.startswith('Online:'): + nodes = line.split('[')[1].split(']')[0] + return [n for n in nodes.split(' ') if n] + + def get_offline_nodes(self): + offline = [] +- for line in self.res['stdout'].splitlines(): ++ for line in self.res['output'].splitlines(): + if line.startswith('Node') and line.endswith('(offline)'): + offline.append(line.split()[1].replace(':', '')) + if line.startswith('OFFLINE:'): + nodes = line.split('[')[1].split(']')[0] + offline.extend([n for n in nodes.split(' ') if n]) + return offline ++ ++# vim: set et ts=4 sw=4 : +diff --git a/sos/collector/clusters/satellite.py b/sos/collector/clusters/satellite.py +index e123c8a3..7c21e553 100644 +--- a/sos/collector/clusters/satellite.py ++++ b/sos/collector/clusters/satellite.py +@@ -28,7 +28,7 @@ class satellite(Cluster): + res = self.exec_primary_cmd(cmd, need_root=True) + if res['status'] == 0: + nodes = [ +- n.strip() for n in res['stdout'].splitlines() ++ n.strip() for n in res['output'].splitlines() + if 'could not change directory' not in n + ] + return nodes +@@ -38,3 +38,5 @@ class satellite(Cluster): + if node.address == self.master.address: + return 'satellite' + return 'capsule' ++ ++# vim: set et ts=4 sw=4 : +diff --git a/sos/collector/sosnode.py b/sos/collector/sosnode.py +index 4b1ee109..f79bd5ff 100644 +--- a/sos/collector/sosnode.py ++++ b/sos/collector/sosnode.py +@@ -12,22 +12,16 @@ import fnmatch + import inspect + import logging + import os +-import pexpect + import re +-import shutil + + from distutils.version import LooseVersion + from pipes import quote + from sos.policies import load + from sos.policies.init_systems import InitSystem +-from sos.collector.exceptions import (InvalidPasswordException, +- TimeoutPasswordAuthException, +- PasswordRequestException, +- AuthPermissionDeniedException, ++from sos.collector.transports.control_persist import SSHControlPersist ++from sos.collector.transports.local import LocalTransport ++from sos.collector.exceptions import (CommandTimeoutException, + ConnectionException, +- CommandTimeoutException, +- ConnectionTimeoutException, +- ControlSocketMissingException, + UnsupportedHostException) + + +@@ -61,34 +61,25 @@ class SosNode(): + 'sos_cmd': commons['sos_cmd'] + } + self.sos_bin = 'sosreport' +- filt = ['localhost', '127.0.0.1'] + self.soslog = logging.getLogger('sos') + self.ui_log = logging.getLogger('sos_ui') +- self.control_path = ("%s/.sos-collector-%s" +- % (self.tmpdir, self.address)) +- self.ssh_cmd = self._create_ssh_command() +- if self.address not in filt: +- try: +- self.connected = self._create_ssh_session() +- except Exception as err: +- self.log_error('Unable to open SSH session: %s' % err) +- raise +- else: +- self.connected = True +- self.local = True +- self.need_sudo = os.getuid() != 0 ++ self._transport = self._load_remote_transport(commons) ++ try: ++ self._transport.connect(self._password) ++ except Exception as err: ++ self.log_error('Unable to open remote session: %s' % err) ++ raise + # load the host policy now, even if we don't want to load further + # host information. 
This is necessary if we're running locally on the + # cluster master but do not want a local report as we still need to do + # package checks in that instance + self.host = self.determine_host_policy() +- self.get_hostname() ++ self.hostname = self._transport.hostname + if self.local and self.opts.no_local: + load_facts = False + if self.connected and load_facts: + if not self.host: +- self.connected = False +- self.close_ssh_session() ++ self._transport.disconnect() + return None + if self.local: + if self.check_in_container(): +@@ -103,11 +88,26 @@ class SosNode(): + self.create_sos_container() + self._load_sos_info() + +- def _create_ssh_command(self): +- """Build the complete ssh command for this node""" +- cmd = "ssh -oControlPath=%s " % self.control_path +- cmd += "%s@%s " % (self.opts.ssh_user, self.address) +- return cmd ++ @property ++ def connected(self): ++ if self._transport: ++ return self._transport.connected ++ # if no transport, we're running locally ++ return True ++ ++ def disconnect(self): ++ """Wrapper to close the remote session via our transport agent ++ """ ++ self._transport.disconnect() ++ ++ def _load_remote_transport(self, commons): ++ """Determine the type of remote transport to load for this node, then ++ return an instantiated instance of that transport ++ """ ++ if self.address in ['localhost', '127.0.0.1']: ++ self.local = True ++ return LocalTransport(self.address, commons) ++ return SSHControlPersist(self.address, commons) + + def _fmt_msg(self, msg): + return '{:<{}} : {}'.format(self._hostname, self.hostlen + 1, msg) +@@ -135,6 +135,7 @@ class SosNode(): + self.manifest.add_field('policy', self.host.distro) + self.manifest.add_field('sos_version', self.sos_info['version']) + self.manifest.add_field('final_sos_command', '') ++ self.manifest.add_field('transport', self._transport.name) + + def check_in_container(self): + """ +@@ -160,13 +161,13 @@ class SosNode(): + res = self.run_command(cmd, need_root=True) + if res['status'] in [0, 125]: + if res['status'] == 125: +- if 'unable to retrieve auth token' in res['stdout']: ++ if 'unable to retrieve auth token' in res['output']: + self.log_error( + "Could not pull image. 
Provide either a username " + "and password or authfile" + ) + raise Exception +- elif 'unknown: Not found' in res['stdout']: ++ elif 'unknown: Not found' in res['output']: + self.log_error('Specified image not found on registry') + raise Exception + # 'name exists' with code 125 means the container was +@@ -181,11 +182,11 @@ class SosNode(): + return True + else: + self.log_error("Could not start container after create: %s" +- % ret['stdout']) ++ % ret['output']) + raise Exception + else: + self.log_error("Could not create container on host: %s" +- % res['stdout']) ++ % res['output']) + raise Exception + + def get_container_auth(self): +@@ -204,18 +205,11 @@ class SosNode(): + + def file_exists(self, fname, need_root=False): + """Checks for the presence of fname on the remote node""" +- if not self.local: +- try: +- res = self.run_command("stat %s" % fname, need_root=need_root) +- return res['status'] == 0 +- except Exception: +- return False +- else: +- try: +- os.stat(fname) +- return True +- except Exception: +- return False ++ try: ++ res = self.run_command("stat %s" % fname, need_root=need_root) ++ return res['status'] == 0 ++ except Exception: ++ return False + + @property + def _hostname(self): +@@ -223,18 +217,6 @@ class SosNode(): + return self.hostname + return self.address + +- @property +- def control_socket_exists(self): +- """Check if the SSH control socket exists +- +- The control socket is automatically removed by the SSH daemon in the +- event that the last connection to the node was greater than the timeout +- set by the ControlPersist option. This can happen for us if we are +- collecting from a large number of nodes, and the timeout expires before +- we start collection. +- """ +- return os.path.exists(self.control_path) +- + def _sanitize_log_msg(self, msg): + """Attempts to obfuscate sensitive information in log messages such as + passwords""" +@@ -264,12 +246,6 @@ class SosNode(): + msg = '[%s:%s] %s' % (self._hostname, caller, msg) + self.soslog.debug(msg) + +- def get_hostname(self): +- """Get the node's hostname""" +- sout = self.run_command('hostname') +- self.hostname = sout['stdout'].strip() +- self.log_info('Hostname set to %s' % self.hostname) +- + def _format_cmd(self, cmd): + """If we need to provide a sudo or root password to a command, then + here we prefix the command with the correct bits +@@ -280,19 +256,6 @@ class SosNode(): + return "sudo -S %s" % cmd + return cmd + +- def _fmt_output(self, output=None, rc=0): +- """Formats the returned output from a command into a dict""" +- if rc == 0: +- stdout = output +- stderr = '' +- else: +- stdout = '' +- stderr = output +- res = {'status': rc, +- 'stdout': stdout, +- 'stderr': stderr} +- return res +- + def _load_sos_info(self): + """Queries the node for information about the installed version of sos + """ +@@ -306,7 +269,7 @@ class SosNode(): + pkgs = self.run_command(self.host.container_version_command, + use_container=True, need_root=True) + if pkgs['status'] == 0: +- ver = pkgs['stdout'].strip().split('-')[1] ++ ver = pkgs['output'].strip().split('-')[1] + if ver: + self.sos_info['version'] = ver + else: +@@ -321,18 +284,21 @@ class SosNode(): + self.log_error('sos is not installed on this node') + self.connected = False + return False +- cmd = 'sosreport -l' ++ # sos-4.0 changes the binary ++ if self.check_sos_version('4.0'): ++ self.sos_bin = 'sos report' ++ cmd = "%s -l" % self.sos_bin + sosinfo = self.run_command(cmd, use_container=True, need_root=True) + if sosinfo['status'] == 0: +- 
self._load_sos_plugins(sosinfo['stdout']) ++ self._load_sos_plugins(sosinfo['output']) + if self.check_sos_version('3.6'): + self._load_sos_presets() + + def _load_sos_presets(self): +- cmd = 'sosreport --list-presets' ++ cmd = '%s --list-presets' % self.sos_bin + res = self.run_command(cmd, use_container=True, need_root=True) + if res['status'] == 0: +- for line in res['stdout'].splitlines(): ++ for line in res['output'].splitlines(): + if line.strip().startswith('name:'): + pname = line.split('name:')[1].strip() + self.sos_info['presets'].append(pname) +@@ -372,21 +338,7 @@ class SosNode(): + """Reads the specified file and returns the contents""" + try: + self.log_info("Reading file %s" % to_read) +- if not self.local: +- res = self.run_command("cat %s" % to_read, timeout=5) +- if res['status'] == 0: +- return res['stdout'] +- else: +- if 'No such file' in res['stdout']: +- self.log_debug("File %s does not exist on node" +- % to_read) +- else: +- self.log_error("Error reading %s: %s" % +- (to_read, res['stdout'].split(':')[1:])) +- return '' +- else: +- with open(to_read, 'r') as rfile: +- return rfile.read() ++ return self._transport.read_file(to_read) + except Exception as err: + self.log_error("Exception while reading %s: %s" % (to_read, err)) + return '' +@@ -400,7 +352,8 @@ class SosNode(): + % self.commons['policy'].distro) + return self.commons['policy'] + host = load(cache={}, sysroot=self.opts.sysroot, init=InitSystem(), +- probe_runtime=True, remote_exec=self.ssh_cmd, ++ probe_runtime=True, ++ remote_exec=self._transport.remote_exec, + remote_check=self.read_file('/etc/os-release')) + if host: + self.log_info("loaded policy %s for host" % host.distro) +@@ -422,7 +375,7 @@ class SosNode(): + return self.host.package_manager.pkg_by_name(pkg) is not None + + def run_command(self, cmd, timeout=180, get_pty=False, need_root=False, +- force_local=False, use_container=False, env=None): ++ use_container=False, env=None): + """Runs a given cmd, either via the SSH session or locally + + Arguments: +@@ -433,58 +386,37 @@ class SosNode(): + need_root - if a command requires root privileges, setting this to + True tells sos-collector to format the command with + sudo or su - as appropriate and to input the password +- force_local - force a command to run locally. Mainly used for scp. 
+ use_container - Run this command in a container *IF* the host is + containerized + """ +- if not self.control_socket_exists and not self.local: +- self.log_debug('Control socket does not exist, attempting to ' +- 're-create') ++ if not self.connected and not self.local: ++ self.log_debug('Node is disconnected, attempting to reconnect') + try: +- _sock = self._create_ssh_session() +- if not _sock: +- self.log_debug('Failed to re-create control socket') +- raise ControlSocketMissingException ++ reconnected = self._transport.reconnect(self._password) ++ if not reconnected: ++ self.log_debug('Failed to reconnect to node') ++ raise ConnectionException + except Exception as err: +- self.log_error('Cannot run command: control socket does not ' +- 'exist') +- self.log_debug("Error while trying to create new SSH control " +- "socket: %s" % err) ++ self.log_debug("Error while trying to reconnect: %s" % err) + raise + if use_container and self.host.containerized: + cmd = self.host.format_container_command(cmd) + if need_root: +- get_pty = True + cmd = self._format_cmd(cmd) +- self.log_debug('Running command %s' % cmd) ++ + if 'atomic' in cmd: + get_pty = True +- if not self.local and not force_local: +- cmd = "%s %s" % (self.ssh_cmd, quote(cmd)) +- else: +- if get_pty: +- cmd = "/bin/bash -c %s" % quote(cmd) ++ ++ if get_pty: ++ cmd = "/bin/bash -c %s" % quote(cmd) ++ + if env: + _cmd_env = self.env_vars + env = _cmd_env.update(env) +- res = pexpect.spawn(cmd, encoding='utf-8', env=env) +- if need_root: +- if self.need_sudo: +- res.sendline(self.opts.sudo_pw) +- if self.opts.become_root: +- res.sendline(self.opts.root_password) +- output = res.expect([pexpect.EOF, pexpect.TIMEOUT], +- timeout=timeout) +- if output == 0: +- out = res.before +- res.close() +- rc = res.exitstatus +- return {'status': rc, 'stdout': out} +- elif output == 1: +- raise CommandTimeoutException(cmd) ++ return self._transport.run_command(cmd, timeout, need_root, env) + + def sosreport(self): +- """Run a sosreport on the node, then collect it""" ++ """Run an sos report on the node, then collect it""" + try: + path = self.execute_sos_command() + if path: +@@ -497,109 +429,6 @@ class SosNode(): + pass + self.cleanup() + +- def _create_ssh_session(self): +- """ +- Using ControlPersist, create the initial connection to the node. +- +- This will generate an OpenSSH ControlPersist socket within the tmp +- directory created or specified for sos-collector to use. +- +- At most, we will wait 30 seconds for a connection. This involves a 15 +- second wait for the initial connection attempt, and a subsequent 15 +- second wait for a response when we supply a password. +- +- Since we connect to nodes in parallel (using the --threads value), this +- means that the time between 'Connecting to nodes...' and 'Beginning +- collection of sosreports' that users see can be up to an amount of time +- equal to 30*(num_nodes/threads) seconds. 
+- +- Returns +- True if session is successfully opened, else raise Exception +- """ +- # Don't use self.ssh_cmd here as we need to add a few additional +- # parameters to establish the initial connection +- self.log_info('Opening SSH session to create control socket') +- connected = False +- ssh_key = '' +- ssh_port = '' +- if self.opts.ssh_port != 22: +- ssh_port = "-p%s " % self.opts.ssh_port +- if self.opts.ssh_key: +- ssh_key = "-i%s" % self.opts.ssh_key +- cmd = ("ssh %s %s -oControlPersist=600 -oControlMaster=auto " +- "-oStrictHostKeyChecking=no -oControlPath=%s %s@%s " +- "\"echo Connected\"" % (ssh_key, +- ssh_port, +- self.control_path, +- self.opts.ssh_user, +- self.address)) +- res = pexpect.spawn(cmd, encoding='utf-8') +- +- connect_expects = [ +- u'Connected', +- u'password:', +- u'.*Permission denied.*', +- u'.* port .*: No route to host', +- u'.*Could not resolve hostname.*', +- pexpect.TIMEOUT +- ] +- +- index = res.expect(connect_expects, timeout=15) +- +- if index == 0: +- connected = True +- elif index == 1: +- if self._password: +- pass_expects = [ +- u'Connected', +- u'Permission denied, please try again.', +- pexpect.TIMEOUT +- ] +- res.sendline(self._password) +- pass_index = res.expect(pass_expects, timeout=15) +- if pass_index == 0: +- connected = True +- elif pass_index == 1: +- # Note that we do not get an exitstatus here, so matching +- # this line means an invalid password will be reported for +- # both invalid passwords and invalid user names +- raise InvalidPasswordException +- elif pass_index == 2: +- raise TimeoutPasswordAuthException +- else: +- raise PasswordRequestException +- elif index == 2: +- raise AuthPermissionDeniedException +- elif index == 3: +- raise ConnectionException(self.address, self.opts.ssh_port) +- elif index == 4: +- raise ConnectionException(self.address) +- elif index == 5: +- raise ConnectionTimeoutException +- else: +- raise Exception("Unknown error, client returned %s" % res.before) +- if connected: +- self.log_debug("Successfully created control socket at %s" +- % self.control_path) +- return True +- return False +- +- def close_ssh_session(self): +- """Remove the control socket to effectively terminate the session""" +- if self.local: +- return True +- try: +- res = self.run_command("rm -f %s" % self.control_path, +- force_local=True) +- if res['status'] == 0: +- return True +- self.log_error("Could not remove ControlPath %s: %s" +- % (self.control_path, res['stdout'])) +- return False +- except Exception as e: +- self.log_error('Error closing SSH session: %s' % e) +- return False +- + def _preset_exists(self, preset): + """Verifies if the given preset exists on the node""" + return preset in self.sos_info['presets'] +@@ -646,8 +475,8 @@ class SosNode(): + self.cluster = cluster + + def update_cmd_from_cluster(self): +- """This is used to modify the sosreport command run on the nodes. +- By default, sosreport is run without any options, using this will ++ """This is used to modify the sos report command run on the nodes. ++ By default, sos report is run without any options, using this will + allow the profile to specify what plugins to run or not and what + options to use. 
+ +@@ -727,10 +556,6 @@ class SosNode(): + if self.opts.since: + sos_opts.append('--since=%s' % quote(self.opts.since)) + +- # sos-4.0 changes the binary +- if self.check_sos_version('4.0'): +- self.sos_bin = 'sos report' +- + if self.check_sos_version('4.1'): + if self.opts.skip_commands: + sos_opts.append( +@@ -811,7 +636,7 @@ class SosNode(): + self.manifest.add_field('final_sos_command', self.sos_cmd) + + def determine_sos_label(self): +- """Determine what, if any, label should be added to the sosreport""" ++ """Determine what, if any, label should be added to the sos report""" + label = '' + label += self.cluster.get_node_label(self) + +@@ -822,7 +647,7 @@ class SosNode(): + if not label: + return None + +- self.log_debug('Label for sosreport set to %s' % label) ++ self.log_debug('Label for sos report set to %s' % label) + if self.check_sos_version('3.6'): + lcmd = '--label' + else: +@@ -844,20 +669,20 @@ class SosNode(): + + def determine_sos_error(self, rc, stdout): + if rc == -1: +- return 'sosreport process received SIGKILL on node' ++ return 'sos report process received SIGKILL on node' + if rc == 1: + if 'sudo' in stdout: + return 'sudo attempt failed' + if rc == 127: +- return 'sosreport terminated unexpectedly. Check disk space' ++ return 'sos report terminated unexpectedly. Check disk space' + if len(stdout) > 0: + return stdout.split('\n')[0:1] + else: + return 'sos exited with code %s' % rc + + def execute_sos_command(self): +- """Run sosreport and capture the resulting file path""" +- self.ui_msg('Generating sosreport...') ++ """Run sos report and capture the resulting file path""" ++ self.ui_msg('Generating sos report...') + try: + path = False + checksum = False +@@ -867,7 +692,7 @@ class SosNode(): + use_container=True, + env=self.sos_env_vars) + if res['status'] == 0: +- for line in res['stdout'].splitlines(): ++ for line in res['output'].splitlines(): + if fnmatch.fnmatch(line, '*sosreport-*tar*'): + path = line.strip() + if line.startswith((" sha256\t", " md5\t")): +@@ -884,44 +709,31 @@ class SosNode(): + else: + self.manifest.add_field('checksum_type', 'unknown') + else: +- err = self.determine_sos_error(res['status'], res['stdout']) +- self.log_debug("Error running sosreport. rc = %s msg = %s" +- % (res['status'], res['stdout'] or +- res['stderr'])) ++ err = self.determine_sos_error(res['status'], res['output']) ++ self.log_debug("Error running sos report. 
rc = %s msg = %s" ++ % (res['status'], res['output'])) + raise Exception(err) + return path + except CommandTimeoutException: + self.log_error('Timeout exceeded') + raise + except Exception as e: +- self.log_error('Error running sosreport: %s' % e) ++ self.log_error('Error running sos report: %s' % e) + raise + + def retrieve_file(self, path): + """Copies the specified file from the host to our temp dir""" + destdir = self.tmpdir + '/' +- dest = destdir + path.split('/')[-1] ++ dest = os.path.join(destdir, path.split('/')[-1]) + try: +- if not self.local: +- if self.file_exists(path): +- self.log_info("Copying remote %s to local %s" % +- (path, destdir)) +- cmd = "/usr/bin/scp -oControlPath=%s %s@%s:%s %s" % ( +- self.control_path, +- self.opts.ssh_user, +- self.address, +- path, +- destdir +- ) +- res = self.run_command(cmd, force_local=True) +- return res['status'] == 0 +- else: +- self.log_debug("Attempting to copy remote file %s, but it " +- "does not exist on filesystem" % path) +- return False ++ if self.file_exists(path): ++ self.log_info("Copying remote %s to local %s" % ++ (path, destdir)) ++ self._transport.retrieve_file(path, dest) + else: +- self.log_debug("Moving %s to %s" % (path, destdir)) +- shutil.copy(path, dest) ++ self.log_debug("Attempting to copy remote file %s, but it " ++ "does not exist on filesystem" % path) ++ return False + return True + except Exception as err: + self.log_debug("Failed to retrieve %s: %s" % (path, err)) +@@ -933,7 +745,7 @@ class SosNode(): + """ + path = ''.join(path.split()) + try: +- if len(path) <= 2: # ensure we have a non '/' path ++ if len(path.split('/')) <= 2: # ensure we have a non '/' path + self.log_debug("Refusing to remove path %s: appears to be " + "incorrect and possibly dangerous" % path) + return False +@@ -959,14 +771,14 @@ class SosNode(): + except Exception: + self.log_error('Failed to make archive readable') + return False +- self.soslog.info('Retrieving sosreport from %s' % self.address) +- self.ui_msg('Retrieving sosreport...') ++ self.soslog.info('Retrieving sos report from %s' % self.address) ++ self.ui_msg('Retrieving sos report...') + ret = self.retrieve_file(self.sos_path) + if ret: +- self.ui_msg('Successfully collected sosreport') ++ self.ui_msg('Successfully collected sos report') + self.file_list.append(self.sos_path.split('/')[-1]) + else: +- self.log_error('Failed to retrieve sosreport') ++ self.log_error('Failed to retrieve sos report') + raise SystemExit + return True + else: +@@ -976,8 +788,8 @@ class SosNode(): + else: + e = [x.strip() for x in self.stdout.readlines() if x.strip][-1] + self.soslog.error( +- 'Failed to run sosreport on %s: %s' % (self.address, e)) +- self.log_error('Failed to run sosreport. %s' % e) ++ 'Failed to run sos report on %s: %s' % (self.address, e)) ++ self.log_error('Failed to run sos report. %s' % e) + return False + + def remove_sos_archive(self): +@@ -986,20 +798,20 @@ class SosNode(): + if self.sos_path is None: + return + if 'sosreport' not in self.sos_path: +- self.log_debug("Node sosreport path %s looks incorrect. Not " ++ self.log_debug("Node sos report path %s looks incorrect. 
Not " + "attempting to remove path" % self.sos_path) + return + removed = self.remove_file(self.sos_path) + if not removed: +- self.log_error('Failed to remove sosreport') ++ self.log_error('Failed to remove sos report') + + def cleanup(self): + """Remove the sos archive from the node once we have it locally""" + self.remove_sos_archive() + if self.sos_path: + for ext in ['.sha256', '.md5']: +- if os.path.isfile(self.sos_path + ext): +- self.remove_file(self.sos_path + ext) ++ if self.remove_file(self.sos_path + ext): ++ break + cleanup = self.host.set_cleanup_cmd() + if cleanup: + self.run_command(cleanup, need_root=True) +@@ -1040,3 +852,5 @@ class SosNode(): + msg = "Exception while making %s readable. Return code was %s" + self.log_error(msg % (filepath, res['status'])) + raise Exception ++ ++# vim: set et ts=4 sw=4 : +diff --git a/sos/collector/transports/__init__.py b/sos/collector/transports/__init__.py +new file mode 100644 +index 00000000..5be7dc6d +--- /dev/null ++++ b/sos/collector/transports/__init__.py +@@ -0,0 +1,317 @@ ++# Copyright Red Hat 2021, Jake Hunsaker ++ ++# This file is part of the sos project: https://github.com/sosreport/sos ++# ++# This copyrighted material is made available to anyone wishing to use, ++# modify, copy, or redistribute it subject to the terms and conditions of ++# version 2 of the GNU General Public License. ++# ++# See the LICENSE file in the source distribution for further information. ++ ++import inspect ++import logging ++import pexpect ++import re ++ ++from pipes import quote ++from sos.collector.exceptions import (ConnectionException, ++ CommandTimeoutException) ++ ++ ++class RemoteTransport(): ++ """The base class used for defining supported remote transports to connect ++ to remote nodes in conjunction with `sos collect`. ++ ++ This abstraction is used to manage the backend connections to nodes so that ++ SoSNode() objects can be leveraged generically to connect to nodes, inspect ++ those nodes, and run commands on them. ++ """ ++ ++ name = 'undefined' ++ ++ def __init__(self, address, commons): ++ self.address = address ++ self.opts = commons['cmdlineopts'] ++ self.tmpdir = commons['tmpdir'] ++ self.need_sudo = commons['need_sudo'] ++ self._hostname = None ++ self.soslog = logging.getLogger('sos') ++ self.ui_log = logging.getLogger('sos_ui') ++ ++ def _sanitize_log_msg(self, msg): ++ """Attempts to obfuscate sensitive information in log messages such as ++ passwords""" ++ reg = r'(?P(pass|key|secret|PASS|KEY|SECRET).*?=)(?P.*?\s)' ++ return re.sub(reg, r'\g****** ', msg) ++ ++ def log_info(self, msg): ++ """Used to print and log info messages""" ++ caller = inspect.stack()[1][3] ++ lmsg = '[%s:%s] %s' % (self.hostname, caller, msg) ++ self.soslog.info(lmsg) ++ ++ def log_error(self, msg): ++ """Used to print and log error messages""" ++ caller = inspect.stack()[1][3] ++ lmsg = '[%s:%s] %s' % (self.hostname, caller, msg) ++ self.soslog.error(lmsg) ++ ++ def log_debug(self, msg): ++ """Used to print and log debug messages""" ++ msg = self._sanitize_log_msg(msg) ++ caller = inspect.stack()[1][3] ++ msg = '[%s:%s] %s' % (self.hostname, caller, msg) ++ self.soslog.debug(msg) ++ ++ @property ++ def hostname(self): ++ if self._hostname and 'localhost' not in self._hostname: ++ return self._hostname ++ return self.address ++ ++ @property ++ def connected(self): ++ """Is the transport __currently__ connected to the node, or otherwise ++ capable of seamlessly running a command or similar on the node? 
++ """ ++ return False ++ ++ @property ++ def remote_exec(self): ++ """This is the command string needed to leverage the remote transport ++ when executing commands. For example, for an SSH transport this would ++ be the `ssh ` string prepended to any command so that the ++ command is executed by the ssh binary. ++ ++ This is also referenced by the `remote_exec` parameter for policies ++ when loading a policy for a remote node ++ """ ++ return None ++ ++ def connect(self, password): ++ """Perform the connection steps in order to ensure that we are able to ++ connect to the node for all future operations. Note that this should ++ not provide an interactive shell at this time. ++ """ ++ if self._connect(password): ++ if not self._hostname: ++ self._get_hostname() ++ return True ++ return False ++ ++ def _connect(self, password): ++ """Actually perform the connection requirements. Should be overridden ++ by specific transports that subclass RemoteTransport ++ """ ++ raise NotImplementedError("Transport %s does not define connect" ++ % self.name) ++ ++ def reconnect(self, password): ++ """Attempts to reconnect to the node using the standard connect() ++ but does not do so indefinitely. This imposes a strict number of retry ++ attempts before failing out ++ """ ++ attempts = 1 ++ last_err = 'unknown' ++ while attempts < 5: ++ self.log_debug("Attempting reconnect (#%s) to node" % attempts) ++ try: ++ if self.connect(password): ++ return True ++ except Exception as err: ++ self.log_debug("Attempt #%s exception: %s" % (attempts, err)) ++ last_err = err ++ attempts += 1 ++ self.log_error("Unable to reconnect to node after 5 attempts, " ++ "aborting.") ++ raise ConnectionException("last exception from transport: %s" ++ % last_err) ++ ++ def disconnect(self): ++ """Perform whatever steps are necessary, if any, to terminate any ++ connection to the node ++ """ ++ try: ++ if self._disconnect(): ++ self.log_debug("Successfully disconnected from node") ++ else: ++ self.log_error("Unable to successfully disconnect, see log for" ++ " more details") ++ except Exception as err: ++ self.log_error("Failed to disconnect: %s" % err) ++ ++ def _disconnect(self): ++ raise NotImplementedError("Transport %s does not define disconnect" ++ % self.name) ++ ++ def run_command(self, cmd, timeout=180, need_root=False, env=None): ++ """Run a command on the node, returning its output and exit code. ++ This should return the exit code of the command being executed, not the ++ exit code of whatever mechanism the transport uses to execute that ++ command ++ ++ :param cmd: The command to run ++ :type cmd: ``str`` ++ ++ :param timeout: The maximum time in seconds to allow the cmd to run ++ :type timeout: ``int`` ++ ++ :param get_pty: Does ``cmd`` require a pty? ++ :type get_pty: ``bool`` ++ ++ :param need_root: Does ``cmd`` require root privileges? ++ :type neeed_root: ``bool`` ++ ++ :param env: Specify env vars to be passed to the ``cmd`` ++ :type env: ``dict`` ++ ++ :returns: Output of ``cmd`` and the exit code ++ :rtype: ``dict`` with keys ``output`` and ``status`` ++ """ ++ self.log_debug('Running command %s' % cmd) ++ # currently we only use/support the use of pexpect for handling the ++ # execution of these commands, as opposed to directly invoking ++ # subprocess.Popen() in conjunction with tools like sshpass. 
++ # If that changes in the future, we'll add decision making logic here ++ # to route to the appropriate handler, but for now we just go straight ++ # to using pexpect ++ return self._run_command_with_pexpect(cmd, timeout, need_root, env) ++ ++ def _format_cmd_for_exec(self, cmd): ++ """Format the command in the way needed for the remote transport to ++ successfully execute it as one would when manually executing it ++ ++ :param cmd: The command being executed, as formatted by SoSNode ++ :type cmd: ``str`` ++ ++ ++ :returns: The command further formatted as needed by this ++ transport ++ :rtype: ``str`` ++ """ ++ cmd = "%s %s" % (self.remote_exec, quote(cmd)) ++ cmd = cmd.lstrip() ++ return cmd ++ ++ def _run_command_with_pexpect(self, cmd, timeout, need_root, env): ++ """Execute the command using pexpect, which allows us to more easily ++ handle prompts and timeouts compared to directly leveraging the ++ subprocess.Popen() method. ++ ++ :param cmd: The command to execute. This will be automatically ++ formatted to use the transport. ++ :type cmd: ``str`` ++ ++ :param timeout: The maximum time in seconds to run ``cmd`` ++ :type timeout: ``int`` ++ ++ :param need_root: Does ``cmd`` need to run as root or with sudo? ++ :type need_root: ``bool`` ++ ++ :param env: Any env vars that ``cmd`` should be run with ++ :type env: ``dict`` ++ """ ++ cmd = self._format_cmd_for_exec(cmd) ++ result = pexpect.spawn(cmd, encoding='utf-8', env=env) ++ ++ _expects = [pexpect.EOF, pexpect.TIMEOUT] ++ if need_root and self.opts.ssh_user != 'root': ++ _expects.extend([ ++ '\\[sudo\\] password for .*:', ++ 'Password:' ++ ]) ++ ++ index = result.expect(_expects, timeout=timeout) ++ ++ if index in [2, 3]: ++ self._send_pexpect_password(index, result) ++ index = result.expect(_expects, timeout=timeout) ++ ++ if index == 0: ++ out = result.before ++ result.close() ++ return {'status': result.exitstatus, 'output': out} ++ elif index == 1: ++ raise CommandTimeoutException(cmd) ++ ++ def _send_pexpect_password(self, index, result): ++ """Handle password prompts for sudo and su usage for non-root SSH users ++ ++ :param index: The index pexpect.spawn returned to match against ++ either a sudo or su prompt ++ :type index: ``int`` ++ ++ :param result: The spawn running the command ++ :type result: ``pexpect.spawn`` ++ """ ++ if index == 2: ++ if not self.opts.sudo_pw and not self.opt.nopasswd_sudo: ++ msg = ("Unable to run command: sudo password " ++ "required but not provided") ++ self.log_error(msg) ++ raise Exception(msg) ++ result.sendline(self.opts.sudo_pw) ++ elif index == 3: ++ if not self.opts.root_password: ++ msg = ("Unable to run command as root: no root password given") ++ self.log_error(msg) ++ raise Exception(msg) ++ result.sendline(self.opts.root_password) ++ ++ def _get_hostname(self): ++ """Determine the hostname of the node and set that for future reference ++ and logging ++ ++ :returns: The hostname of the system, per the `hostname` command ++ :rtype: ``str`` ++ """ ++ _out = self.run_command('hostname') ++ if _out['status'] == 0: ++ self._hostname = _out['output'].strip() ++ self.log_info("Hostname set to %s" % self._hostname) ++ return self._hostname ++ ++ def retrieve_file(self, fname, dest): ++ """Copy a remote file, fname, to dest on the local node ++ ++ :param fname: The name of the file to retrieve ++ :type fname: ``str`` ++ ++ :param dest: Where to save the file to locally ++ :type dest: ``str`` ++ ++ :returns: True if file was successfully copied from remote, or False ++ :rtype: ``bool`` ++ """ 
++ return self._retrieve_file(fname, dest) ++ ++ def _retrieve_file(self, fname, dest): ++ raise NotImplementedError("Transport %s does not support file copying" ++ % self.name) ++ ++ def read_file(self, fname): ++ """Read the given file fname and return its contents ++ ++ :param fname: The name of the file to read ++ :type fname: ``str`` ++ ++ :returns: The content of the file ++ :rtype: ``str`` ++ """ ++ self.log_debug("Reading file %s" % fname) ++ return self._read_file(fname) ++ ++ def _read_file(self, fname): ++ res = self.run_command("cat %s" % fname, timeout=5) ++ if res['status'] == 0: ++ return res['output'] ++ else: ++ if 'No such file' in res['output']: ++ self.log_debug("File %s does not exist on node" ++ % fname) ++ else: ++ self.log_error("Error reading %s: %s" % ++ (fname, res['output'].split(':')[1:])) ++ return '' ++ ++# vim: set et ts=4 sw=4 : +diff --git a/sos/collector/transports/control_persist.py b/sos/collector/transports/control_persist.py +new file mode 100644 +index 00000000..3e848b41 +--- /dev/null ++++ b/sos/collector/transports/control_persist.py +@@ -0,0 +1,199 @@ ++# Copyright Red Hat 2021, Jake Hunsaker ++ ++# This file is part of the sos project: https://github.com/sosreport/sos ++# ++# This copyrighted material is made available to anyone wishing to use, ++# modify, copy, or redistribute it subject to the terms and conditions of ++# version 2 of the GNU General Public License. ++# ++# See the LICENSE file in the source distribution for further information. ++ ++ ++import os ++import pexpect ++import subprocess ++ ++from sos.collector.transports import RemoteTransport ++from sos.collector.exceptions import (InvalidPasswordException, ++ TimeoutPasswordAuthException, ++ PasswordRequestException, ++ AuthPermissionDeniedException, ++ ConnectionException, ++ ConnectionTimeoutException, ++ ControlSocketMissingException, ++ ControlPersistUnsupportedException) ++from sos.utilities import sos_get_command_output ++ ++ ++class SSHControlPersist(RemoteTransport): ++ """A transport for collect that leverages OpenSSH's Control Persist ++ functionality which uses control sockets to transparently keep a connection ++ open to the remote host without needing to rebuild the SSH connection for ++ each and every command executed on the node ++ """ ++ ++ name = 'control_persist' ++ ++ def _check_for_control_persist(self): ++ """Checks to see if the local system supported SSH ControlPersist. ++ ++ ControlPersist allows OpenSSH to keep a single open connection to a ++ remote host rather than building a new session each time. This is the ++ same feature that Ansible uses in place of paramiko, which we have a ++ need to drop in sos-collector. ++ ++ This check relies on feedback from the ssh binary. The command being ++ run should always generate stderr output, but depending on what that ++ output reads we can determine if ControlPersist is supported or not. ++ ++ For our purposes, a host that does not support ControlPersist is not ++ able to run sos-collector. ++ ++ Returns ++ True if ControlPersist is supported, else raise Exception. ++ """ ++ ssh_cmd = ['ssh', '-o', 'ControlPersist'] ++ cmd = subprocess.Popen(ssh_cmd, stdout=subprocess.PIPE, ++ stderr=subprocess.PIPE) ++ out, err = cmd.communicate() ++ err = err.decode('utf-8') ++ if 'Bad configuration option' in err or 'Usage:' in err: ++ raise ControlPersistUnsupportedException ++ return True ++ ++ def _connect(self, password=''): ++ """ ++ Using ControlPersist, create the initial connection to the node. 
++ ++ This will generate an OpenSSH ControlPersist socket within the tmp ++ directory created or specified for sos-collector to use. ++ ++ At most, we will wait 30 seconds for a connection. This involves a 15 ++ second wait for the initial connection attempt, and a subsequent 15 ++ second wait for a response when we supply a password. ++ ++ Since we connect to nodes in parallel (using the --threads value), this ++ means that the time between 'Connecting to nodes...' and 'Beginning ++ collection of sosreports' that users see can be up to an amount of time ++ equal to 30*(num_nodes/threads) seconds. ++ ++ Returns ++ True if session is successfully opened, else raise Exception ++ """ ++ try: ++ self._check_for_control_persist() ++ except ControlPersistUnsupportedException: ++ self.log_error("OpenSSH ControlPersist is not locally supported. " ++ "Please update your OpenSSH installation.") ++ raise ++ self.log_info('Opening SSH session to create control socket') ++ self.control_path = ("%s/.sos-collector-%s" % (self.tmpdir, ++ self.address)) ++ self.ssh_cmd = '' ++ connected = False ++ ssh_key = '' ++ ssh_port = '' ++ if self.opts.ssh_port != 22: ++ ssh_port = "-p%s " % self.opts.ssh_port ++ if self.opts.ssh_key: ++ ssh_key = "-i%s" % self.opts.ssh_key ++ ++ cmd = ("ssh %s %s -oControlPersist=600 -oControlMaster=auto " ++ "-oStrictHostKeyChecking=no -oControlPath=%s %s@%s " ++ "\"echo Connected\"" % (ssh_key, ++ ssh_port, ++ self.control_path, ++ self.opts.ssh_user, ++ self.address)) ++ res = pexpect.spawn(cmd, encoding='utf-8') ++ ++ connect_expects = [ ++ u'Connected', ++ u'password:', ++ u'.*Permission denied.*', ++ u'.* port .*: No route to host', ++ u'.*Could not resolve hostname.*', ++ pexpect.TIMEOUT ++ ] ++ ++ index = res.expect(connect_expects, timeout=15) ++ ++ if index == 0: ++ connected = True ++ elif index == 1: ++ if password: ++ pass_expects = [ ++ u'Connected', ++ u'Permission denied, please try again.', ++ pexpect.TIMEOUT ++ ] ++ res.sendline(password) ++ pass_index = res.expect(pass_expects, timeout=15) ++ if pass_index == 0: ++ connected = True ++ elif pass_index == 1: ++ # Note that we do not get an exitstatus here, so matching ++ # this line means an invalid password will be reported for ++ # both invalid passwords and invalid user names ++ raise InvalidPasswordException ++ elif pass_index == 2: ++ raise TimeoutPasswordAuthException ++ else: ++ raise PasswordRequestException ++ elif index == 2: ++ raise AuthPermissionDeniedException ++ elif index == 3: ++ raise ConnectionException(self.address, self.opts.ssh_port) ++ elif index == 4: ++ raise ConnectionException(self.address) ++ elif index == 5: ++ raise ConnectionTimeoutException ++ else: ++ raise Exception("Unknown error, client returned %s" % res.before) ++ if connected: ++ if not os.path.exists(self.control_path): ++ raise ControlSocketMissingException ++ self.log_debug("Successfully created control socket at %s" ++ % self.control_path) ++ return True ++ return False ++ ++ def _disconnect(self): ++ if os.path.exists(self.control_path): ++ try: ++ os.remove(self.control_path) ++ return True ++ except Exception as err: ++ self.log_debug("Could not disconnect properly: %s" % err) ++ return False ++ self.log_debug("Control socket not present when attempting to " ++ "terminate session") ++ ++ @property ++ def connected(self): ++ """Check if the SSH control socket exists ++ ++ The control socket is automatically removed by the SSH daemon in the ++ event that the last connection to the node was greater than the timeout ++ set 
by the ControlPersist option. This can happen for us if we are ++ collecting from a large number of nodes, and the timeout expires before ++ we start collection. ++ """ ++ return os.path.exists(self.control_path) ++ ++ @property ++ def remote_exec(self): ++ if not self.ssh_cmd: ++ self.ssh_cmd = "ssh -oControlPath=%s %s@%s" % ( ++ self.control_path, self.opts.ssh_user, self.address ++ ) ++ return self.ssh_cmd ++ ++ def _retrieve_file(self, fname, dest): ++ cmd = "/usr/bin/scp -oControlPath=%s %s@%s:%s %s" % ( ++ self.control_path, self.opts.ssh_user, self.address, fname, dest ++ ) ++ res = sos_get_command_output(cmd) ++ return res['status'] == 0 ++ ++# vim: set et ts=4 sw=4 : +diff --git a/sos/collector/transports/local.py b/sos/collector/transports/local.py +new file mode 100644 +index 00000000..a4897f19 +--- /dev/null ++++ b/sos/collector/transports/local.py +@@ -0,0 +1,49 @@ ++# Copyright Red Hat 2021, Jake Hunsaker ++ ++# This file is part of the sos project: https://github.com/sosreport/sos ++# ++# This copyrighted material is made available to anyone wishing to use, ++# modify, copy, or redistribute it subject to the terms and conditions of ++# version 2 of the GNU General Public License. ++# ++# See the LICENSE file in the source distribution for further information. ++ ++import os ++import shutil ++ ++from sos.collector.transports import RemoteTransport ++ ++ ++class LocalTransport(RemoteTransport): ++ """A 'transport' to represent a local node. This allows us to more easily ++ extend SoSNode() without having a ton of 'if local' or similar checks in ++ more places than we actually need them ++ """ ++ ++ name = 'local_node' ++ ++ def _connect(self, password): ++ return True ++ ++ def _disconnect(self): ++ return True ++ ++ @property ++ def connected(self): ++ return True ++ ++ def _retrieve_file(self, fname, dest): ++ self.log_debug("Moving %s to %s" % (fname, dest)) ++ shutil.copy(fname, dest) ++ ++ def _format_cmd_for_exec(self, cmd): ++ return cmd ++ ++ def _read_file(self, fname): ++ if os.path.exists(fname): ++ with open(fname, 'r') as rfile: ++ return rfile.read() ++ self.log_debug("No such file: %s" % fname) ++ return '' ++ ++# vim: set et ts=4 sw=4 : +-- +2.31.1 + +From e869bc84c714bfc2249bbcb84e14908049ee42c4 Mon Sep 17 00:00:00 2001 +From: Jake Hunsaker +Date: Mon, 27 Sep 2021 12:07:08 -0400 +Subject: [PATCH 1/2] [Plugin,utilities] Add sysroot wrapper for os.path.join + +Adds a wrapper for `os.path.join()` which accounts for non-/ sysroots, +like we have done previously for other `os.path` methods. Further +updates `Plugin()` to use this wrapper where appropriate. 
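
The wrapper this patch introduces (see the sos/utilities.py hunk further below) can be sketched on its own; the '/host' sysroot value here is purely illustrative:

    import os

    def path_join(path, *p, sysroot=os.sep):
        # Anchor the leading path under the sysroot unless it is already
        # there, then join the remaining components as usual.
        if not path.startswith(sysroot):
            path = os.path.join(sysroot, path.lstrip(os.sep))
        return os.path.join(path, *p)

    print(path_join('/etc/fstab', sysroot='/host'))     # /host/etc/fstab
    print(path_join('/etc', 'fstab', sysroot='/host'))  # /host/etc/fstab
    print(path_join('/etc', 'fstab'))                   # /etc/fstab (default / sysroot)
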
+ +Signed-off-by: Jake Hunsaker +--- + sos/report/plugins/__init__.py | 43 +++++++++++++++++----------------- + sos/utilities.py | 6 +++++ + 2 files changed, 28 insertions(+), 21 deletions(-) + +diff --git a/sos/report/plugins/__init__.py b/sos/report/plugins/__init__.py +index c635b8de..1f84bca4 100644 +--- a/sos/report/plugins/__init__.py ++++ b/sos/report/plugins/__init__.py +@@ -13,7 +13,7 @@ + from sos.utilities import (sos_get_command_output, import_module, grep, + fileobj, tail, is_executable, TIMEOUT_DEFAULT, + path_exists, path_isdir, path_isfile, path_islink, +- listdir) ++ listdir, path_join) + + import os + import glob +@@ -708,19 +708,6 @@ class Plugin(): + def _log_debug(self, msg): + self.soslog.debug(self._format_msg(msg)) + +- def join_sysroot(self, path): +- """Join a given path with the configured sysroot +- +- :param path: The filesystem path that needs to be joined +- :type path: ``str`` +- +- :returns: The joined filesystem path +- :rtype: ``str`` +- """ +- if path[0] == os.sep: +- path = path[1:] +- return os.path.join(self.sysroot, path) +- + def strip_sysroot(self, path): + """Remove the configured sysroot from a filesystem path + +@@ -1176,7 +1163,7 @@ class Plugin(): + + def _get_dest_for_srcpath(self, srcpath): + if self.use_sysroot(): +- srcpath = self.join_sysroot(srcpath) ++ srcpath = self.path_join(srcpath) + for copied in self.copied_files: + if srcpath == copied["srcpath"]: + return copied["dstpath"] +@@ -1284,7 +1271,7 @@ class Plugin(): + forbidden = [forbidden] + + if self.use_sysroot(): +- forbidden = [self.join_sysroot(f) for f in forbidden] ++ forbidden = [self.path_join(f) for f in forbidden] + + for forbid in forbidden: + self._log_info("adding forbidden path '%s'" % forbid) +@@ -1438,7 +1425,7 @@ class Plugin(): + since = self.get_option('since') + + logarchive_pattern = re.compile(r'.*((\.(zip|gz|bz2|xz))|[-.][\d]+)$') +- configfile_pattern = re.compile(r"^%s/*" % self.join_sysroot("etc")) ++ configfile_pattern = re.compile(r"^%s/*" % self.path_join("etc")) + + if not self.test_predicate(pred=pred): + self._log_info("skipped copy spec '%s' due to predicate (%s)" % +@@ -1468,7 +1455,7 @@ class Plugin(): + return False + + if self.use_sysroot(): +- copyspec = self.join_sysroot(copyspec) ++ copyspec = self.path_join(copyspec) + + files = self._expand_copy_spec(copyspec) + +@@ -1683,7 +1670,7 @@ class Plugin(): + if not _dev_ok: + continue + if prepend_path: +- device = os.path.join(prepend_path, device) ++ device = self.path_join(prepend_path, device) + _cmd = cmd % {'dev': device} + self._add_cmd_output(cmd=_cmd, timeout=timeout, + sizelimit=sizelimit, chroot=chroot, +@@ -2592,7 +2579,7 @@ class Plugin(): + if self.path_isfile(path) or self.path_islink(path): + found_paths.append(path) + elif self.path_isdir(path) and self.listdir(path): +- found_paths.extend(__expand(os.path.join(path, '*'))) ++ found_paths.extend(__expand(self.path_join(path, '*'))) + else: + found_paths.append(path) + except PermissionError: +@@ -2608,7 +2595,7 @@ class Plugin(): + if (os.access(copyspec, os.R_OK) and self.path_isdir(copyspec) and + self.listdir(copyspec)): + # the directory exists and is non-empty, recurse through it +- copyspec = os.path.join(copyspec, '*') ++ copyspec = self.path_join(copyspec, '*') + expanded = glob.glob(copyspec, recursive=True) + recursed_files = [] + for _path in expanded: +@@ -2877,6 +2864,20 @@ class Plugin(): + """ + return listdir(path, self.commons['cmdlineopts'].sysroot) + ++ def path_join(self, path, *p): ++ """Helper to call the 
sos.utilities wrapper that allows the ++ corresponding `os` call to account for sysroot ++ ++ :param path: The leading path passed to os.path.join() ++ :type path: ``str`` ++ ++ :param p: Following path section(s) to be joined with ``path``, ++ an empty parameter will result in a path that ends with ++ a separator ++ :type p: ``str`` ++ """ ++ return path_join(path, *p, sysroot=self.sysroot) ++ + def postproc(self): + """Perform any postprocessing. To be replaced by a plugin if required. + """ +diff --git a/sos/utilities.py b/sos/utilities.py +index c940e066..b7575153 100644 +--- a/sos/utilities.py ++++ b/sos/utilities.py +@@ -242,6 +242,12 @@ def listdir(path, sysroot): + return _os_wrapper(path, sysroot, 'listdir', os) + + ++def path_join(path, *p, sysroot=os.sep): ++ if not path.startswith(sysroot): ++ path = os.path.join(sysroot, path.lstrip(os.sep)) ++ return os.path.join(path, *p) ++ ++ + class AsyncReader(threading.Thread): + """Used to limit command output to a given size without deadlocking + sos. +-- +2.31.1 + + +From 07d96d52ef69b9f8fe1ef32a1b88089d31c33fe8 Mon Sep 17 00:00:00 2001 +From: Jake Hunsaker +Date: Mon, 27 Sep 2021 12:28:27 -0400 +Subject: [PATCH 2/2] [plugins] Update plugins to use new os.path.join wrapper + +Updates plugins to use the new `self.path_join()` wrapper for +`os.path.join()` so that these plugins now account for non-/ sysroots +for their collections. + +Signed-off-by: Jake Hunsaker +--- + sos/report/plugins/__init__.py | 2 +- + sos/report/plugins/azure.py | 4 +-- + sos/report/plugins/collectd.py | 2 +- + sos/report/plugins/container_log.py | 2 +- + sos/report/plugins/corosync.py | 2 +- + sos/report/plugins/docker_distribution.py | 5 ++-- + sos/report/plugins/ds.py | 3 +-- + sos/report/plugins/elastic.py | 4 ++- + sos/report/plugins/etcd.py | 2 +- + sos/report/plugins/gluster.py | 3 ++- + sos/report/plugins/jars.py | 2 +- + sos/report/plugins/kdump.py | 4 +-- + sos/report/plugins/libvirt.py | 2 +- + sos/report/plugins/logs.py | 8 +++--- + sos/report/plugins/manageiq.py | 12 ++++----- + sos/report/plugins/numa.py | 9 +++---- + sos/report/plugins/openstack_instack.py | 2 +- + sos/report/plugins/openstack_nova.py | 2 +- + sos/report/plugins/openvswitch.py | 13 ++++----- + sos/report/plugins/origin.py | 28 +++++++++++--------- + sos/report/plugins/ovirt.py | 2 +- + sos/report/plugins/ovirt_engine_backup.py | 5 ++-- + sos/report/plugins/ovn_central.py | 26 +++++++++--------- + sos/report/plugins/ovn_host.py | 4 +-- + sos/report/plugins/pacemaker.py | 4 +-- + sos/report/plugins/pcp.py | 32 +++++++++++------------ + sos/report/plugins/postfix.py | 2 +- + sos/report/plugins/postgresql.py | 2 +- + sos/report/plugins/powerpc.py | 2 +- + sos/report/plugins/processor.py | 3 +-- + sos/report/plugins/python.py | 4 +-- + sos/report/plugins/sar.py | 5 ++-- + sos/report/plugins/sos_extras.py | 2 +- + sos/report/plugins/ssh.py | 7 +++-- + sos/report/plugins/unpackaged.py | 4 +-- + sos/report/plugins/watchdog.py | 13 +++++---- + sos/report/plugins/yum.py | 2 +- + 37 files changed, 115 insertions(+), 115 deletions(-) + +diff --git a/sos/report/plugins/__init__.py b/sos/report/plugins/__init__.py +index 1f84bca4..ec138f83 100644 +--- a/sos/report/plugins/__init__.py ++++ b/sos/report/plugins/__init__.py +@@ -2897,7 +2897,7 @@ class Plugin(): + try: + cmd_line_paths = glob.glob(cmd_line_glob) + for path in cmd_line_paths: +- f = open(path, 'r') ++ f = open(self.path_join(path), 'r') + cmd_line = f.read().strip() + if process in cmd_line: + status = True +diff --git 
a/sos/report/plugins/azure.py b/sos/report/plugins/azure.py +index 45971a61..90999b3f 100644 +--- a/sos/report/plugins/azure.py ++++ b/sos/report/plugins/azure.py +@@ -8,8 +8,8 @@ + # + # See the LICENSE file in the source distribution for further information. + +-import os + from sos.report.plugins import Plugin, UbuntuPlugin, RedHatPlugin ++import os + + + class Azure(Plugin, UbuntuPlugin): +@@ -38,7 +38,7 @@ class Azure(Plugin, UbuntuPlugin): + + for path, subdirs, files in os.walk("/var/log/azure"): + for name in files: +- self.add_copy_spec(os.path.join(path, name), sizelimit=limit) ++ self.add_copy_spec(self.path_join(path, name), sizelimit=limit) + + self.add_cmd_output(( + 'curl -s -H Metadata:true ' +diff --git a/sos/report/plugins/collectd.py b/sos/report/plugins/collectd.py +index 80d4b00a..8584adf9 100644 +--- a/sos/report/plugins/collectd.py ++++ b/sos/report/plugins/collectd.py +@@ -33,7 +33,7 @@ class Collectd(Plugin, IndependentPlugin): + + p = re.compile('^LoadPlugin.*') + try: +- with open("/etc/collectd.conf") as f: ++ with open(self.path_join("/etc/collectd.conf"), 'r') as f: + for line in f: + if p.match(line): + self.add_alert("Active Plugin found: %s" % +diff --git a/sos/report/plugins/container_log.py b/sos/report/plugins/container_log.py +index 14e0b7d8..e8dedad2 100644 +--- a/sos/report/plugins/container_log.py ++++ b/sos/report/plugins/container_log.py +@@ -29,6 +29,6 @@ class ContainerLog(Plugin, IndependentPlugin): + """Collect *.log files from subdirs of passed root path + """ + for dirName, _, _ in os.walk(root): +- self.add_copy_spec(os.path.join(dirName, '*.log')) ++ self.add_copy_spec(self.path_join(dirName, '*.log')) + + # vim: set et ts=4 sw=4 : +diff --git a/sos/report/plugins/corosync.py b/sos/report/plugins/corosync.py +index d74086e3..10e096c6 100644 +--- a/sos/report/plugins/corosync.py ++++ b/sos/report/plugins/corosync.py +@@ -47,7 +47,7 @@ class Corosync(Plugin): + # (it isnt precise but sufficient) + pattern = r'^\s*(logging.)?logfile:\s*(\S+)$' + try: +- with open("/etc/corosync/corosync.conf") as f: ++ with open(self.path_join("/etc/corosync/corosync.conf"), 'r') as f: + for line in f: + if re.match(pattern, line): + self.add_copy_spec(re.search(pattern, line).group(2)) +diff --git a/sos/report/plugins/docker_distribution.py b/sos/report/plugins/docker_distribution.py +index 84222ff7..e760f252 100644 +--- a/sos/report/plugins/docker_distribution.py ++++ b/sos/report/plugins/docker_distribution.py +@@ -19,8 +19,9 @@ class DockerDistribution(Plugin): + def setup(self): + self.add_copy_spec('/etc/docker-distribution/') + self.add_journal('docker-distribution') +- if self.path_exists('/etc/docker-distribution/registry/config.yml'): +- with open('/etc/docker-distribution/registry/config.yml') as f: ++ conf = self.path_join('/etc/docker-distribution/registry/config.yml') ++ if self.path_exists(conf): ++ with open(conf) as f: + for line in f: + if 'rootdirectory' in line: + loc = line.split()[1] +diff --git a/sos/report/plugins/ds.py b/sos/report/plugins/ds.py +index addf49e1..43feb21e 100644 +--- a/sos/report/plugins/ds.py ++++ b/sos/report/plugins/ds.py +@@ -11,7 +11,6 @@ + # See the LICENSE file in the source distribution for further information. 
+ + from sos.report.plugins import Plugin, RedHatPlugin +-import os + + + class DirectoryServer(Plugin, RedHatPlugin): +@@ -47,7 +46,7 @@ class DirectoryServer(Plugin, RedHatPlugin): + try: + for d in self.listdir("/etc/dirsrv"): + if d[0:5] == 'slapd': +- certpath = os.path.join("/etc/dirsrv", d) ++ certpath = self.path_join("/etc/dirsrv", d) + self.add_cmd_output("certutil -L -d %s" % certpath) + self.add_cmd_output("dsctl %s healthcheck" % d) + except OSError: +diff --git a/sos/report/plugins/elastic.py b/sos/report/plugins/elastic.py +index ad9a06ff..da2662bc 100644 +--- a/sos/report/plugins/elastic.py ++++ b/sos/report/plugins/elastic.py +@@ -39,7 +39,9 @@ class Elastic(Plugin, IndependentPlugin): + return hostname, port + + def setup(self): +- els_config_file = "/etc/elasticsearch/elasticsearch.yml" ++ els_config_file = self.path_join( ++ "/etc/elasticsearch/elasticsearch.yml" ++ ) + self.add_copy_spec(els_config_file) + + if self.get_option("all_logs"): +diff --git a/sos/report/plugins/etcd.py b/sos/report/plugins/etcd.py +index fd4f67eb..fe017e9f 100644 +--- a/sos/report/plugins/etcd.py ++++ b/sos/report/plugins/etcd.py +@@ -62,7 +62,7 @@ class etcd(Plugin, RedHatPlugin): + + def get_etcd_url(self): + try: +- with open('/etc/etcd/etcd.conf', 'r') as ef: ++ with open(self.path_join('/etc/etcd/etcd.conf'), 'r') as ef: + for line in ef: + if line.startswith('ETCD_LISTEN_CLIENT_URLS'): + return line.split('=')[1].replace('"', '').strip() +diff --git a/sos/report/plugins/gluster.py b/sos/report/plugins/gluster.py +index a44ffeb7..e518e3d3 100644 +--- a/sos/report/plugins/gluster.py ++++ b/sos/report/plugins/gluster.py +@@ -35,9 +35,10 @@ class Gluster(Plugin, RedHatPlugin): + ] + for statedump_file in statedump_entries: + statedumps_present = statedumps_present+1 ++ _spath = self.path_join(name_dir, statedump_file) + ret = -1 + while ret == -1: +- with open(name_dir + '/' + statedump_file, 'r') as sfile: ++ with open(_spath, 'r') as sfile: + last_line = sfile.readlines()[-1] + ret = string.count(last_line, 'DUMP_END_TIME') + +diff --git a/sos/report/plugins/jars.py b/sos/report/plugins/jars.py +index 0d3cf37e..4b98684e 100644 +--- a/sos/report/plugins/jars.py ++++ b/sos/report/plugins/jars.py +@@ -63,7 +63,7 @@ class Jars(Plugin, RedHatPlugin): + for location in locations: + for dirpath, _, filenames in os.walk(location): + for filename in filenames: +- path = os.path.join(dirpath, filename) ++ path = self.path_join(dirpath, filename) + if Jars.is_jar(path): + jar_paths.append(path) + +diff --git a/sos/report/plugins/kdump.py b/sos/report/plugins/kdump.py +index 757c2736..66565664 100644 +--- a/sos/report/plugins/kdump.py ++++ b/sos/report/plugins/kdump.py +@@ -40,7 +40,7 @@ class RedHatKDump(KDump, RedHatPlugin): + packages = ('kexec-tools',) + + def fstab_parse_fs(self, device): +- with open('/etc/fstab', 'r') as fp: ++ with open(self.path_join('/etc/fstab'), 'r') as fp: + for line in fp: + if line.startswith((device)): + return line.split()[1].rstrip('/') +@@ -50,7 +50,7 @@ class RedHatKDump(KDump, RedHatPlugin): + fs = "" + path = "/var/crash" + +- with open('/etc/kdump.conf', 'r') as fp: ++ with open(self.path_join('/etc/kdump.conf'), 'r') as fp: + for line in fp: + if line.startswith("path"): + path = line.split()[1] +diff --git a/sos/report/plugins/libvirt.py b/sos/report/plugins/libvirt.py +index be8120ff..5caa5802 100644 +--- a/sos/report/plugins/libvirt.py ++++ b/sos/report/plugins/libvirt.py +@@ -55,7 +55,7 @@ class Libvirt(Plugin, IndependentPlugin): + else: + 
self.add_copy_spec("/var/log/libvirt") + +- if self.path_exists(self.join_sysroot(libvirt_keytab)): ++ if self.path_exists(self.path_join(libvirt_keytab)): + self.add_cmd_output("klist -ket %s" % libvirt_keytab) + + self.add_cmd_output("ls -lR /var/lib/libvirt/qemu") +diff --git a/sos/report/plugins/logs.py b/sos/report/plugins/logs.py +index ee6bb98d..606e574a 100644 +--- a/sos/report/plugins/logs.py ++++ b/sos/report/plugins/logs.py +@@ -24,15 +24,15 @@ class Logs(Plugin, IndependentPlugin): + since = self.get_option("since") + + if self.path_exists('/etc/rsyslog.conf'): +- with open('/etc/rsyslog.conf', 'r') as conf: ++ with open(self.path_join('/etc/rsyslog.conf'), 'r') as conf: + for line in conf.readlines(): + if line.startswith('$IncludeConfig'): + confs += glob.glob(line.split()[1]) + + for conf in confs: +- if not self.path_exists(conf): ++ if not self.path_exists(self.path_join(conf)): + continue +- config = self.join_sysroot(conf) ++ config = self.path_join(conf) + logs += self.do_regex_find_all(r"^\S+\s+(-?\/.*$)\s+", config) + + for i in logs: +@@ -60,7 +60,7 @@ class Logs(Plugin, IndependentPlugin): + # - there is some data present, either persistent or runtime only + # - systemd-journald service exists + # otherwise fallback to collecting few well known logfiles directly +- journal = any([self.path_exists(p + "/log/journal/") ++ journal = any([self.path_exists(self.path_join(p, "log/journal/")) + for p in ["/var", "/run"]]) + if journal and self.is_service("systemd-journald"): + self.add_journal(since=since, tags='journal_full', priority=100) +diff --git a/sos/report/plugins/manageiq.py b/sos/report/plugins/manageiq.py +index 27ad6ef4..e20c4a2a 100644 +--- a/sos/report/plugins/manageiq.py ++++ b/sos/report/plugins/manageiq.py +@@ -58,7 +58,7 @@ class ManageIQ(Plugin, RedHatPlugin): + # Log files to collect from miq_dir/log/ + miq_log_dir = os.path.join(miq_dir, "log") + +- miq_main_log_files = [ ++ miq_main_logs = [ + 'ansible_tower.log', + 'top_output.log', + 'evm.log', +@@ -81,16 +81,16 @@ class ManageIQ(Plugin, RedHatPlugin): + self.add_copy_spec(list(self.files)) + + self.add_copy_spec([ +- os.path.join(self.miq_conf_dir, x) for x in self.miq_conf_files ++ self.path_join(self.miq_conf_dir, x) for x in self.miq_conf_files + ]) + + # Collect main log files without size limit. + self.add_copy_spec([ +- os.path.join(self.miq_log_dir, x) for x in self.miq_main_log_files ++ self.path_join(self.miq_log_dir, x) for x in self.miq_main_logs + ], sizelimit=0) + + self.add_copy_spec([ +- os.path.join(self.miq_log_dir, x) for x in self.miq_log_files ++ self.path_join(self.miq_log_dir, x) for x in self.miq_log_files + ]) + + self.add_copy_spec([ +@@ -101,8 +101,8 @@ class ManageIQ(Plugin, RedHatPlugin): + if environ.get("APPLIANCE_PG_DATA"): + pg_dir = environ.get("APPLIANCE_PG_DATA") + self.add_copy_spec([ +- os.path.join(pg_dir, 'pg_log'), +- os.path.join(pg_dir, 'postgresql.conf') ++ self.path_join(pg_dir, 'pg_log'), ++ self.path_join(pg_dir, 'postgresql.conf') + ]) + + # vim: set et ts=4 sw=4 : +diff --git a/sos/report/plugins/numa.py b/sos/report/plugins/numa.py +index 0faef8d2..9094baef 100644 +--- a/sos/report/plugins/numa.py ++++ b/sos/report/plugins/numa.py +@@ -9,7 +9,6 @@ + # See the LICENSE file in the source distribution for further information. 
+ + from sos.report.plugins import Plugin, IndependentPlugin +-import os.path + + + class Numa(Plugin, IndependentPlugin): +@@ -42,10 +41,10 @@ class Numa(Plugin, IndependentPlugin): + ]) + + self.add_copy_spec([ +- os.path.join(numa_path, "node*/meminfo"), +- os.path.join(numa_path, "node*/cpulist"), +- os.path.join(numa_path, "node*/distance"), +- os.path.join(numa_path, "node*/hugepages/hugepages-*/*") ++ self.path_join(numa_path, "node*/meminfo"), ++ self.path_join(numa_path, "node*/cpulist"), ++ self.path_join(numa_path, "node*/distance"), ++ self.path_join(numa_path, "node*/hugepages/hugepages-*/*") + ]) + + # vim: set et ts=4 sw=4 : +diff --git a/sos/report/plugins/openstack_instack.py b/sos/report/plugins/openstack_instack.py +index 7c56c162..5b4f7d41 100644 +--- a/sos/report/plugins/openstack_instack.py ++++ b/sos/report/plugins/openstack_instack.py +@@ -68,7 +68,7 @@ class OpenStackInstack(Plugin): + p = uc_config.get(opt) + if p: + if not os.path.isabs(p): +- p = os.path.join('/home/stack', p) ++ p = self.path_join('/home/stack', p) + self.add_copy_spec(p) + except Exception: + pass +diff --git a/sos/report/plugins/openstack_nova.py b/sos/report/plugins/openstack_nova.py +index 53210c48..f840081e 100644 +--- a/sos/report/plugins/openstack_nova.py ++++ b/sos/report/plugins/openstack_nova.py +@@ -103,7 +103,7 @@ class OpenStackNova(Plugin): + "nova-scheduler.log*" + ] + for novalog in novalogs: +- self.add_copy_spec(os.path.join(novadir, novalog)) ++ self.add_copy_spec(self.path_join(novadir, novalog)) + + self.add_copy_spec([ + "/etc/nova/", +diff --git a/sos/report/plugins/openvswitch.py b/sos/report/plugins/openvswitch.py +index 003596c6..179d1532 100644 +--- a/sos/report/plugins/openvswitch.py ++++ b/sos/report/plugins/openvswitch.py +@@ -10,7 +10,6 @@ + + from sos.report.plugins import Plugin, RedHatPlugin, DebianPlugin, UbuntuPlugin + +-from os.path import join as path_join + from os import environ + + import re +@@ -65,7 +64,9 @@ class OpenVSwitch(Plugin): + log_dirs.append(environ.get('OVS_LOGDIR')) + + if not all_logs: +- self.add_copy_spec([path_join(ld, '*.log') for ld in log_dirs]) ++ self.add_copy_spec([ ++ self.path_join(ld, '*.log') for ld in log_dirs ++ ]) + else: + self.add_copy_spec(log_dirs) + +@@ -76,13 +77,13 @@ class OpenVSwitch(Plugin): + ]) + + self.add_copy_spec([ +- path_join('/usr/local/etc/openvswitch', 'conf.db'), +- path_join('/etc/openvswitch', 'conf.db'), +- path_join('/var/lib/openvswitch', 'conf.db'), ++ self.path_join('/usr/local/etc/openvswitch', 'conf.db'), ++ self.path_join('/etc/openvswitch', 'conf.db'), ++ self.path_join('/var/lib/openvswitch', 'conf.db'), + ]) + ovs_dbdir = environ.get('OVS_DBDIR') + if ovs_dbdir: +- self.add_copy_spec(path_join(ovs_dbdir, 'conf.db')) ++ self.add_copy_spec(self.path_join(ovs_dbdir, 'conf.db')) + + self.add_cmd_output([ + # The '-t 5' adds an upper bound on how long to wait to connect +diff --git a/sos/report/plugins/origin.py b/sos/report/plugins/origin.py +index f9cc32c1..7df9c019 100644 +--- a/sos/report/plugins/origin.py ++++ b/sos/report/plugins/origin.py +@@ -69,20 +69,21 @@ class OpenShiftOrigin(Plugin): + + def is_static_etcd(self): + """Determine if we are on a node running etcd""" +- return self.path_exists(os.path.join(self.static_pod_dir, "etcd.yaml")) ++ return self.path_exists(self.path_join(self.static_pod_dir, ++ "etcd.yaml")) + + def is_static_pod_compatible(self): + """Determine if a node is running static pods""" + return self.path_exists(self.static_pod_dir) + + def setup(self): +- 
bstrap_node_cfg = os.path.join(self.node_base_dir, +- "bootstrap-" + self.node_cfg_file) +- bstrap_kubeconfig = os.path.join(self.node_base_dir, +- "bootstrap.kubeconfig") +- node_certs = os.path.join(self.node_base_dir, "certs", "*") +- node_client_ca = os.path.join(self.node_base_dir, "client-ca.crt") +- admin_cfg = os.path.join(self.master_base_dir, "admin.kubeconfig") ++ bstrap_node_cfg = self.path_join(self.node_base_dir, ++ "bootstrap-" + self.node_cfg_file) ++ bstrap_kubeconfig = self.path_join(self.node_base_dir, ++ "bootstrap.kubeconfig") ++ node_certs = self.path_join(self.node_base_dir, "certs", "*") ++ node_client_ca = self.path_join(self.node_base_dir, "client-ca.crt") ++ admin_cfg = self.path_join(self.master_base_dir, "admin.kubeconfig") + oc_cmd_admin = "%s --config=%s" % ("oc", admin_cfg) + static_pod_logs_cmd = "master-logs" + +@@ -92,11 +93,12 @@ class OpenShiftOrigin(Plugin): + self.add_copy_spec([ + self.master_cfg, + self.master_env, +- os.path.join(self.master_base_dir, "*.crt"), ++ self.path_join(self.master_base_dir, "*.crt"), + ]) + + if self.is_static_pod_compatible(): +- self.add_copy_spec(os.path.join(self.static_pod_dir, "*.yaml")) ++ self.add_copy_spec(self.path_join(self.static_pod_dir, ++ "*.yaml")) + self.add_cmd_output([ + "%s api api" % static_pod_logs_cmd, + "%s controllers controllers" % static_pod_logs_cmd, +@@ -177,9 +179,9 @@ class OpenShiftOrigin(Plugin): + node_client_ca, + bstrap_node_cfg, + bstrap_kubeconfig, +- os.path.join(self.node_base_dir, "*.crt"), +- os.path.join(self.node_base_dir, "resolv.conf"), +- os.path.join(self.node_base_dir, "node-dnsmasq.conf"), ++ self.path_join(self.node_base_dir, "*.crt"), ++ self.path_join(self.node_base_dir, "resolv.conf"), ++ self.path_join(self.node_base_dir, "node-dnsmasq.conf"), + ]) + + self.add_journal(units="atomic-openshift-node") +diff --git a/sos/report/plugins/ovirt.py b/sos/report/plugins/ovirt.py +index 1de606be..09647bf1 100644 +--- a/sos/report/plugins/ovirt.py ++++ b/sos/report/plugins/ovirt.py +@@ -216,7 +216,7 @@ class Ovirt(Plugin, RedHatPlugin): + "isouploader.conf" + ] + for conf_file in passwd_files: +- conf_path = os.path.join("/etc/ovirt-engine", conf_file) ++ conf_path = self.path_join("/etc/ovirt-engine", conf_file) + self.do_file_sub( + conf_path, + r"passwd=(.*)", +diff --git a/sos/report/plugins/ovirt_engine_backup.py b/sos/report/plugins/ovirt_engine_backup.py +index 676e419e..7fb6a5c7 100644 +--- a/sos/report/plugins/ovirt_engine_backup.py ++++ b/sos/report/plugins/ovirt_engine_backup.py +@@ -8,7 +8,6 @@ + # + # See the LICENSE file in the source distribution for further information. 
+ +-import os + from sos.report.plugins import (Plugin, RedHatPlugin) + from datetime import datetime + +@@ -29,11 +28,11 @@ class oVirtEngineBackup(Plugin, RedHatPlugin): + + def setup(self): + now = datetime.now().strftime("%Y%m%d%H%M%S") +- backup_filename = os.path.join( ++ backup_filename = self.path_join( + self.get_option("backupdir"), + "engine-db-backup-%s.tar.gz" % (now) + ) +- log_filename = os.path.join( ++ log_filename = self.path_join( + self.get_option("backupdir"), + "engine-db-backup-%s.log" % (now) + ) +diff --git a/sos/report/plugins/ovn_central.py b/sos/report/plugins/ovn_central.py +index d6647aad..914eda60 100644 +--- a/sos/report/plugins/ovn_central.py ++++ b/sos/report/plugins/ovn_central.py +@@ -42,7 +42,7 @@ class OVNCentral(Plugin): + return + else: + try: +- with open(filename, 'r') as f: ++ with open(self.path_join(filename), 'r') as f: + try: + db = json.load(f) + except Exception: +@@ -71,13 +71,13 @@ class OVNCentral(Plugin): + ovs_rundir = os.environ.get('OVS_RUNDIR') + for pidfile in ['ovnnb_db.pid', 'ovnsb_db.pid', 'ovn-northd.pid']: + self.add_copy_spec([ +- os.path.join('/var/lib/openvswitch/ovn', pidfile), +- os.path.join('/usr/local/var/run/openvswitch', pidfile), +- os.path.join('/run/openvswitch/', pidfile), ++ self.path_join('/var/lib/openvswitch/ovn', pidfile), ++ self.path_join('/usr/local/var/run/openvswitch', pidfile), ++ self.path_join('/run/openvswitch/', pidfile), + ]) + + if ovs_rundir: +- self.add_copy_spec(os.path.join(ovs_rundir, pidfile)) ++ self.add_copy_spec(self.path_join(ovs_rundir, pidfile)) + + if self.get_option("all_logs"): + self.add_copy_spec("/var/log/ovn/") +@@ -104,7 +104,7 @@ class OVNCentral(Plugin): + + schema_dir = '/usr/share/openvswitch' + +- nb_tables = self.get_tables_from_schema(os.path.join( ++ nb_tables = self.get_tables_from_schema(self.path_join( + schema_dir, 'ovn-nb.ovsschema')) + + self.add_database_output(nb_tables, nbctl_cmds, 'ovn-nbctl') +@@ -116,7 +116,7 @@ class OVNCentral(Plugin): + format(self.ovn_sbdb_sock_path), + "output": "Leader: self"} + if self.test_predicate(self, pred=SoSPredicate(self, cmd_outputs=co)): +- sb_tables = self.get_tables_from_schema(os.path.join( ++ sb_tables = self.get_tables_from_schema(self.path_join( + schema_dir, 'ovn-sb.ovsschema'), ['Logical_Flow']) + self.add_database_output(sb_tables, sbctl_cmds, 'ovn-sbctl') + cmds += sbctl_cmds +@@ -134,14 +134,14 @@ class OVNCentral(Plugin): + ovs_dbdir = os.environ.get('OVS_DBDIR') + for dbfile in ['ovnnb_db.db', 'ovnsb_db.db']: + self.add_copy_spec([ +- os.path.join('/var/lib/openvswitch/ovn', dbfile), +- os.path.join('/usr/local/etc/openvswitch', dbfile), +- os.path.join('/etc/openvswitch', dbfile), +- os.path.join('/var/lib/openvswitch', dbfile), +- os.path.join('/var/lib/ovn/etc', dbfile), ++ self.path_join('/var/lib/openvswitch/ovn', dbfile), ++ self.path_join('/usr/local/etc/openvswitch', dbfile), ++ self.path_join('/etc/openvswitch', dbfile), ++ self.path_join('/var/lib/openvswitch', dbfile), ++ self.path_join('/var/lib/ovn/etc', dbfile) + ]) + if ovs_dbdir: +- self.add_copy_spec(os.path.join(ovs_dbdir, dbfile)) ++ self.add_copy_spec(self.path_join(ovs_dbdir, dbfile)) + + self.add_journal(units="ovn-northd") + +diff --git a/sos/report/plugins/ovn_host.py b/sos/report/plugins/ovn_host.py +index 3742c49f..78604a15 100644 +--- a/sos/report/plugins/ovn_host.py ++++ b/sos/report/plugins/ovn_host.py +@@ -35,7 +35,7 @@ class OVNHost(Plugin): + else: + self.add_copy_spec("/var/log/ovn/*.log") + +- 
self.add_copy_spec([os.path.join(pp, pidfile) for pp in pid_paths]) ++ self.add_copy_spec([self.path_join(pp, pidfile) for pp in pid_paths]) + + self.add_copy_spec('/etc/sysconfig/ovn-controller') + +@@ -49,7 +49,7 @@ class OVNHost(Plugin): + + def check_enabled(self): + return (any([self.path_isfile( +- os.path.join(pp, pidfile)) for pp in pid_paths]) or ++ self.path_join(pp, pidfile)) for pp in pid_paths]) or + super(OVNHost, self).check_enabled()) + + +diff --git a/sos/report/plugins/pacemaker.py b/sos/report/plugins/pacemaker.py +index 497807ff..6ce80881 100644 +--- a/sos/report/plugins/pacemaker.py ++++ b/sos/report/plugins/pacemaker.py +@@ -129,7 +129,7 @@ class Pacemaker(Plugin): + + class DebianPacemaker(Pacemaker, DebianPlugin, UbuntuPlugin): + def setup(self): +- self.envfile = "/etc/default/pacemaker" ++ self.envfile = self.path_join("/etc/default/pacemaker") + self.setup_crm_shell() + self.setup_pcs() + super(DebianPacemaker, self).setup() +@@ -141,7 +141,7 @@ class DebianPacemaker(Pacemaker, DebianPlugin, UbuntuPlugin): + + class RedHatPacemaker(Pacemaker, RedHatPlugin): + def setup(self): +- self.envfile = "/etc/sysconfig/pacemaker" ++ self.envfile = self.path_join("/etc/sysconfig/pacemaker") + self.setup_pcs() + self.add_copy_spec("/etc/sysconfig/sbd") + super(RedHatPacemaker, self).setup() +diff --git a/sos/report/plugins/pcp.py b/sos/report/plugins/pcp.py +index 9707d7a9..ad902332 100644 +--- a/sos/report/plugins/pcp.py ++++ b/sos/report/plugins/pcp.py +@@ -41,7 +41,7 @@ class Pcp(Plugin, RedHatPlugin, DebianPlugin): + total_size = 0 + for dirpath, dirnames, filenames in os.walk(path): + for f in filenames: +- fp = os.path.join(dirpath, f) ++ fp = self.path_join(dirpath, f) + total_size += os.path.getsize(fp) + return total_size + +@@ -86,7 +86,7 @@ class Pcp(Plugin, RedHatPlugin, DebianPlugin): + # unconditionally. Obviously if someone messes up their /etc/pcp.conf + # in a ridiculous way (i.e. setting PCP_SYSCONF_DIR to '/') this will + # break badly. +- var_conf_dir = os.path.join(self.pcp_var_dir, 'config') ++ var_conf_dir = self.path_join(self.pcp_var_dir, 'config') + self.add_copy_spec([ + self.pcp_sysconf_dir, + self.pcp_conffile, +@@ -98,10 +98,10 @@ class Pcp(Plugin, RedHatPlugin, DebianPlugin): + # rpms. 
It does not make up for a lot of size but it contains many + # files + self.add_forbidden_path([ +- os.path.join(var_conf_dir, 'pmchart'), +- os.path.join(var_conf_dir, 'pmlogconf'), +- os.path.join(var_conf_dir, 'pmieconf'), +- os.path.join(var_conf_dir, 'pmlogrewrite') ++ self.path_join(var_conf_dir, 'pmchart'), ++ self.path_join(var_conf_dir, 'pmlogconf'), ++ self.path_join(var_conf_dir, 'pmieconf'), ++ self.path_join(var_conf_dir, 'pmlogrewrite') + ]) + + # Take PCP_LOG_DIR/pmlogger/`hostname` + PCP_LOG_DIR/pmmgr/`hostname` +@@ -121,13 +121,13 @@ class Pcp(Plugin, RedHatPlugin, DebianPlugin): + # we would collect everything + if self.pcp_hostname != '': + # collect pmmgr logs up to 'pmmgrlogs' size limit +- path = os.path.join(self.pcp_log_dir, 'pmmgr', +- self.pcp_hostname, '*') ++ path = self.path_join(self.pcp_log_dir, 'pmmgr', ++ self.pcp_hostname, '*') + self.add_copy_spec(path, sizelimit=self.sizelimit, tailit=False) + # collect newest pmlogger logs up to 'pmloggerfiles' count + files_collected = 0 +- path = os.path.join(self.pcp_log_dir, 'pmlogger', +- self.pcp_hostname, '*') ++ path = self.path_join(self.pcp_log_dir, 'pmlogger', ++ self.pcp_hostname, '*') + pmlogger_ls = self.exec_cmd("ls -t1 %s" % path) + if pmlogger_ls['status'] == 0: + for line in pmlogger_ls['output'].splitlines(): +@@ -138,15 +138,15 @@ class Pcp(Plugin, RedHatPlugin, DebianPlugin): + + self.add_copy_spec([ + # Collect PCP_LOG_DIR/pmcd and PCP_LOG_DIR/NOTICES +- os.path.join(self.pcp_log_dir, 'pmcd'), +- os.path.join(self.pcp_log_dir, 'NOTICES*'), ++ self.path_join(self.pcp_log_dir, 'pmcd'), ++ self.path_join(self.pcp_log_dir, 'NOTICES*'), + # Collect PCP_VAR_DIR/pmns +- os.path.join(self.pcp_var_dir, 'pmns'), ++ self.path_join(self.pcp_var_dir, 'pmns'), + # Also collect any other log and config files + # (as suggested by fche) +- os.path.join(self.pcp_log_dir, '*/*.log*'), +- os.path.join(self.pcp_log_dir, '*/*/*.log*'), +- os.path.join(self.pcp_log_dir, '*/*/config*') ++ self.path_join(self.pcp_log_dir, '*/*.log*'), ++ self.path_join(self.pcp_log_dir, '*/*/*.log*'), ++ self.path_join(self.pcp_log_dir, '*/*/config*') + ]) + + # Collect a summary for the current day +diff --git a/sos/report/plugins/postfix.py b/sos/report/plugins/postfix.py +index 8f584430..3ca0c4ad 100644 +--- a/sos/report/plugins/postfix.py ++++ b/sos/report/plugins/postfix.py +@@ -41,7 +41,7 @@ class Postfix(Plugin): + ] + fp = [] + try: +- with open('/etc/postfix/main.cf', 'r') as cffile: ++ with open(self.path_join('/etc/postfix/main.cf'), 'r') as cffile: + for line in cffile.readlines(): + # ignore comments and take the first word after '=' + if line.startswith('#'): +diff --git a/sos/report/plugins/postgresql.py b/sos/report/plugins/postgresql.py +index bec0b019..00824db7 100644 +--- a/sos/report/plugins/postgresql.py ++++ b/sos/report/plugins/postgresql.py +@@ -124,7 +124,7 @@ class RedHatPostgreSQL(PostgreSQL, SCLPlugin): + + # copy PG_VERSION and postmaster.opts + for f in ["PG_VERSION", "postmaster.opts"]: +- self.add_copy_spec(os.path.join(_dir, "data", f)) ++ self.add_copy_spec(self.path_join(_dir, "data", f)) + + + class DebianPostgreSQL(PostgreSQL, DebianPlugin, UbuntuPlugin): +diff --git a/sos/report/plugins/powerpc.py b/sos/report/plugins/powerpc.py +index 4fb4f87c..50f88650 100644 +--- a/sos/report/plugins/powerpc.py ++++ b/sos/report/plugins/powerpc.py +@@ -22,7 +22,7 @@ class PowerPC(Plugin, IndependentPlugin): + + def setup(self): + try: +- with open('/proc/cpuinfo', 'r') as fp: ++ with 
open(self.path_join('/proc/cpuinfo'), 'r') as fp: + contents = fp.read() + ispSeries = "pSeries" in contents + isPowerNV = "PowerNV" in contents +diff --git a/sos/report/plugins/processor.py b/sos/report/plugins/processor.py +index 2df2dc9a..c3d8930c 100644 +--- a/sos/report/plugins/processor.py ++++ b/sos/report/plugins/processor.py +@@ -7,7 +7,6 @@ + # See the LICENSE file in the source distribution for further information. + + from sos.report.plugins import Plugin, IndependentPlugin +-import os + + + class Processor(Plugin, IndependentPlugin): +@@ -41,7 +40,7 @@ class Processor(Plugin, IndependentPlugin): + # cumulative directory size exceeds 25MB or even 100MB. + cdirs = self.listdir('/sys/devices/system/cpu') + self.add_copy_spec([ +- os.path.join('/sys/devices/system/cpu', cdir) for cdir in cdirs ++ self.path_join('/sys/devices/system/cpu', cdir) for cdir in cdirs + ]) + + self.add_cmd_output([ +diff --git a/sos/report/plugins/python.py b/sos/report/plugins/python.py +index e2ab39ab..a8ec0cd8 100644 +--- a/sos/report/plugins/python.py ++++ b/sos/report/plugins/python.py +@@ -68,9 +68,9 @@ class RedHatPython(Python, RedHatPlugin): + ] + + for py_path in py_paths: +- for root, _, files in os.walk(py_path): ++ for root, _, files in os.walk(self.path_join(py_path)): + for file_ in files: +- filepath = os.path.join(root, file_) ++ filepath = self.path_join(root, file_) + if filepath.endswith('.py'): + try: + with open(filepath, 'rb') as f: +diff --git a/sos/report/plugins/sar.py b/sos/report/plugins/sar.py +index 669f5d7b..b60005b1 100644 +--- a/sos/report/plugins/sar.py ++++ b/sos/report/plugins/sar.py +@@ -7,7 +7,6 @@ + # See the LICENSE file in the source distribution for further information. + + from sos.report.plugins import Plugin, RedHatPlugin, DebianPlugin, UbuntuPlugin +-import os + import re + + +@@ -24,7 +23,7 @@ class Sar(Plugin,): + "", False)] + + def setup(self): +- self.add_copy_spec(os.path.join(self.sa_path, '*'), ++ self.add_copy_spec(self.path_join(self.sa_path, '*'), + sizelimit=0 if self.get_option("all_sar") else None, + tailit=False) + +@@ -44,7 +43,7 @@ class Sar(Plugin,): + # as option for sadc + for fname in dir_list: + if sa_regex.match(fname): +- sa_data_path = os.path.join(self.sa_path, fname) ++ sa_data_path = self.path_join(self.sa_path, fname) + sar_filename = 'sar' + fname[2:] + if sar_filename not in dir_list: + sar_cmd = 'sh -c "sar -A -f %s"' % sa_data_path +diff --git a/sos/report/plugins/sos_extras.py b/sos/report/plugins/sos_extras.py +index ffde4138..55bc4dc0 100644 +--- a/sos/report/plugins/sos_extras.py ++++ b/sos/report/plugins/sos_extras.py +@@ -58,7 +58,7 @@ class SosExtras(Plugin, IndependentPlugin): + + for path, dirlist, filelist in os.walk(self.extras_dir): + for f in filelist: +- _file = os.path.join(path, f) ++ _file = self.path_join(path, f) + self._log_warn("Collecting data from extras file %s" % _file) + try: + for line in open(_file).read().splitlines(): +diff --git a/sos/report/plugins/ssh.py b/sos/report/plugins/ssh.py +index 971cda8b..9ac9dec0 100644 +--- a/sos/report/plugins/ssh.py ++++ b/sos/report/plugins/ssh.py +@@ -42,7 +41,7 @@ class Ssh(Plugin, IndependentPlugin): + try: + for sshcfg in sshcfgs: + tag = sshcfg.split('/')[-1] +- with open(sshcfg, 'r') as cfgfile: ++ with open(self.path_join(sshcfg), 'r') as cfgfile: + for line in cfgfile: + # skip empty lines and comments + if len(line.split()) == 0 or line.startswith('#'): +diff --git a/sos/report/plugins/unpackaged.py b/sos/report/plugins/unpackaged.py +index 
9205e53f..772b1d1f 100644 +--- a/sos/report/plugins/unpackaged.py ++++ b/sos/report/plugins/unpackaged.py +@@ -40,7 +40,7 @@ class Unpackaged(Plugin, RedHatPlugin): + for e in exclude: + dirs[:] = [d for d in dirs if d not in e] + for name in files: +- path = os.path.join(root, name) ++ path = self.path_join(root, name) + try: + if stat.S_ISLNK(os.lstat(path).st_mode): + path = Path(path).resolve() +@@ -49,7 +49,7 @@ class Unpackaged(Plugin, RedHatPlugin): + file_list.append(os.path.realpath(path)) + for name in dirs: + file_list.append(os.path.realpath( +- os.path.join(root, name))) ++ self.path_join(root, name))) + + return file_list + +diff --git a/sos/report/plugins/watchdog.py b/sos/report/plugins/watchdog.py +index 1bf3f4cb..bf2dc9cb 100644 +--- a/sos/report/plugins/watchdog.py ++++ b/sos/report/plugins/watchdog.py +@@ -11,7 +11,6 @@ + from sos.report.plugins import Plugin, RedHatPlugin + + from glob import glob +-import os + + + class Watchdog(Plugin, RedHatPlugin): +@@ -56,8 +55,8 @@ class Watchdog(Plugin, RedHatPlugin): + Collect configuration files, custom executables for test-binary + and repair-binary, and stdout/stderr logs. + """ +- conf_file = self.get_option('conf_file') +- log_dir = '/var/log/watchdog' ++ conf_file = self.path_join(self.get_option('conf_file')) ++ log_dir = self.path_join('/var/log/watchdog') + + # Get service configuration and sysconfig files + self.add_copy_spec([ +@@ -80,15 +79,15 @@ class Watchdog(Plugin, RedHatPlugin): + self._log_warn("Could not read %s: %s" % (conf_file, ex)) + + if self.get_option('all_logs'): +- log_files = glob(os.path.join(log_dir, '*')) ++ log_files = glob(self.path_join(log_dir, '*')) + else: +- log_files = (glob(os.path.join(log_dir, '*.stdout')) + +- glob(os.path.join(log_dir, '*.stderr'))) ++ log_files = (glob(self.path_join(log_dir, '*.stdout')) + ++ glob(self.path_join(log_dir, '*.stderr'))) + + self.add_copy_spec(log_files) + + # Get output of "wdctl " for each /dev/watchdog* +- for dev in glob('/dev/watchdog*'): ++ for dev in glob(self.path_join('/dev/watchdog*')): + self.add_cmd_output("wdctl %s" % dev) + + # vim: set et ts=4 sw=4 : +diff --git a/sos/report/plugins/yum.py b/sos/report/plugins/yum.py +index 148464cb..e5256642 100644 +--- a/sos/report/plugins/yum.py ++++ b/sos/report/plugins/yum.py +@@ -61,7 +61,7 @@ class Yum(Plugin, RedHatPlugin): + if not p.endswith(".py"): + continue + plugins = plugins + " " if len(plugins) else "" +- plugins = plugins + os.path.join(YUM_PLUGIN_PATH, p) ++ plugins = plugins + self.path_join(YUM_PLUGIN_PATH, p) + if len(plugins): + self.add_cmd_output("rpm -qf %s" % plugins, + suggest_filename="plugin-packages") +-- +2.31.1 + +From f4af5efdc79aefe1aa685c36d095925bae14dc4a Mon Sep 17 00:00:00 2001 +From: Jake Hunsaker +Date: Tue, 28 Sep 2021 13:00:17 -0400 +Subject: [PATCH 1/4] [collect] Add --transport option and allow clusters to + set transport type + +Adds a new `--transport` option for users to be able to specify the type +of transport to use when connecting to nodes. The default value of +`auto` will defer to the cluster profile to set the transport type, +which will continue to default to use OpenSSH's ControlPersist feature. + +Clusters may override the new `set_transport_type()` method to change +the default transport used. + +If `--transport` is anything besides `auto`, then the cluster profile +will not be deferred to when choosing a transport for each remote node. 
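
Under this scheme, a cluster profile only has to return the name of its preferred transport. A minimal sketch; example_cluster is a hypothetical profile, and the import path follows the module shown in this patch:

    from sos.collector.clusters import Cluster

    class example_cluster(Cluster):
        """Hypothetical profile, shown only to illustrate the override."""

        def set_transport_type(self):
            # Consulted only when the user leaves --transport at 'auto';
            # an explicit --transport value always wins.
            return 'control_persist'
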
+ +Signed-off-by: Jake Hunsaker +--- + man/en/sos-collect.1 | 15 +++++++++++++++ + sos/collector/__init__.py | 6 ++++++ + sos/collector/clusters/__init__.py | 10 ++++++++++ + sos/collector/exceptions.py | 13 ++++++++++++- + sos/collector/sosnode.py | 16 +++++++++++++++- + 5 files changed, 58 insertions(+), 2 deletions(-) + +diff --git a/man/en/sos-collect.1 b/man/en/sos-collect.1 +index e930023e..8ad4fe5e 100644 +--- a/man/en/sos-collect.1 ++++ b/man/en/sos-collect.1 +@@ -43,6 +43,7 @@ sos collect \- Collect sosreports from multiple (cluster) nodes + [\-\-sos-cmd SOS_CMD] + [\-t|\-\-threads THREADS] + [\-\-timeout TIMEOUT] ++ [\-\-transport TRANSPORT] + [\-\-tmp\-dir TMP_DIR] + [\-v|\-\-verbose] + [\-\-verify] +@@ -350,6 +351,20 @@ Note that sosreports are collected in parallel, so you can approximate the total + runtime of sos collect via timeout*(number of nodes/jobs). + + Default is 180 seconds. ++.TP ++\fB\-\-transport\fR TRANSPORT ++Specify the type of remote transport to use to manage connections to remote nodes. ++ ++\fBsos collect\fR uses locally installed binaries to connect to and interact with remote ++nodes, instead of directly establishing those connections. By default, OpenSSH's ControlPersist ++feature is preferred, however certain cluster types may have preferences of their own for how ++remote sessions should be established. ++ ++The types of transports supported are currently as follows: ++ ++ \fBauto\fR Allow the cluster type to determine the transport used ++ \fBcontrol_persist\fR Use OpenSSH's ControlPersist feature. This is the default behavior ++ + .TP + \fB\-\-tmp\-dir\fR TMP_DIR + Specify a temporary directory to save sos archives to. By default one will be created in +diff --git a/sos/collector/__init__.py b/sos/collector/__init__.py +index da912655..fecfe6aa 100644 +--- a/sos/collector/__init__.py ++++ b/sos/collector/__init__.py +@@ -98,6 +98,7 @@ class SoSCollector(SoSComponent): + 'ssh_port': 22, + 'ssh_user': 'root', + 'timeout': 600, ++ 'transport': 'auto', + 'verify': False, + 'usernames': [], + 'upload': False, +@@ -378,6 +379,8 @@ class SoSCollector(SoSComponent): + help='Specify an SSH user. 
Default root') + collect_grp.add_argument('--timeout', type=int, required=False, + help='Timeout for sosreport on each node.') ++ collect_grp.add_argument('--transport', default='auto', type=str, ++ help='Remote connection transport to use') + collect_grp.add_argument("--upload", action="store_true", + default=False, + help="Upload archive to a policy-default " +@@ -813,6 +813,8 @@ class SoSCollector(SoSComponent): + self.collect_md.add_field('cluster_type', self.cluster_type) + if self.cluster: + self.master.cluster = self.cluster ++ if self.opts.transport == 'auto': ++ self.opts.transport = self.cluster.set_transport_type() + self.cluster.setup() + if self.cluster.cluster_ssh_key: + if not self.opts.ssh_key: +@@ -1041,6 +1046,7 @@ class SoSCollector(SoSComponent): + else: + client.disconnect() + except Exception: ++ # all exception logging is handled within SoSNode + pass + + def intro(self): +diff --git a/sos/collector/clusters/__init__.py b/sos/collector/clusters/__init__.py +index 64ac2a44..cf1e7a0b 100644 +--- a/sos/collector/clusters/__init__.py ++++ b/sos/collector/clusters/__init__.py +@@ -149,6 +149,16 @@ class Cluster(): + """ + pass + ++ def set_transport_type(self): ++ """The default connection type used by sos collect is to leverage the ++ local system's SSH installation using ControlPersist, however certain ++ cluster types may want to use something else. ++ ++ Override this in a specific cluster profile to set the ``transport`` ++ option according to what type of transport should be used. ++ """ ++ return 'control_persist' ++ + def set_master_options(self, node): + """If there is a need to set specific options in the sos command being + run on the cluster's master nodes, override this method in the cluster +diff --git a/sos/collector/exceptions.py b/sos/collector/exceptions.py +index 1e44768b..2bb07e7b 100644 +--- a/sos/collector/exceptions.py ++++ b/sos/collector/exceptions.py +@@ -94,6 +94,16 @@ class UnsupportedHostException(Exception): + super(UnsupportedHostException, self).__init__(message) + + ++class InvalidTransportException(Exception): ++ """Raised when a transport is requested but it does not exist or is ++ not supported locally""" ++ ++ def __init__(self, transport=None): ++ message = ("Connection failed: unknown or unsupported transport %s" ++ % transport if transport else '') ++ super(InvalidTransportException, self).__init__(message) ++ ++ + __all__ = [ + 'AuthPermissionDeniedException', + 'CommandTimeoutException', +@@ -104,5 +114,6 @@ __all__ = [ + 'InvalidPasswordException', + 'PasswordRequestException', + 'TimeoutPasswordAuthException', +- 'UnsupportedHostException' ++ 'UnsupportedHostException', ++ 'InvalidTransportException' + ] +diff --git a/sos/collector/sosnode.py b/sos/collector/sosnode.py +index f79bd5ff..5c5c7201 100644 +--- a/sos/collector/sosnode.py ++++ b/sos/collector/sosnode.py +@@ -22,7 +22,13 @@ from sos.collector.transports.control_persist import SSHControlPersist + from sos.collector.transports.local import LocalTransport + from sos.collector.exceptions import (CommandTimeoutException, + ConnectionException, +- UnsupportedHostException) ++ UnsupportedHostException, ++ InvalidTransportException) ++ ++TRANSPORTS = { ++ 'local': LocalTransport, ++ 'control_persist': SSHControlPersist, ++} + + + class SosNode(): +@@ -107,6 +113,14 @@ class SosNode(): + if self.address in ['localhost', '127.0.0.1']: + self.local = True + return LocalTransport(self.address, commons) ++ elif self.opts.transport in TRANSPORTS.keys(): ++ return 
TRANSPORTS[self.opts.transport](self.address, commons) ++ elif self.opts.transport != 'auto': ++ self.log_error( ++ "Connection failed: unknown or unsupported transport %s" ++ % self.opts.transport ++ ) ++ raise InvalidTransportException(self.opts.transport) + return SSHControlPersist(self.address, commons) + + def _fmt_msg(self, msg): +-- +2.31.1 + + +From dbc49345384404600f45d68b8d3c6541b2a26480 Mon Sep 17 00:00:00 2001 +From: Jake Hunsaker +Date: Thu, 30 Sep 2021 10:38:18 -0400 +Subject: [PATCH 2/4] [transports] Add 'oc' as a transport option for remote + nodes + +This commit adds a new transport for `sos collect` by leveraging a +locally available `oc` binary that has been properly configured for +access to an OCP cluster. + +This transport will allow users to use `sos collect` to collect reports +from an OCP cluster without directly connecting to any of the nodes +involved. We do this by using the `oc` binary to first launch a pod on +target node(s) and then exec our discovery commands and eventual `sos +report` command to that pod. This in turn is dependent on a function API +for the `oc` binary to communicate with. In the event that `oc` is not +__locally__ available or is not properly configured, we will fallback to +the current default of using SSH ControlPersist to directly connect to +the nodes. Otherwise, the OCP cluster will attempt to automatically use +this new transport. +--- + man/en/sos-collect.1 | 1 + + sos/collector/clusters/ocp.py | 14 ++ + sos/collector/sosnode.py | 8 +- + sos/collector/transports/__init__.py | 20 ++- + sos/collector/transports/oc.py | 220 +++++++++++++++++++++++++++ + 5 files changed, 257 insertions(+), 6 deletions(-) + create mode 100644 sos/collector/transports/oc.py + +diff --git a/man/en/sos-collect.1 b/man/en/sos-collect.1 +index 8ad4fe5e..a1f6c10e 100644 +--- a/man/en/sos-collect.1 ++++ b/man/en/sos-collect.1 +@@ -364,6 +364,7 @@ The types of transports supported are currently as follows: + + \fBauto\fR Allow the cluster type to determine the transport used + \fBcontrol_persist\fR Use OpenSSH's ControlPersist feature. This is the default behavior ++ \fBoc\fR Use a \fBlocally\fR configured \fBoc\fR binary to deploy collection pods on OCP nodes + + .TP + \fB\-\-tmp\-dir\fR TMP_DIR +diff --git a/sos/collector/clusters/ocp.py b/sos/collector/clusters/ocp.py +index ad97587f..a9357dbf 100644 +--- a/sos/collector/clusters/ocp.py ++++ b/sos/collector/clusters/ocp.py +@@ -12,6 +12,7 @@ import os + + from pipes import quote + from sos.collector.clusters import Cluster ++from sos.utilities import is_executable + + + class ocp(Cluster): +@@ -83,6 +84,19 @@ class ocp(Cluster): + nodes[_node[0]][column] = _node[idx[column]] + return nodes + ++ def set_transport_type(self): ++ if is_executable('oc'): ++ return 'oc' ++ self.log_info("Local installation of 'oc' not found or is not " ++ "correctly configured. Will use ControlPersist") ++ self.ui_log.warn( ++ "Preferred transport 'oc' not available, will fallback to SSH." 
++ ) ++ if not self.opts.batch: ++ input("Press ENTER to continue connecting with SSH, or Ctrl+C to" ++ "abort.") ++ return 'control_persist' ++ + def get_nodes(self): + nodes = [] + self.node_dict = {} +diff --git a/sos/collector/sosnode.py b/sos/collector/sosnode.py +index 5c5c7201..8a9dbd7a 100644 +--- a/sos/collector/sosnode.py ++++ b/sos/collector/sosnode.py +@@ -20,6 +20,7 @@ from sos.policies import load + from sos.policies.init_systems import InitSystem + from sos.collector.transports.control_persist import SSHControlPersist + from sos.collector.transports.local import LocalTransport ++from sos.collector.transports.oc import OCTransport + from sos.collector.exceptions import (CommandTimeoutException, + ConnectionException, + UnsupportedHostException, +@@ -28,6 +29,7 @@ from sos.collector.exceptions import (CommandTimeoutException, + TRANSPORTS = { + 'local': LocalTransport, + 'control_persist': SSHControlPersist, ++ 'oc': OCTransport + } + + +@@ -421,13 +423,11 @@ class SosNode(): + if 'atomic' in cmd: + get_pty = True + +- if get_pty: +- cmd = "/bin/bash -c %s" % quote(cmd) +- + if env: + _cmd_env = self.env_vars + env = _cmd_env.update(env) +- return self._transport.run_command(cmd, timeout, need_root, env) ++ return self._transport.run_command(cmd, timeout, need_root, env, ++ get_pty) + + def sosreport(self): + """Run an sos report on the node, then collect it""" +diff --git a/sos/collector/transports/__init__.py b/sos/collector/transports/__init__.py +index 5be7dc6d..7bffee62 100644 +--- a/sos/collector/transports/__init__.py ++++ b/sos/collector/transports/__init__.py +@@ -144,7 +144,8 @@ class RemoteTransport(): + raise NotImplementedError("Transport %s does not define disconnect" + % self.name) + +- def run_command(self, cmd, timeout=180, need_root=False, env=None): ++ def run_command(self, cmd, timeout=180, need_root=False, env=None, ++ get_pty=False): + """Run a command on the node, returning its output and exit code. + This should return the exit code of the command being executed, not the + exit code of whatever mechanism the transport uses to execute that +@@ -165,10 +166,15 @@ class RemoteTransport(): + :param env: Specify env vars to be passed to the ``cmd`` + :type env: ``dict`` + ++ :param get_pty: Does ``cmd`` require execution with a pty? ++ :type get_pty: ``bool`` ++ + :returns: Output of ``cmd`` and the exit code + :rtype: ``dict`` with keys ``output`` and ``status`` + """ + self.log_debug('Running command %s' % cmd) ++ if get_pty: ++ cmd = "/bin/bash -c %s" % quote(cmd) + # currently we only use/support the use of pexpect for handling the + # execution of these commands, as opposed to directly invoking + # subprocess.Popen() in conjunction with tools like sshpass. 
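
The get_pty handling added above hands the entire command to a shell as a single argument, so embedded quoting must survive. In isolation, with an illustrative command string:

    from pipes import quote  # shlex.quote is the modern equivalent

    cmd = "ls -l 'My Documents'"
    pty_cmd = "/bin/bash -c %s" % quote(cmd)
    print(pty_cmd)
    # /bin/bash -c 'ls -l '"'"'My Documents'"'"''
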
+@@ -212,6 +218,13 @@ class RemoteTransport(): + :type env: ``dict`` + """ + cmd = self._format_cmd_for_exec(cmd) ++ ++ # if for any reason env is empty, set it to None as otherwise ++ # pexpect interprets this to mean "run this command with no env vars of ++ # any kind" ++ if not env: ++ env = None ++ + result = pexpect.spawn(cmd, encoding='utf-8', env=env) + + _expects = [pexpect.EOF, pexpect.TIMEOUT] +@@ -268,6 +281,9 @@ class RemoteTransport(): + _out = self.run_command('hostname') + if _out['status'] == 0: + self._hostname = _out['output'].strip() ++ ++ if not self._hostname: ++ self._hostname = self.address + self.log_info("Hostname set to %s" % self._hostname) + return self._hostname + +@@ -302,7 +318,7 @@ class RemoteTransport(): + return self._read_file(fname) + + def _read_file(self, fname): +- res = self.run_command("cat %s" % fname, timeout=5) ++ res = self.run_command("cat %s" % fname, timeout=10) + if res['status'] == 0: + return res['output'] + else: +diff --git a/sos/collector/transports/oc.py b/sos/collector/transports/oc.py +new file mode 100644 +index 00000000..649037b9 +--- /dev/null ++++ b/sos/collector/transports/oc.py +@@ -0,0 +1,220 @@ ++# Copyright Red Hat 2021, Jake Hunsaker ++ ++# This file is part of the sos project: https://github.com/sosreport/sos ++# ++# This copyrighted material is made available to anyone wishing to use, ++# modify, copy, or redistribute it subject to the terms and conditions of ++# version 2 of the GNU General Public License. ++# ++# See the LICENSE file in the source distribution for further information. ++ ++import json ++import tempfile ++import os ++ ++from sos.collector.transports import RemoteTransport ++from sos.utilities import (is_executable, sos_get_command_output, ++ SoSTimeoutError) ++ ++ ++class OCTransport(RemoteTransport): ++ """This transport leverages the execution of commands via a locally ++ available and configured ``oc`` binary for OCPv4 environments. ++ ++ OCPv4 clusters generally discourage the use of SSH, so this transport may ++ be used to remove our use of SSH in favor of the environment provided ++ method of connecting to nodes and executing commands via debug pods. 
++ ++ Note that this approach will generate multiple debug pods over the course ++ of our execution ++ """ ++ ++ name = 'oc' ++ project = 'sos-collect-tmp' ++ ++ def run_oc(self, cmd, **kwargs): ++ """Format and run a command with `oc` in the project defined for our ++ execution ++ """ ++ return sos_get_command_output( ++ "oc -n sos-collect-tmp %s" % cmd, ++ **kwargs ++ ) ++ ++ @property ++ def connected(self): ++ up = self.run_oc( ++ "wait --timeout=0s --for=condition=ready pod/%s" % self.pod_name ++ ) ++ return up['status'] == 0 ++ ++ def get_node_pod_config(self): ++ """Based on our template for the debug container, add the node-specific ++ items so that we can deploy one of these on each node we're collecting ++ from ++ """ ++ return { ++ "kind": "Pod", ++ "apiVersion": "v1", ++ "metadata": { ++ "name": "%s-sos-collector" % self.address.split('.')[0], ++ "namespace": "sos-collect-tmp" ++ }, ++ "priorityClassName": "system-cluster-critical", ++ "spec": { ++ "volumes": [ ++ { ++ "name": "host", ++ "hostPath": { ++ "path": "/", ++ "type": "Directory" ++ } ++ }, ++ { ++ "name": "run", ++ "hostPath": { ++ "path": "/run", ++ "type": "Directory" ++ } ++ }, ++ { ++ "name": "varlog", ++ "hostPath": { ++ "path": "/var/log", ++ "type": "Directory" ++ } ++ }, ++ { ++ "name": "machine-id", ++ "hostPath": { ++ "path": "/etc/machine-id", ++ "type": "File" ++ } ++ } ++ ], ++ "containers": [ ++ { ++ "name": "sos-collector-tmp", ++ "image": "registry.redhat.io/rhel8/support-tools", ++ "command": [ ++ "/bin/bash" ++ ], ++ "env": [ ++ { ++ "name": "HOST", ++ "value": "/host" ++ } ++ ], ++ "resources": {}, ++ "volumeMounts": [ ++ { ++ "name": "host", ++ "mountPath": "/host" ++ }, ++ { ++ "name": "run", ++ "mountPath": "/run" ++ }, ++ { ++ "name": "varlog", ++ "mountPath": "/var/log" ++ }, ++ { ++ "name": "machine-id", ++ "mountPath": "/etc/machine-id" ++ } ++ ], ++ "securityContext": { ++ "privileged": True, ++ "runAsUser": 0 ++ }, ++ "stdin": True, ++ "stdinOnce": True, ++ "tty": True ++ } ++ ], ++ "restartPolicy": "Never", ++ "nodeName": self.address, ++ "hostNetwork": True, ++ "hostPID": True, ++ "hostIPC": True ++ } ++ } ++ ++ def _connect(self, password): ++ # the oc binary must be _locally_ available for this to work ++ if not is_executable('oc'): ++ return False ++ ++ # deploy the debug container we'll exec into ++ podconf = self.get_node_pod_config() ++ self.pod_name = podconf['metadata']['name'] ++ fd, self.pod_tmp_conf = tempfile.mkstemp(dir=self.tmpdir) ++ with open(fd, 'w') as cfile: ++ json.dump(podconf, cfile) ++ self.log_debug("Starting sos collector container '%s'" % self.pod_name) ++ # this specifically does not need to run with a project definition ++ out = sos_get_command_output( ++ "oc create -f %s" % self.pod_tmp_conf ++ ) ++ if (out['status'] != 0 or "pod/%s created" % self.pod_name not in ++ out['output']): ++ self.log_error("Unable to deploy sos collect pod") ++ self.log_debug("Debug pod deployment failed: %s" % out['output']) ++ return False ++ self.log_debug("Pod '%s' successfully deployed, waiting for pod to " ++ "enter ready state" % self.pod_name) ++ ++ # wait for the pod to report as running ++ try: ++ up = self.run_oc("wait --for=condition=Ready pod/%s --timeout=30s" ++ % self.pod_name, ++ # timeout is for local safety, not oc ++ timeout=40) ++ if not up['status'] == 0: ++ self.log_error("Pod not available after 30 seconds") ++ return False ++ except SoSTimeoutError: ++ self.log_error("Timeout while polling for pod readiness") ++ return False ++ except Exception as err: ++ 
self.log_error("Error while waiting for pod to be ready: %s" ++ % err) ++ return False ++ ++ return True ++ ++ def _format_cmd_for_exec(self, cmd): ++ if cmd.startswith('oc'): ++ return ("oc -n %s exec --request-timeout=0 %s -- chroot /host %s" ++ % (self.project, self.pod_name, cmd)) ++ return super(OCTransport, self)._format_cmd_for_exec(cmd) ++ ++ def run_command(self, cmd, timeout=180, need_root=False, env=None, ++ get_pty=False): ++ # debug pod setup is slow, extend all timeouts to account for this ++ if timeout: ++ timeout += 10 ++ ++ # since we always execute within a bash shell, force disable get_pty ++ # to avoid double-quoting ++ return super(OCTransport, self).run_command(cmd, timeout, need_root, ++ env, False) ++ ++ def _disconnect(self): ++ os.unlink(self.pod_tmp_conf) ++ removed = self.run_oc("delete pod %s" % self.pod_name) ++ if "deleted" not in removed['output']: ++ self.log_debug("Calling delete on pod '%s' failed: %s" ++ % (self.pod_name, removed)) ++ return False ++ return True ++ ++ @property ++ def remote_exec(self): ++ return ("oc -n %s exec --request-timeout=0 %s -- /bin/bash -c" ++ % (self.project, self.pod_name)) ++ ++ def _retrieve_file(self, fname, dest): ++ cmd = self.run_oc("cp %s:%s %s" % (self.pod_name, fname, dest)) ++ return cmd['status'] == 0 +-- +2.31.1 + + +From 460494c4296db1a7529b44fe8f6597544c917c02 Mon Sep 17 00:00:00 2001 +From: Jake Hunsaker +Date: Mon, 11 Oct 2021 11:50:44 -0400 +Subject: [PATCH 3/4] [ocp] Create temporary project and restrict default node + list to masters + +Adds explicit setup of a new project to use in the `ocp` cluster and +adds better handling of cluster setup generally, which the `ocp` cluster +is the first to make use of. + +Included in this change is a correction to +`Cluster.exec_primary_cmd()`'s use of `get_pty` to now be determined on +if the primary node is the local node or not. + +Additionally, based on feedback from the OCP engineering team, by +default restrict node lists to masters. + +Signed-off-by: Jake Hunsaker +--- + sos/collector/__init__.py | 5 ++++ + sos/collector/clusters/__init__.py | 13 +++++++- + sos/collector/clusters/ocp.py | 48 ++++++++++++++++++++++++++++-- + sos/collector/transports/oc.py | 4 +-- + 4 files changed, 64 insertions(+), 6 deletions(-) + +diff --git a/sos/collector/__init__.py b/sos/collector/__init__.py +index fecfe6aa..a76f8a79 100644 +--- a/sos/collector/__init__.py ++++ b/sos/collector/__init__.py +@@ -850,6 +850,7 @@ class SoSCollector(SoSComponent): + "CTRL-C to quit\n") + self.ui_log.info("") + except KeyboardInterrupt: ++ self.cluster.cleanup() + self.exit("Exiting on user cancel", 130) + + def configure_sos_cmd(self): +@@ -1098,6 +1099,7 @@ this utility or remote systems that it connects to. + self.archive.makedirs('sos_logs', 0o755) + + self.collect() ++ self.cluster.cleanup() + self.cleanup() + + def collect(self): +@@ -1156,9 +1158,11 @@ this utility or remote systems that it connects to. + pool.shutdown(wait=True) + except KeyboardInterrupt: + self.log_error('Exiting on user cancel\n') ++ self.cluster.cleanup() + os._exit(130) + except Exception as err: + self.log_error('Could not connect to nodes: %s' % err) ++ self.cluster.cleanup() + os._exit(1) + + if hasattr(self.cluster, 'run_extra_cmd'): +@@ -1199,6 +1199,7 @@ this utility or remote systems that it c + arc_name = self.create_cluster_archive() + else: + msg = 'No sosreports were collected, nothing to archive...' 
++ self.cluster.cleanup() + self.exit(msg, 1) + + if self.opts.upload and self.policy.get_upload_url(): +diff --git a/sos/collector/clusters/__init__.py b/sos/collector/clusters/__init__.py +index cf1e7a0b..2a4665a1 100644 +--- a/sos/collector/clusters/__init__.py ++++ b/sos/collector/clusters/__init__.py +@@ -192,7 +192,8 @@ class Cluster(): + :returns: The output and status of `cmd` + :rtype: ``dict`` + """ +- res = self.master.run_command(cmd, get_pty=True, need_root=need_root) ++ pty = self.master.local is False ++ res = self.master.run_command(cmd, get_pty=pty, need_root=need_root) + if res['output']: + res['output'] = res['output'].replace('Password:', '') + return res +@@ -223,6 +224,16 @@ class Cluster(): + return True + return False + ++ def cleanup(self): ++ """ ++ This may be overridden by clusters ++ ++ Perform any necessary cleanup steps required by the cluster profile. ++ This helps ensure that sos does make lasting changes to the environment ++ in which we are running ++ """ ++ pass ++ + def get_nodes(self): + """ + This MUST be overridden by a cluster profile subclassing this class +diff --git a/sos/collector/clusters/ocp.py b/sos/collector/clusters/ocp.py +index a9357dbf..92da4e6e 100644 +--- a/sos/collector/clusters/ocp.py ++++ b/sos/collector/clusters/ocp.py +@@ -23,10 +23,12 @@ class ocp(Cluster): + + api_collect_enabled = False + token = None ++ project = 'sos-collect-tmp' ++ oc_cluster_admin = None + + option_list = [ + ('label', '', 'Colon delimited list of labels to select nodes with'), +- ('role', '', 'Colon delimited list of roles to select nodes with'), ++ ('role', 'master', 'Colon delimited list of roles to filter on'), + ('kubeconfig', '', 'Path to the kubeconfig file'), + ('token', '', 'Service account token to use for oc authorization') + ] +@@ -58,6 +58,42 @@ class ocp(Cluster): + _who = self.fmt_oc_cmd('whoami') + return self.exec_master_cmd(_who)['status'] == 0 + ++ def setup(self): ++ """Create the project that we will be executing in for any nodes' ++ collection via a container image ++ """ ++ if not self.set_transport_type() == 'oc': ++ return ++ ++ out = self.exec_master_cmd(self.fmt_oc_cmd("auth can-i '*' '*'")) ++ self.oc_cluster_admin = out['status'] == 0 ++ if not self.oc_cluster_admin: ++ self.log_debug("Check for cluster-admin privileges returned false," ++ " cannot create project in OCP cluster") ++ raise Exception("Insufficient permissions to create temporary " ++ "collection project.\nAborting...") ++ ++ self.log_info("Creating new temporary project '%s'" % self.project) ++ ret = self.exec_master_cmd("oc new-project %s" % self.project) ++ if ret['status'] == 0: ++ return True ++ ++ self.log_debug("Failed to create project: %s" % ret['output']) ++ raise Exception("Failed to create temporary project for collection. 
" ++ "\nAborting...") ++ ++ def cleanup(self): ++ """Remove the project we created to execute within ++ """ ++ if self.project: ++ ret = self.exec_master_cmd("oc delete project %s" % self.project) ++ if not ret['status'] == 0: ++ self.log_error("Error deleting temporary project: %s" ++ % ret['output']) ++ # don't leave the config on a non-existing project ++ self.exec_master_cmd("oc project default") ++ return True ++ + def _build_dict(self, nodelist): + """From the output of get_nodes(), construct an easier-to-reference + dict of nodes that will be used in determining labels, master status, +@@ -85,10 +123,10 @@ class ocp(Cluster): + return nodes + + def set_transport_type(self): +- if is_executable('oc'): ++ if is_executable('oc') or self.opts.transport == 'oc': + return 'oc' + self.log_info("Local installation of 'oc' not found or is not " +- "correctly configured. Will use ControlPersist") ++ "correctly configured. Will use ControlPersist.") + self.ui_log.warn( + "Preferred transport 'oc' not available, will fallback to SSH." + ) +@@ -106,6 +144,10 @@ class ocp(Cluster): + cmd += " -l %s" % quote(labels) + res = self.exec_master_cmd(self.fmt_oc_cmd(cmd)) + if res['status'] == 0: ++ if self.get_option('role') == 'master': ++ self.log_warn("NOTE: By default, only master nodes are listed." ++ "\nTo collect from all/more nodes, override the " ++ "role option with '-c ocp.role=role1:role2'") + roles = [r for r in self.get_option('role').split(':')] + self.node_dict = self._build_dict(res['output'].splitlines()) + for node in self.node_dict: +diff --git a/sos/collector/transports/oc.py b/sos/collector/transports/oc.py +index 649037b9..de044ccb 100644 +--- a/sos/collector/transports/oc.py ++++ b/sos/collector/transports/oc.py +@@ -37,7 +37,7 @@ class OCTransport(RemoteTransport): + execution + """ + return sos_get_command_output( +- "oc -n sos-collect-tmp %s" % cmd, ++ "oc -n %s %s" % (self.project, cmd), + **kwargs + ) + +@@ -58,7 +58,7 @@ class OCTransport(RemoteTransport): + "apiVersion": "v1", + "metadata": { + "name": "%s-sos-collector" % self.address.split('.')[0], +- "namespace": "sos-collect-tmp" ++ "namespace": self.project + }, + "priorityClassName": "system-cluster-critical", + "spec": { +-- +2.31.1 + + +From 1bc0e9fe32491e764e622368bfe216f97bf32620 Mon Sep 17 00:00:00 2001 +From: Jake Hunsaker +Date: Mon, 4 Oct 2021 15:12:04 -0400 +Subject: [PATCH 4/4] [sosnode] Fix typo and small logic break + +Fixes a typo in setting the non-primary node options from the ocp +profile against the sosnode object. Second, fixes a small break in +checksum handling for the manifest discovered during `oc` transport +testing for edge cases. 
+
+Signed-off-by: Jake Hunsaker
+---
+ sos/collector/clusters/ocp.py | 4 ++--
+ sos/collector/sosnode.py | 4 +++-
+ 2 files changed, 5 insertions(+), 3 deletions(-)
+
+diff --git a/sos/collector/clusters/ocp.py b/sos/collector/clusters/ocp.py
+index 92da4e6e..22a7289a 100644
+--- a/sos/collector/clusters/ocp.py
++++ b/sos/collector/clusters/ocp.py
+@@ -183,7 +183,7 @@ class ocp(Cluster):
+         if self.api_collect_enabled:
+             # a primary has already been enabled for API collection, disable
+             # it among others
+-            node.plugin_options.append('openshift.no-oc=on')
++            node.plugopts.append('openshift.no-oc=on')
+         else:
+             _oc_cmd = 'oc'
+             if node.host.containerized:
+@@ -223,6 +223,6 @@ class ocp(Cluster):
+ 
+     def set_node_options(self, node):
+         # don't attempt OC API collections on non-primary nodes
+-        node.plugin_options.append('openshift.no-oc=on')
++        node.plugopts.append('openshift.no-oc=on')
+ 
+ # vim: set et ts=4 sw=4 :
+diff --git a/sos/collector/sosnode.py b/sos/collector/sosnode.py
+index 8a9dbd7a..ab7f23cc 100644
+--- a/sos/collector/sosnode.py
++++ b/sos/collector/sosnode.py
+@@ -714,7 +714,7 @@ class SosNode():
+             elif line.startswith("The checksum is: "):
+                 checksum = line.split()[3]
+ 
+-            if checksum is not None:
++            if checksum:
+                 self.manifest.add_field('checksum', checksum)
+                 if len(checksum) == 32:
+                     self.manifest.add_field('checksum_type', 'md5')
+@@ -722,6 +722,8 @@
+                     self.manifest.add_field('checksum_type', 'sha256')
+                 else:
+                     self.manifest.add_field('checksum_type', 'unknown')
++            else:
++                self.manifest.add_field('checksum_type', 'unknown')
+         else:
+             err = self.determine_sos_error(res['status'], res['output'])
+             self.log_debug("Error running sos report. rc = %s msg = %s"
+--
+2.31.1
+
From 38a0533de3dd2613eefcc4865a2916e225e3ceed Mon Sep 17 00:00:00 2001
+From: Pavel Moravec
+Date: Tue, 9 Nov 2021 19:34:25 +0100
+Subject: [PATCH] [presets] Optimise OCP preset for hundreds of network
+ namespaces
+
+An sos report on an OCP cluster with hundreds of network namespaces times
+out in the networking plugin, as it collects more than 10 commands per
+namespace.
+
+Take a balanced approach:
+- increase network.timeout
+- limit the namespaces to traverse
+- disable per-namespace ethtool collection
+
+to ensure sos report finishes successfully in a reasonable time while
+collecting a reasonable amount of data.
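+
+(Illustration only, not part of this patch: the tuning described above maps
+onto an SoSOptions preset. A minimal sketch, assuming only the SoSOptions
+class from sos.options; the option values mirror the hunk below:)
+
+    from sos.options import SoSOptions
+
+    # Balanced OCP tuning: longer networking timeout, capped namespace
+    # traversal, and no per-namespace ethtool collection.
+    balanced_opts = SoSOptions(all_logs=True, verify=True, plugopts=[
+        'networking.timeout=600',
+        'networking.ethtool_namespaces=False',
+        'networking.namespaces=200'])
+
+(On a system shipping this preset, the same tuning is selected on the
+command line with `sos report --preset ocp`.)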
+ +Resolves: #2754 + +Signed-off-by: Pavel Moravec +--- + sos/presets/redhat/__init__.py | 10 +++++++--- + 1 file changed, 7 insertions(+), 3 deletions(-) + +diff --git a/sos/presets/redhat/__init__.py b/sos/presets/redhat/__init__.py +index e6d63611..865c9b6b 100644 +--- a/sos/presets/redhat/__init__.py ++++ b/sos/presets/redhat/__init__.py +@@ -29,11 +29,15 @@ RHEL_DESC = RHEL_RELEASE_STR + + RHOSP = "rhosp" + RHOSP_DESC = "Red Hat OpenStack Platform" ++RHOSP_OPTS = SoSOptions(plugopts=[ ++ 'process.lsof=off', ++ 'networking.ethtool_namespaces=False', ++ 'networking.namespaces=200']) + + RHOCP = "ocp" + RHOCP_DESC = "OpenShift Container Platform by Red Hat" +-RHOSP_OPTS = SoSOptions(plugopts=[ +- 'process.lsof=off', ++RHOCP_OPTS = SoSOptions(all_logs=True, verify=True, plugopts=[ ++ 'networking.timeout=600', + 'networking.ethtool_namespaces=False', + 'networking.namespaces=200']) + +@@ -62,7 +66,7 @@ RHEL_PRESETS = { + RHEL: PresetDefaults(name=RHEL, desc=RHEL_DESC), + RHOSP: PresetDefaults(name=RHOSP, desc=RHOSP_DESC, opts=RHOSP_OPTS), + RHOCP: PresetDefaults(name=RHOCP, desc=RHOCP_DESC, note=NOTE_SIZE_TIME, +- opts=_opts_all_logs_verify), ++ opts=RHOCP_OPTS), + RH_CFME: PresetDefaults(name=RH_CFME, desc=RH_CFME_DESC, note=NOTE_TIME, + opts=_opts_verify), + RH_SATELLITE: PresetDefaults(name=RH_SATELLITE, desc=RH_SATELLITE_DESC, +-- +2.31.1 + +From 97b93c7f8755d04bdeb4f93759c20dcb787f2046 Mon Sep 17 00:00:00 2001 +From: Jake Hunsaker +Date: Tue, 2 Nov 2021 11:34:13 -0400 +Subject: [PATCH] [Plugin] Rework get_container_logs to be more useful + +`get_container_logs()` is now `add_container_logs()` to align it better +with our more common `add_*` methods for plugin collections. + +Additionally, it has been extended to accept either a single string or a +list of strings like the other methods, and plugin authors may now +specify either specific container names or regexes. + +Signed-off-by: Jake Hunsaker +--- + sos/report/plugins/__init__.py | 22 +++++++++++++++++----- + sos/report/plugins/rabbitmq.py | 2 +- + 2 files changed, 18 insertions(+), 6 deletions(-) + +diff --git a/sos/report/plugins/__init__.py b/sos/report/plugins/__init__.py +index 08eee118..4b0e4fd5 100644 +--- a/sos/report/plugins/__init__.py ++++ b/sos/report/plugins/__init__.py +@@ -2366,20 +2366,32 @@ class Plugin(): + return _runtime.volumes + return [] + +- def get_container_logs(self, container, **kwargs): +- """Helper to get the ``logs`` output for a given container ++ def add_container_logs(self, containers, get_all=False, **kwargs): ++ """Helper to get the ``logs`` output for a given container or list ++ of container names and/or regexes. + + Supports passthru of add_cmd_output() options + +- :param container: The name of the container to retrieve logs from +- :type container: ``str`` ++ :param containers: The name of the container to retrieve logs from, ++ may be a single name or a regex ++ :type containers: ``str`` or ``list` of strs ++ ++ :param get_all: Should non-running containers also be queried? 
++ Default: False ++ :type get_all: ``bool`` + + :param kwargs: Any kwargs supported by ``add_cmd_output()`` are + supported here + """ + _runtime = self._get_container_runtime() + if _runtime is not None: +- self.add_cmd_output(_runtime.get_logs_command(container), **kwargs) ++ if isinstance(containers, str): ++ containers = [containers] ++ for container in containers: ++ _cons = self.get_all_containers_by_regex(container, get_all) ++ for _con in _cons: ++ cmd = _runtime.get_logs_command(_con[1]) ++ self.add_cmd_output(cmd, **kwargs) + + def fmt_container_cmd(self, container, cmd, quotecmd=False): + """Format a command to be executed by the loaded ``ContainerRuntime`` +diff --git a/sos/report/plugins/rabbitmq.py b/sos/report/plugins/rabbitmq.py +index e84b52da..1bfa741f 100644 +--- a/sos/report/plugins/rabbitmq.py ++++ b/sos/report/plugins/rabbitmq.py +@@ -32,7 +32,7 @@ class RabbitMQ(Plugin, IndependentPlugin): + + if in_container: + for container in container_names: +- self.get_container_logs(container) ++ self.add_container_logs(container) + self.add_cmd_output( + self.fmt_container_cmd(container, 'rabbitmqctl report'), + foreground=True +-- +2.31.1 + +From 3d064102f8ca6662fd9602512e1cb05cf8746dfd Mon Sep 17 00:00:00 2001 +From: Jake Hunsaker +Date: Mon, 27 Sep 2021 19:01:16 -0400 +Subject: [PATCH] [Systemd, Policy] Correct InitSystem chrooting when chroot is + needed + +This commit resolves a situation in which `sos` is being run in a +container but the `SystemdInit` InitSystem would not properly load +information from the host, thus causing the `Plugin.is_service*()` +methods to erroneously fail or return `False`. + +Fix this scenario by pulling the `_container_init()` and related logic +to check for a containerized host sysroot out of the Red Hat specific +policy and into the base `LinuxPolicy` class so that the init system can +be initialized with the correct sysroot, which is now used to chroot the +calls to the relevant `systemctl` commands. + +For now, this does impose the use of looking for the `container` env var +(automatically set by docker, podman, and crio regardless of +distribution) and the use of the `HOST` env var to read where the host's +`/` filesystem is mounted within the container. If desired in the +future, this can be changed to allow policy-specific overrides. For now +however, this extends host collection via an sos container for all +distributions currently shipping sos. + +Note that this issue only affected the `InitSystem` abstraction for +loading information about local services, and did not affect init system +related commands called by plugins as part of those collections. 
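+
+(Illustration only, not part of this patch: a minimal sketch of the chrooted
+service query this enables, assuming sos.utilities.sos_get_command_output
+and its existing chroot parameter, which the hunks below also use:)
+
+    from sos.utilities import sos_get_command_output
+
+    def query_service(name, chroot=None):
+        # With chroot='/host' (a mounted host sysroot inside a support
+        # container), systemctl reports the host's unit state rather
+        # than the container's.
+        return sos_get_command_output("systemctl status %s" % name,
+                                      chroot=chroot)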
+ +Signed-off-by: Jake Hunsaker +--- + sos/policies/distros/__init__.py | 28 ++++++++++++++++++++++++++- + sos/policies/distros/redhat.py | 27 +------------------------- + sos/policies/init_systems/__init__.py | 13 +++++++++++-- + sos/policies/init_systems/systemd.py | 7 ++++--- + 4 files changed, 43 insertions(+), 32 deletions(-) + +diff --git a/sos/policies/distros/__init__.py b/sos/policies/distros/__init__.py +index f5b9fd5b01..c33a356a75 100644 +--- a/sos/policies/distros/__init__.py ++++ b/sos/policies/distros/__init__.py +@@ -29,6 +29,10 @@ + except ImportError: + REQUESTS_LOADED = False + ++# Container environment variables for detecting if we're in a container ++ENV_CONTAINER = 'container' ++ENV_HOST_SYSROOT = 'HOST' ++ + + class LinuxPolicy(Policy): + """This policy is meant to be an abc class that provides common +@@ -69,10 +73,17 @@ def __init__(self, sysroot=None, init=None, probe_runtime=True): + probe_runtime=probe_runtime) + self.init_kernel_modules() + ++ # need to set _host_sysroot before PackageManager() ++ if sysroot: ++ self._container_init() ++ self._host_sysroot = sysroot ++ else: ++ sysroot = self._container_init() ++ + if init is not None: + self.init_system = init + elif os.path.isdir("/run/systemd/system/"): +- self.init_system = SystemdInit() ++ self.init_system = SystemdInit(chroot=sysroot) + else: + self.init_system = InitSystem() + +@@ -130,6 +141,21 @@ def get_local_name(self): + def sanitize_filename(self, name): + return re.sub(r"[^-a-z,A-Z.0-9]", "", name) + ++ def _container_init(self): ++ """Check if sos is running in a container and perform container ++ specific initialisation based on ENV_HOST_SYSROOT. ++ """ ++ if ENV_CONTAINER in os.environ: ++ if os.environ[ENV_CONTAINER] in ['docker', 'oci', 'podman']: ++ self._in_container = True ++ if ENV_HOST_SYSROOT in os.environ: ++ self._host_sysroot = os.environ[ENV_HOST_SYSROOT] ++ use_sysroot = self._in_container and self._host_sysroot is not None ++ if use_sysroot: ++ host_tmp_dir = os.path.abspath(self._host_sysroot + self._tmp_dir) ++ self._tmp_dir = host_tmp_dir ++ return self._host_sysroot if use_sysroot else None ++ + def init_kernel_modules(self): + """Obtain a list of loaded kernel modules to reference later for plugin + enablement and SoSPredicate checks +diff --git a/sos/policies/distros/redhat.py b/sos/policies/distros/redhat.py +index b3a84336be..3476e21fb2 100644 +--- a/sos/policies/distros/redhat.py ++++ b/sos/policies/distros/redhat.py +@@ -17,7 +17,7 @@ + from sos.presets.redhat import (RHEL_PRESETS, ATOMIC_PRESETS, RHV, RHEL, + CB, RHOSP, RHOCP, RH_CFME, RH_SATELLITE, + ATOMIC) +-from sos.policies.distros import LinuxPolicy ++from sos.policies.distros import LinuxPolicy, ENV_HOST_SYSROOT + from sos.policies.package_managers.rpm import RpmPackageManager + from sos import _sos as _ + +@@ -56,12 +56,6 @@ def __init__(self, sysroot=None, init=None, probe_runtime=True, + super(RedHatPolicy, self).__init__(sysroot=sysroot, init=init, + probe_runtime=probe_runtime) + self.usrmove = False +- # need to set _host_sysroot before PackageManager() +- if sysroot: +- self._container_init() +- self._host_sysroot = sysroot +- else: +- sysroot = self._container_init() + + self.package_manager = RpmPackageManager(chroot=sysroot, + remote_exec=remote_exec) +@@ -140,21 +134,6 @@ def transform_path(path): + else: + return files + +- def _container_init(self): +- """Check if sos is running in a container and perform container +- specific initialisation based on ENV_HOST_SYSROOT. 
+- """ +- if ENV_CONTAINER in os.environ: +- if os.environ[ENV_CONTAINER] in ['docker', 'oci', 'podman']: +- self._in_container = True +- if ENV_HOST_SYSROOT in os.environ: +- self._host_sysroot = os.environ[ENV_HOST_SYSROOT] +- use_sysroot = self._in_container and self._host_sysroot is not None +- if use_sysroot: +- host_tmp_dir = os.path.abspath(self._host_sysroot + self._tmp_dir) +- self._tmp_dir = host_tmp_dir +- return self._host_sysroot if use_sysroot else None +- + def runlevel_by_service(self, name): + from subprocess import Popen, PIPE + ret = [] +@@ -183,10 +162,6 @@ def get_tmp_dir(self, opt_tmp_dir): + return opt_tmp_dir + + +-# Container environment variables on Red Hat systems. +-ENV_CONTAINER = 'container' +-ENV_HOST_SYSROOT = 'HOST' +- + # Legal disclaimer text for Red Hat products + disclaimer_text = """ + Any information provided to %(vendor)s will be treated in \ +diff --git a/sos/policies/init_systems/__init__.py b/sos/policies/init_systems/__init__.py +index dd663e6522..beac44cee3 100644 +--- a/sos/policies/init_systems/__init__.py ++++ b/sos/policies/init_systems/__init__.py +@@ -29,9 +29,14 @@ class InitSystem(): + status of services + :type query_cmd: ``str`` + ++ :param chroot: Location to chroot to for any command execution, i.e. the ++ sysroot if we're running in a container ++ :type chroot: ``str`` or ``None`` ++ + """ + +- def __init__(self, init_cmd=None, list_cmd=None, query_cmd=None): ++ def __init__(self, init_cmd=None, list_cmd=None, query_cmd=None, ++ chroot=None): + """Initialize a new InitSystem()""" + + self.services = {} +@@ -39,6 +44,7 @@ def __init__(self, init_cmd=None, list_cmd=None, query_cmd=None): + self.init_cmd = init_cmd + self.list_cmd = "%s %s" % (self.init_cmd, list_cmd) or None + self.query_cmd = "%s %s" % (self.init_cmd, query_cmd) or None ++ self.chroot = chroot + + def is_enabled(self, name): + """Check if given service name is enabled +@@ -108,7 +114,10 @@ def _query_service(self, name): + """Query an individual service""" + if self.query_cmd: + try: +- return sos_get_command_output("%s %s" % (self.query_cmd, name)) ++ return sos_get_command_output( ++ "%s %s" % (self.query_cmd, name), ++ chroot=self.chroot ++ ) + except Exception: + return None + return None +diff --git a/sos/policies/init_systems/systemd.py b/sos/policies/init_systems/systemd.py +index 1b138f97b3..76dc57e27f 100644 +--- a/sos/policies/init_systems/systemd.py ++++ b/sos/policies/init_systems/systemd.py +@@ -15,11 +15,12 @@ + class SystemdInit(InitSystem): + """InitSystem abstraction for SystemD systems""" + +- def __init__(self): ++ def __init__(self, chroot=None): + super(SystemdInit, self).__init__( + init_cmd='systemctl', + list_cmd='list-unit-files --type=service', +- query_cmd='status' ++ query_cmd='status', ++ chroot=chroot + ) + self.load_all_services() + +@@ -30,7 +31,7 @@ def parse_query(self, output): + return 'unknown' + + def load_all_services(self): +- svcs = shell_out(self.list_cmd).splitlines()[1:] ++ svcs = shell_out(self.list_cmd, chroot=self.chroot).splitlines()[1:] + for line in svcs: + try: + name = line.split('.service')[0] +-- +2.31.1 + +From 9596473d1779b9c48e9923c220aaf2b8d9b3bebf Mon Sep 17 00:00:00 2001 +From: Jake Hunsaker +Date: Thu, 18 Nov 2021 13:17:14 -0500 +Subject: [PATCH] [global] Align sysroot determination and usage across sos + +The determination of sysroot - being automatic, user-specified, or +controlled via environment variables in a container - has gotten muddied +over time. 
This has resulted in different parts of the project; +`Policy`, `Plugin`, `SoSComponent`, etc... to not always be in sync when +sysroot is not `/`, thus causing varying and unexpected/unintended +behavior. + +Fix this by only determining sysroot within `Policy()` initialization, +and then using that determination across all aspects of the project that +use or reference sysroot. + +This results in several changes: + +- `PackageManager()` will now (again) correctly reference host package + lists when sos is run in a container. + +- `ContainerRuntime()` is now able to activate when sos is running in a + container. + +- Plugins will now properly use sysroot for _all_ plugin enablement + triggers. + +- Plugins, Policy, and SoSComponents now all reference the + `self.sysroot` variable, rather than changing between `sysroot`. +`_host_sysroot`, and `commons['sysroot']`. `_host_sysroot` has been +removed from `Policy`. + +Signed-off-by: Jake Hunsaker +--- + sos/archive.py | 2 +- + sos/component.py | 2 +- + sos/policies/__init__.py | 11 +---------- + sos/policies/distros/__init__.py | 33 +++++++++++++++++++------------ + sos/policies/distros/debian.py | 2 +- + sos/policies/distros/redhat.py | 3 +-- + sos/policies/runtimes/__init__.py | 15 +++++++++----- + sos/policies/runtimes/docker.py | 4 ++-- + sos/report/__init__.py | 6 ++---- + sos/report/plugins/__init__.py | 22 +++++++++++---------- + sos/report/plugins/unpackaged.py | 7 ++++--- + sos/utilities.py | 13 ++++++++---- + 12 files changed, 64 insertions(+), 56 deletions(-) + +diff --git a/sos/archive.py b/sos/archive.py +index b02b2475..e3c68b77 100644 +--- a/sos/archive.py ++++ b/sos/archive.py +@@ -153,7 +153,7 @@ class FileCacheArchive(Archive): + return (os.path.join(self._archive_root, name)) + + def join_sysroot(self, path): +- if path.startswith(self.sysroot): ++ if not self.sysroot or path.startswith(self.sysroot): + return path + if path[0] == os.sep: + path = path[1:] +diff --git a/sos/component.py b/sos/component.py +index 5ac6e47f..dba0aabf 100644 +--- a/sos/component.py ++++ b/sos/component.py +@@ -109,7 +109,7 @@ class SoSComponent(): + try: + import sos.policies + self.policy = sos.policies.load(sysroot=self.opts.sysroot) +- self.sysroot = self.policy.host_sysroot() ++ self.sysroot = self.policy.sysroot + except KeyboardInterrupt: + self._exit(0) + self._is_root = self.policy.is_root() +diff --git a/sos/policies/__init__.py b/sos/policies/__init__.py +index fb8db1d7..ef9188de 100644 +--- a/sos/policies/__init__.py ++++ b/sos/policies/__init__.py +@@ -110,7 +110,6 @@ any third party. + presets = {"": PresetDefaults()} + presets_path = PRESETS_PATH + _in_container = False +- _host_sysroot = '/' + + def __init__(self, sysroot=None, probe_runtime=True): + """Subclasses that choose to override this initializer should call +@@ -124,7 +123,7 @@ any third party. + self.package_manager = PackageManager() + self.valid_subclasses = [IndependentPlugin] + self.set_exec_path() +- self._host_sysroot = sysroot ++ self.sysroot = sysroot + self.register_presets(GENERIC_PRESETS) + + def check(self, remote=''): +@@ -177,14 +176,6 @@ any third party. 
+ """ + return self._in_container + +- def host_sysroot(self): +- """Get the host's default sysroot +- +- :returns: Host sysroot +- :rtype: ``str`` or ``None`` +- """ +- return self._host_sysroot +- + def dist_version(self): + """ + Return the OS version +diff --git a/sos/policies/distros/__init__.py b/sos/policies/distros/__init__.py +index 7bdc81b8..c69fc1e7 100644 +--- a/sos/policies/distros/__init__.py ++++ b/sos/policies/distros/__init__.py +@@ -71,19 +71,18 @@ class LinuxPolicy(Policy): + def __init__(self, sysroot=None, init=None, probe_runtime=True): + super(LinuxPolicy, self).__init__(sysroot=sysroot, + probe_runtime=probe_runtime) +- self.init_kernel_modules() + +- # need to set _host_sysroot before PackageManager() + if sysroot: +- self._container_init() +- self._host_sysroot = sysroot ++ self.sysroot = sysroot + else: +- sysroot = self._container_init() ++ self.sysroot = self._container_init() ++ ++ self.init_kernel_modules() + + if init is not None: + self.init_system = init + elif os.path.isdir("/run/systemd/system/"): +- self.init_system = SystemdInit(chroot=sysroot) ++ self.init_system = SystemdInit(chroot=self.sysroot) + else: + self.init_system = InitSystem() + +@@ -149,27 +148,30 @@ class LinuxPolicy(Policy): + if os.environ[ENV_CONTAINER] in ['docker', 'oci', 'podman']: + self._in_container = True + if ENV_HOST_SYSROOT in os.environ: +- self._host_sysroot = os.environ[ENV_HOST_SYSROOT] +- use_sysroot = self._in_container and self._host_sysroot is not None ++ _host_sysroot = os.environ[ENV_HOST_SYSROOT] ++ use_sysroot = self._in_container and _host_sysroot is not None + if use_sysroot: +- host_tmp_dir = os.path.abspath(self._host_sysroot + self._tmp_dir) ++ host_tmp_dir = os.path.abspath(_host_sysroot + self._tmp_dir) + self._tmp_dir = host_tmp_dir +- return self._host_sysroot if use_sysroot else None ++ return _host_sysroot if use_sysroot else None + + def init_kernel_modules(self): + """Obtain a list of loaded kernel modules to reference later for plugin + enablement and SoSPredicate checks + """ + self.kernel_mods = [] ++ release = os.uname().release + + # first load modules from lsmod +- lines = shell_out("lsmod", timeout=0).splitlines() ++ lines = shell_out("lsmod", timeout=0, chroot=self.sysroot).splitlines() + self.kernel_mods.extend([ + line.split()[0].strip() for line in lines[1:] + ]) + + # next, include kernel builtins +- builtins = "/usr/lib/modules/%s/modules.builtin" % os.uname().release ++ builtins = self.join_sysroot( ++ "/usr/lib/modules/%s/modules.builtin" % release ++ ) + try: + with open(builtins, "r") as mfile: + for line in mfile: +@@ -186,7 +188,7 @@ class LinuxPolicy(Policy): + 'dm_mod': 'CONFIG_BLK_DEV_DM' + } + +- booted_config = "/boot/config-%s" % os.uname().release ++ booted_config = self.join_sysroot("/boot/config-%s" % release) + kconfigs = [] + try: + with open(booted_config, "r") as kfile: +@@ -200,6 +202,11 @@ class LinuxPolicy(Policy): + if config_strings[builtin] in kconfigs: + self.kernel_mods.append(builtin) + ++ def join_sysroot(self, path): ++ if self.sysroot and self.sysroot != '/': ++ path = os.path.join(self.sysroot, path.lstrip('/')) ++ return path ++ + def pre_work(self): + # this method will be called before the gathering begins + +diff --git a/sos/policies/distros/debian.py b/sos/policies/distros/debian.py +index 95b389a6..639fd5eb 100644 +--- a/sos/policies/distros/debian.py ++++ b/sos/policies/distros/debian.py +@@ -27,7 +27,7 @@ class DebianPolicy(LinuxPolicy): + remote_exec=None): + super(DebianPolicy, 
self).__init__(sysroot=sysroot, init=init, + probe_runtime=probe_runtime) +- self.package_manager = DpkgPackageManager(chroot=sysroot, ++ self.package_manager = DpkgPackageManager(chroot=self.sysroot, + remote_exec=remote_exec) + self.valid_subclasses += [DebianPlugin] + +diff --git a/sos/policies/distros/redhat.py b/sos/policies/distros/redhat.py +index eb442407..4b14abaf 100644 +--- a/sos/policies/distros/redhat.py ++++ b/sos/policies/distros/redhat.py +@@ -42,7 +42,6 @@ class RedHatPolicy(LinuxPolicy): + _redhat_release = '/etc/redhat-release' + _tmp_dir = "/var/tmp" + _in_container = False +- _host_sysroot = '/' + default_scl_prefix = '/opt/rh' + name_pattern = 'friendly' + upload_url = None +@@ -57,7 +56,7 @@ class RedHatPolicy(LinuxPolicy): + probe_runtime=probe_runtime) + self.usrmove = False + +- self.package_manager = RpmPackageManager(chroot=sysroot, ++ self.package_manager = RpmPackageManager(chroot=self.sysroot, + remote_exec=remote_exec) + + self.valid_subclasses += [RedHatPlugin] +diff --git a/sos/policies/runtimes/__init__.py b/sos/policies/runtimes/__init__.py +index f28d6a1d..2e60ad23 100644 +--- a/sos/policies/runtimes/__init__.py ++++ b/sos/policies/runtimes/__init__.py +@@ -64,7 +64,7 @@ class ContainerRuntime(): + :returns: ``True`` if the runtime is active, else ``False`` + :rtype: ``bool`` + """ +- if is_executable(self.binary): ++ if is_executable(self.binary, self.policy.sysroot): + self.active = True + return True + return False +@@ -78,7 +78,7 @@ class ContainerRuntime(): + containers = [] + _cmd = "%s ps %s" % (self.binary, '-a' if get_all else '') + if self.active: +- out = sos_get_command_output(_cmd) ++ out = sos_get_command_output(_cmd, chroot=self.policy.sysroot) + if out['status'] == 0: + for ent in out['output'].splitlines()[1:]: + ent = ent.split() +@@ -112,8 +112,10 @@ class ContainerRuntime(): + images = [] + fmt = '{{lower .Repository}}:{{lower .Tag}} {{lower .ID}}' + if self.active: +- out = sos_get_command_output("%s images --format '%s'" +- % (self.binary, fmt)) ++ out = sos_get_command_output( ++ "%s images --format '%s'" % (self.binary, fmt), ++ chroot=self.policy.sysroot ++ ) + if out['status'] == 0: + for ent in out['output'].splitlines(): + ent = ent.split() +@@ -129,7 +131,10 @@ class ContainerRuntime(): + """ + vols = [] + if self.active: +- out = sos_get_command_output("%s volume ls" % self.binary) ++ out = sos_get_command_output( ++ "%s volume ls" % self.binary, ++ chroot=self.policy.sysroot ++ ) + if out['status'] == 0: + for ent in out['output'].splitlines()[1:]: + ent = ent.split() +diff --git a/sos/policies/runtimes/docker.py b/sos/policies/runtimes/docker.py +index 759dfaf6..e81f580e 100644 +--- a/sos/policies/runtimes/docker.py ++++ b/sos/policies/runtimes/docker.py +@@ -18,9 +18,9 @@ class DockerContainerRuntime(ContainerRuntime): + name = 'docker' + binary = 'docker' + +- def check_is_active(self): ++ def check_is_active(self, sysroot=None): + # the daemon must be running +- if (is_executable('docker') and ++ if (is_executable('docker', sysroot) and + (self.policy.init_system.is_running('docker') or + self.policy.init_system.is_running('snap.docker.dockerd'))): + self.active = True +diff --git a/sos/report/__init__.py b/sos/report/__init__.py +index a4c92acc..a6c72778 100644 +--- a/sos/report/__init__.py ++++ b/sos/report/__init__.py +@@ -173,14 +173,12 @@ class SoSReport(SoSComponent): + self._set_directories() + + msg = "default" +- host_sysroot = self.policy.host_sysroot() ++ self.sysroot = self.policy.sysroot + # set alternate 
system root directory + if self.opts.sysroot: + msg = "cmdline" +- self.sysroot = self.opts.sysroot +- elif self.policy.in_container() and host_sysroot != os.sep: ++ elif self.policy.in_container() and self.sysroot != os.sep: + msg = "policy" +- self.sysroot = host_sysroot + self.soslog.debug("set sysroot to '%s' (%s)" % (self.sysroot, msg)) + + if self.opts.chroot not in chroot_modes: +diff --git a/sos/report/plugins/__init__.py b/sos/report/plugins/__init__.py +index 46028bb1..e180ae17 100644 +--- a/sos/report/plugins/__init__.py ++++ b/sos/report/plugins/__init__.py +@@ -724,7 +724,7 @@ class Plugin(): + """ + if not self.use_sysroot(): + return path +- if path.startswith(self.sysroot): ++ if self.sysroot and path.startswith(self.sysroot): + return path[len(self.sysroot):] + return path + +@@ -743,8 +743,10 @@ class Plugin(): + ``False`` + :rtype: ``bool`` + """ +- paths = [self.sysroot, self.archive.get_tmp_dir()] +- return os.path.commonprefix(paths) == self.sysroot ++ # if sysroot is still None, that implies '/' ++ _sysroot = self.sysroot or '/' ++ paths = [_sysroot, self.archive.get_tmp_dir()] ++ return os.path.commonprefix(paths) == _sysroot + + def is_installed(self, package_name): + """Is the package $package_name installed? +@@ -2621,7 +2623,7 @@ class Plugin(): + return list(set(expanded)) + + def _collect_copy_specs(self): +- for path in self.copy_paths: ++ for path in sorted(self.copy_paths, reverse=True): + self._log_info("collecting path '%s'" % path) + self._do_copy_path(path) + self.generate_copyspec_tags() +@@ -2749,7 +2751,7 @@ class Plugin(): + + return ((any(self.path_exists(fname) for fname in files) or + any(self.is_installed(pkg) for pkg in packages) or +- any(is_executable(cmd) for cmd in commands) or ++ any(is_executable(cmd, self.sysroot) for cmd in commands) or + any(self.is_module_loaded(mod) for mod in self.kernel_mods) or + any(self.is_service(svc) for svc in services) or + any(self.container_exists(cntr) for cntr in containers)) and +@@ -2817,7 +2819,7 @@ class Plugin(): + :returns: True if the path exists in sysroot, else False + :rtype: ``bool`` + """ +- return path_exists(path, self.commons['cmdlineopts'].sysroot) ++ return path_exists(path, self.sysroot) + + def path_isdir(self, path): + """Helper to call the sos.utilities wrapper that allows the +@@ -2830,7 +2832,7 @@ class Plugin(): + :returns: True if the path is a dir, else False + :rtype: ``bool`` + """ +- return path_isdir(path, self.commons['cmdlineopts'].sysroot) ++ return path_isdir(path, self.sysroot) + + def path_isfile(self, path): + """Helper to call the sos.utilities wrapper that allows the +@@ -2843,7 +2845,7 @@ class Plugin(): + :returns: True if the path is a file, else False + :rtype: ``bool`` + """ +- return path_isfile(path, self.commons['cmdlineopts'].sysroot) ++ return path_isfile(path, self.sysroot) + + def path_islink(self, path): + """Helper to call the sos.utilities wrapper that allows the +@@ -2856,7 +2858,7 @@ class Plugin(): + :returns: True if the path is a link, else False + :rtype: ``bool`` + """ +- return path_islink(path, self.commons['cmdlineopts'].sysroot) ++ return path_islink(path, self.sysroot) + + def listdir(self, path): + """Helper to call the sos.utilities wrapper that allows the +@@ -2869,7 +2871,7 @@ class Plugin(): + :returns: Contents of path, if it is a directory + :rtype: ``list`` + """ +- return listdir(path, self.commons['cmdlineopts'].sysroot) ++ return listdir(path, self.sysroot) + + def path_join(self, path, *p): + """Helper to call the sos.utilities 
wrapper that allows the +diff --git a/sos/report/plugins/unpackaged.py b/sos/report/plugins/unpackaged.py +index 772b1d1f..24203c4b 100644 +--- a/sos/report/plugins/unpackaged.py ++++ b/sos/report/plugins/unpackaged.py +@@ -58,10 +58,11 @@ class Unpackaged(Plugin, RedHatPlugin): + """ + expanded = [] + for f in files: +- if self.path_islink(f): +- expanded.append("{} -> {}".format(f, os.readlink(f))) ++ fp = self.path_join(f) ++ if self.path_islink(fp): ++ expanded.append("{} -> {}".format(fp, os.readlink(fp))) + else: +- expanded.append(f) ++ expanded.append(fp) + return expanded + + # Check command predicate to avoid costly processing +diff --git a/sos/utilities.py b/sos/utilities.py +index b7575153..d6630933 100644 +--- a/sos/utilities.py ++++ b/sos/utilities.py +@@ -96,11 +96,15 @@ def grep(pattern, *files_or_paths): + return matches + + +-def is_executable(command): ++def is_executable(command, sysroot=None): + """Returns if a command matches an executable on the PATH""" + + paths = os.environ.get("PATH", "").split(os.path.pathsep) + candidates = [command] + [os.path.join(p, command) for p in paths] ++ if sysroot: ++ candidates += [ ++ os.path.join(sysroot, c.lstrip('/')) for c in candidates ++ ] + return any(os.access(path, os.X_OK) for path in candidates) + + +@@ -216,8 +220,9 @@ def get_human_readable(size, precision=2): + + + def _os_wrapper(path, sysroot, method, module=os.path): +- if sysroot not in [None, '/']: +- path = os.path.join(sysroot, path.lstrip('/')) ++ if sysroot and sysroot != os.sep: ++ if not path.startswith(sysroot): ++ path = os.path.join(sysroot, path.lstrip('/')) + _meth = getattr(module, method) + return _meth(path) + +@@ -243,7 +248,7 @@ def listdir(path, sysroot): + + + def path_join(path, *p, sysroot=os.sep): +- if not path.startswith(sysroot): ++ if sysroot and not path.startswith(sysroot): + path = os.path.join(sysroot, path.lstrip(os.sep)) + return os.path.join(path, *p) + +-- +2.31.1 + +From 8bf602108f75db10e449eff5e2266c6466504086 Mon Sep 17 00:00:00 2001 +From: Nadia Pinaeva +Date: Thu, 2 Dec 2021 16:30:44 +0100 +Subject: [PATCH] [clusters:ocp] fix get_nodes function + +Signed-off-by: Nadia Pinaeva +--- + sos/collector/clusters/ocp.py | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +diff --git a/sos/collector/clusters/ocp.py b/sos/collector/clusters/ocp.py +index 22a7289a..2ce4e977 100644 +--- a/sos/collector/clusters/ocp.py ++++ b/sos/collector/clusters/ocp.py +@@ -150,13 +150,13 @@ class ocp(Cluster): + "role option with '-c ocp.role=role1:role2'") + roles = [r for r in self.get_option('role').split(':')] + self.node_dict = self._build_dict(res['output'].splitlines()) +- for node in self.node_dict: ++ for node_name, node in self.node_dict.items(): + if roles: + for role in roles: +- if role in node: +- nodes.append(node) ++ if role == node['roles']: ++ nodes.append(node_name) + else: +- nodes.append(node) ++ nodes.append(node_name) + else: + msg = "'oc' command failed" + if 'Missing or incomplete' in res['output']: +-- +2.31.1 + +From 5d80ac6dc67e12ef00903436c088a1694f9a7dd7 Mon Sep 17 00:00:00 2001 +From: Jake Hunsaker +Date: Wed, 1 Dec 2021 14:13:16 -0500 +Subject: [PATCH] [collect] Catch command not found exceptions from pexpect + +When running a command that does not exist on the system, catch the +resulting pexpect exception and return the proper status code rather +than allowing an untrapped exception. 
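+
+(Illustration only, not part of this patch: the failure mode being trapped
+can be reproduced standalone. A minimal sketch, assuming only pexpect:)
+
+    import pexpect
+
+    def spawn_cmd(cmd):
+        # A missing binary raises ExceptionPexpect; map it to the shell's
+        # conventional 127 "command not found" status, as the patch does,
+        # instead of letting the exception propagate.
+        try:
+            result = pexpect.spawn(cmd, encoding='utf-8')
+        except pexpect.exceptions.ExceptionPexpect:
+            return {'status': 127, 'output': ''}
+        result.expect(pexpect.EOF)
+        output = result.before
+        result.close()
+        return {'status': result.exitstatus, 'output': output}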
+ +Closes: #2768 + +Signed-off-by: Jake Hunsaker +--- + sos/collector/transports/__init__.py | 6 +++++- + 1 file changed, 5 insertions(+), 1 deletion(-) + +diff --git a/sos/collector/transports/__init__.py b/sos/collector/transports/__init__.py +index 7bffee62..33f2f66d 100644 +--- a/sos/collector/transports/__init__.py ++++ b/sos/collector/transports/__init__.py +@@ -225,7 +225,11 @@ class RemoteTransport(): + if not env: + env = None + +- result = pexpect.spawn(cmd, encoding='utf-8', env=env) ++ try: ++ result = pexpect.spawn(cmd, encoding='utf-8', env=env) ++ except pexpect.exceptions.ExceptionPexpect as err: ++ self.log_debug(err.value) ++ return {'status': 127, 'output': ''} + + _expects = [pexpect.EOF, pexpect.TIMEOUT] + if need_root and self.opts.ssh_user != 'root': +-- +2.31.1 + +From decb5d26c165e664fa879a669f2d80165181f0e1 Mon Sep 17 00:00:00 2001 +From: Jake Hunsaker +Date: Thu, 2 Dec 2021 14:02:17 -0500 +Subject: [PATCH] [report,collect] Add option to control default container + runtime + +Adds a new `--container-runtime` option that allows users to control +what default container runtime is used by plugins for container based +collections, effectively overriding policy defaults. + +If no runtimes are active, this option is effectively ignored. If +however runtimes are active, but the requested one is not, raise an +exception to abort collection with an appropriate message to the user. + +Signed-off-by: Jake Hunsaker +--- + man/en/sos-collect.1 | 6 ++++++ + man/en/sos-report.1 | 19 +++++++++++++++++++ + sos/collector/__init__.py | 4 ++++ + sos/collector/sosnode.py | 6 ++++++ + sos/report/__init__.py | 36 ++++++++++++++++++++++++++++++++++++ + 5 files changed, 71 insertions(+) + +diff --git a/man/en/sos-collect.1 b/man/en/sos-collect.1 +index a1f6c10e..9b0a5d7b 100644 +--- a/man/en/sos-collect.1 ++++ b/man/en/sos-collect.1 +@@ -11,6 +11,7 @@ sos collect \- Collect sosreports from multiple (cluster) nodes + [\-\-chroot CHROOT] + [\-\-case\-id CASE_ID] + [\-\-cluster\-type CLUSTER_TYPE] ++ [\-\-container\-runtime RUNTIME] + [\-e ENABLE_PLUGINS] + [--encrypt-key KEY]\fR + [--encrypt-pass PASS]\fR +@@ -113,6 +114,11 @@ Example: \fBsos collect --cluster-type=kubernetes\fR will force the kubernetes p + to be run, and thus set sosreport options and attempt to determine a list of nodes using + that profile. + .TP ++\fB\-\-container\-runtime\fR RUNTIME ++\fB sos report\fR option. Using this with \fBcollect\fR will pass this option thru ++to nodes with sos version 4.3 or later. This option controls the default container ++runtime plugins will use for collections. See \fBman sos-report\fR. ++.TP + \fB\-e\fR ENABLE_PLUGINS, \fB\-\-enable\-plugins\fR ENABLE_PLUGINS + Sosreport option. Use this to enable a plugin that would otherwise not be run. + +diff --git a/man/en/sos-report.1 b/man/en/sos-report.1 +index e8efc8f8..464a77e5 100644 +--- a/man/en/sos-report.1 ++++ b/man/en/sos-report.1 +@@ -19,6 +19,7 @@ sos report \- Collect and package diagnostic and support data + [--plugin-timeout TIMEOUT]\fR + [--cmd-timeout TIMEOUT]\fR + [--namespaces NAMESPACES]\fR ++ [--container-runtime RUNTIME]\fR + [-s|--sysroot SYSROOT]\fR + [-c|--chroot {auto|always|never}\fR + [--tmp-dir directory]\fR +@@ -299,6 +300,24 @@ Use '0' (default) for no limit - all namespaces will be used for collections. + + Note that specific plugins may provide a similar `namespaces` plugin option. If + the plugin option is used, it will override this option. 
++.TP ++.B \--container-runtime RUNTIME ++Force the use of the specified RUNTIME as the default runtime that plugins will ++use to collect data from and about containers and container images. By default, ++the setting of \fBauto\fR results in the local policy determining what runtime ++will be the default runtime (in configurations where multiple runtimes are installed ++and active). ++ ++If no container runtimes are active, this option is ignored. If there are runtimes ++active, but not one with a name matching RUNTIME, sos will abort. ++ ++Setting this to \fBnone\fR, \fBoff\fR, or \fBdisabled\fR will cause plugins to ++\fBNOT\fR leverage any active runtimes for collections. Note that if disabled, plugins ++specifically for runtimes (e.g. the podman or docker plugins) will still collect ++general data about the runtime, but will not inspect existing containers or images. ++ ++Default: 'auto' (policy determined) ++.TP + .B \--case-id NUMBER + Specify a case identifier to associate with the archive. + Identifiers may include alphanumeric characters, commas and periods ('.'). +diff --git a/sos/collector/__init__.py b/sos/collector/__init__.py +index 42a7731d..3ad703d3 100644 +--- a/sos/collector/__init__.py ++++ b/sos/collector/__init__.py +@@ -55,6 +55,7 @@ class SoSCollector(SoSComponent): + 'clean': False, + 'cluster_options': [], + 'cluster_type': None, ++ 'container_runtime': 'auto', + 'domains': [], + 'enable_plugins': [], + 'encrypt_key': '', +@@ -268,6 +269,9 @@ class SoSCollector(SoSComponent): + sos_grp.add_argument('--chroot', default='', + choices=['auto', 'always', 'never'], + help="chroot executed commands to SYSROOT") ++ sos_grp.add_argument("--container-runtime", default="auto", ++ help="Default container runtime to use for " ++ "collections. 'auto' for policy control.") + sos_grp.add_argument('-e', '--enable-plugins', action="extend", + help='Enable specific plugins for sosreport') + sos_grp.add_argument('-k', '--plugin-option', '--plugopts', +diff --git a/sos/collector/sosnode.py b/sos/collector/sosnode.py +index ab7f23cc..f5957e17 100644 +--- a/sos/collector/sosnode.py ++++ b/sos/collector/sosnode.py +@@ -586,6 +586,12 @@ class SosNode(): + sos_opts.append('--cmd-timeout=%s' + % quote(str(self.opts.cmd_timeout))) + ++ if self.check_sos_version('4.3'): ++ if self.opts.container_runtime != 'auto': ++ sos_opts.append( ++ "--container-runtime=%s" % self.opts.container_runtime ++ ) ++ + self.update_cmd_from_cluster() + + sos_cmd = sos_cmd.replace( +diff --git a/sos/report/__init__.py b/sos/report/__init__.py +index a6c72778..0daad82f 100644 +--- a/sos/report/__init__.py ++++ b/sos/report/__init__.py +@@ -82,6 +82,7 @@ class SoSReport(SoSComponent): + 'case_id': '', + 'chroot': 'auto', + 'clean': False, ++ 'container_runtime': 'auto', + 'keep_binary_files': False, + 'desc': '', + 'domains': [], +@@ -187,6 +188,7 @@ class SoSReport(SoSComponent): + self.tempfile_util.clean() + self._exit(1) + ++ self._check_container_runtime() + self._get_hardware_devices() + self._get_namespaces() + +@@ -218,6 +220,9 @@ class SoSReport(SoSComponent): + dest="chroot", default='auto', + help="chroot executed commands to SYSROOT " + "[auto, always, never] (default=auto)") ++ report_grp.add_argument("--container-runtime", default="auto", ++ help="Default container runtime to use for " ++ "collections. 
'auto' for policy control.")
+     report_grp.add_argument("--desc", "--description", type=str,
+                             action="store", default="",
+                             help="Description for a new preset",)
+@@ -373,6 +378,37 @@
+         }
+         # TODO: enumerate network devices, preferably with devtype info
+ 
++    def _check_container_runtime(self):
++        """Check the loaded container runtimes, and the policy default runtime
++        (if set), against any requested --container-runtime value. This can be
++        useful for systems that have multiple runtimes, such as RHCOS, but do
++        not have a clearly defined 'default' (or one that is determined based
++        entirely on configuration).
++        """
++        if self.opts.container_runtime != 'auto':
++            crun = self.opts.container_runtime.lower()
++            if crun in ['none', 'off', 'disabled']:
++                self.policy.runtimes = {}
++                self.soslog.info(
++                    "Disabled all container runtimes per user option."
++                )
++            elif not self.policy.runtimes:
++                msg = ("WARNING: No container runtimes are active, ignoring "
++                       "option to set default runtime to '%s'\n" % crun)
++                self.soslog.warn(msg)
++            elif crun not in self.policy.runtimes.keys():
++                valid = ', '.join(p for p in self.policy.runtimes.keys()
++                                  if p != 'default')
++                raise Exception("Cannot use container runtime '%s': no such "
++                                "runtime detected. Available runtimes: %s"
++                                % (crun, valid))
++            else:
++                self.policy.runtimes['default'] = self.policy.runtimes[crun]
++                self.soslog.info(
++                    "Set default container runtime to '%s'"
++                    % self.policy.runtimes['default'].name
++                )
++
+     def get_fibre_devs(self):
+         """Enumerate a list of fibrechannel devices on this system so that
+         plugins can iterate over them
+--
+2.31.1
+
From 9d4b5af39d76ac99afa40d004fe9888633218356 Mon Sep 17 00:00:00 2001
+From: Jake Hunsaker
+Date: Fri, 3 Dec 2021 13:37:09 -0500
+Subject: [PATCH 1/2] [Plugin] Add container parameter for add_cmd_output()
+
+Adds a new `container` parameter for `Plugin.add_cmd_output()`, which if
+set will format all commands passed to that call for execution in the
+specified container.
+
+`Plugin.fmt_container_cmd()` is called for this purpose, and has been
+modified so that if the given container does not exist, an empty string
+is returned instead, thus preventing execution on the host.
+
+Signed-off-by: Jake Hunsaker
+---
+ sos/report/plugins/__init__.py | 16 ++++++++++++++--
+ 1 file changed, 14 insertions(+), 2 deletions(-)
+
+diff --git a/sos/report/plugins/__init__.py b/sos/report/plugins/__init__.py
+index e180ae17..3ff7c191 100644
+--- a/sos/report/plugins/__init__.py
++++ b/sos/report/plugins/__init__.py
+@@ -1707,7 +1707,7 @@ class Plugin():
+                  chroot=True, runat=None, env=None, binary=False,
+                  sizelimit=None, pred=None, subdir=None,
+                  changes=False, foreground=False, tags=[],
+-                 priority=10, cmd_as_tag=False):
++                 priority=10, cmd_as_tag=False, container=None):
+         """Run a program or a list of programs and collect the output
+ 
+         Output will be limited to `sizelimit`, collecting the last X amount
+@@ -1772,6 +1772,10 @@
+         :param cmd_as_tag: Should the command string be automatically formatted
+                            to a tag?
+         :type cmd_as_tag: ``bool``
++
++        :param container: Run the specified `cmds` inside a container with this
++                          ID or name
++        :type container: ``str``
+         """
+         if isinstance(cmds, str):
+             cmds = [cmds]
+@@ -1782,6 +1786,14 @@ class Plugin():
+         if pred is None:
+             pred = self.get_predicate(cmd=True)
+         for cmd in cmds:
++            if container:
++                ocmd = cmd
++                cmd = self.fmt_container_cmd(container, cmd)
++                if not cmd:
++                    self._log_debug("Skipping command '%s' as the requested "
++                                    "container '%s' does not exist."
++                                    % (ocmd, container))
++                    continue
+             self._add_cmd_output(cmd=cmd, suggest_filename=suggest_filename,
+                                  root_symlink=root_symlink, timeout=timeout,
+                                  stderr=stderr, chroot=chroot, runat=runat,
+@@ -2420,7 +2432,7 @@ class Plugin():
+         if self.container_exists(container):
+             _runtime = self._get_container_runtime()
+             return _runtime.fmt_container_cmd(container, cmd, quotecmd)
+-        return cmd
++        return ''
+
+     def is_module_loaded(self, module_name):
+         """Determine whether specified module is loaded or not
+--
+2.31.1
+
+
+From 874d2adfbff9e51dc902669af3c4a5083dbc19b1 Mon Sep 17 00:00:00 2001
+From: Jake Hunsaker
+Date: Fri, 3 Dec 2021 14:49:43 -0500
+Subject: [PATCH 2/2] [plugins] Update existing plugins to use a_c_o container
+ parameter
+
+Updates plugins currently calling `fmt_container_cmd()` in their
+`add_cmd_output()` calls to instead use the new `container` parameter
+and rely on the automatic formatting.
+
+Signed-off-by: Jake Hunsaker
+---
+ sos/report/plugins/opencontrail.py        |  3 +--
+ sos/report/plugins/openstack_database.py  | 20 ++++++--------------
+ sos/report/plugins/openstack_designate.py |  6 ++----
+ sos/report/plugins/openstack_ironic.py    |  3 +--
+ sos/report/plugins/ovn_central.py         |  7 +++----
+ sos/report/plugins/rabbitmq.py            | 11 ++++++-----
+ 9 files changed, 47 insertions(+), 69 deletions(-)
+
+diff --git a/sos/report/plugins/opencontrail.py b/sos/report/plugins/opencontrail.py
+index b368bffe..76c03e21 100644
+--- a/sos/report/plugins/opencontrail.py
++++ b/sos/report/plugins/opencontrail.py
+@@ -25,8 +25,7 @@ class OpenContrail(Plugin, IndependentPlugin):
+             cnames = self.get_containers(get_all=True)
+             cnames = [c[1] for c in cnames if 'opencontrail' in c[1]]
+             for cntr in cnames:
+-                _cmd = self.fmt_container_cmd(cntr, 'contrail-status')
+-                self.add_cmd_output(_cmd)
++                self.add_cmd_output('contrail-status', container=cntr)
+         else:
+             self.add_cmd_output("contrail-status")
+
+diff --git a/sos/report/plugins/openstack_database.py b/sos/report/plugins/openstack_database.py
+index 1e98fabf..e9f84cf8 100644
+--- a/sos/report/plugins/openstack_database.py
++++ b/sos/report/plugins/openstack_database.py
+@@ -37,36 +37,28 @@ class OpenStackDatabase(Plugin):
+     ]
+
+     def setup(self):
+-
+-        in_container = False
+         # determine if we're running databases on the host or in a container
+         _db_containers = [
+             'galera-bundle-.*',  # overcloud
+             'mysql'  # undercloud
+         ]
+
++        cname = None
+         for container in _db_containers:
+             cname = self.get_container_by_name(container)
+-            if cname is not None:
+-                in_container = True
++            if cname:
+                 break
+
+-        if in_container:
+-            fname = "clustercheck_%s" % cname
+-            cmd = self.fmt_container_cmd(cname, 'clustercheck')
+-            self.add_cmd_output(cmd, timeout=15, suggest_filename=fname)
+-        else:
+-            self.add_cmd_output('clustercheck', timeout=15)
++        fname = "clustercheck_%s" % cname if cname else None
++        self.add_cmd_output('clustercheck', container=cname, timeout=15,
++                            suggest_filename=fname)
+
+         if self.get_option('dump') or self.get_option('dumpall'):
+             db_dump = self.get_mysql_db_string(container=cname)
+             db_cmd = "mysqldump --opt %s" % db_dump
+
+-            if in_container:
+-                db_cmd = self.fmt_container_cmd(cname, db_cmd)
+-
+             self.add_cmd_output(db_cmd, suggest_filename='mysql_dump.sql',
+-                                sizelimit=0)
++                                sizelimit=0, container=cname)
+
+     def get_mysql_db_string(self, container=None):
+
+diff --git a/sos/report/plugins/openstack_designate.py b/sos/report/plugins/openstack_designate.py
+index 0ae991b0..a2ea37ab 100644
+--- a/sos/report/plugins/openstack_designate.py
++++ b/sos/report/plugins/openstack_designate.py
+@@ -20,12 +20,10 @@ class OpenStackDesignate(Plugin):
+
+     def setup(self):
+         # collect current pool config
+-        pools_cmd = self.fmt_container_cmd(
+-            self.get_container_by_name(".*designate_central"),
+-            "designate-manage pool generate_file --file /dev/stdout")
+
+         self.add_cmd_output(
+-            pools_cmd,
++            "designate-manage pool generate_file --file /dev/stdout",
++            container=self.get_container_by_name(".*designate_central"),
+             suggest_filename="openstack_designate_current_pools.yaml"
+         )
+
+diff --git a/sos/report/plugins/openstack_ironic.py b/sos/report/plugins/openstack_ironic.py
+index c36fb6b6..49beb2d9 100644
+--- a/sos/report/plugins/openstack_ironic.py
++++ b/sos/report/plugins/openstack_ironic.py
+@@ -80,8 +80,7 @@ class OpenStackIronic(Plugin):
+                                    'ironic_pxe_tftp', 'ironic_neutron_agent',
+                                    'ironic_conductor', 'ironic_api']:
+                 if self.container_exists('.*' + container_name):
+-                    self.add_cmd_output(self.fmt_container_cmd(container_name,
+-                                                               'rpm -qa'))
++                    self.add_cmd_output('rpm -qa', container=container_name)
+
+         else:
+             self.conf_list = [
+diff --git a/sos/report/plugins/ovn_central.py b/sos/report/plugins/ovn_central.py
+index 914eda60..ddbf288d 100644
+--- a/sos/report/plugins/ovn_central.py
++++ b/sos/report/plugins/ovn_central.py
+@@ -123,11 +123,10 @@ class OVNCentral(Plugin):
+
+         # If OVN is containerized, we need to run the above commands inside
+         # the container.
+-        cmds = [
+-            self.fmt_container_cmd(self._container_name, cmd) for cmd in cmds
+-        ]
+
+-        self.add_cmd_output(cmds, foreground=True)
++        self.add_cmd_output(
++            cmds, foreground=True, container=self._container_name
++        )
+
+         self.add_copy_spec("/etc/sysconfig/ovn-northd")
+
+diff --git a/sos/report/plugins/rabbitmq.py b/sos/report/plugins/rabbitmq.py
+index 1bfa741f..607802e4 100644
+--- a/sos/report/plugins/rabbitmq.py
++++ b/sos/report/plugins/rabbitmq.py
+@@ -34,14 +34,15 @@ class RabbitMQ(Plugin, IndependentPlugin):
+             for container in container_names:
+                 self.add_container_logs(container)
+                 self.add_cmd_output(
+-                    self.fmt_container_cmd(container, 'rabbitmqctl report'),
++                    'rabbitmqctl report',
++                    container=container,
+                     foreground=True
+                 )
+                 self.add_cmd_output(
+-                    self.fmt_container_cmd(
+-                        container, "rabbitmqctl eval "
+-                        "'rabbit_diagnostics:maybe_stuck().'"),
+-                    foreground=True, timeout=10
++                    "rabbitmqctl eval 'rabbit_diagnostics:maybe_stuck().'",
++                    container=container,
++                    foreground=True,
++                    timeout=10
+                 )
+         else:
+             self.add_cmd_output("rabbitmqctl report")
+--
+2.31.1
+
+
+From faa15754f82e9841cd624afe18dc2198644decdf Mon Sep 17 00:00:00 2001
+From: Jake Hunsaker
+Date: Wed, 8 Dec 2021 13:51:20 -0500
+Subject: [PATCH] [Policy,collect] Prevent remote node policies from setting
+ local PATH
+
+This commit fixes an issue where policies loaded for remote nodes when
+using `sos collect` would override the PATH setting for the local
+policy, which in turn could prevent successful execution of cluster
+profile operations.
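
To make the failure mode concrete, here is a minimal standalone sketch (the class and method names mirror sos, but the logic is reduced to the essentials; this is illustrative, not the full implementation):

    # Python sketch, assuming a simplified Policy class: before this change,
    # every Policy.__init__() called set_exec_path(), so instantiating a
    # policy for a *remote* node mutated the local process environment.
    import os

    class Policy:
        PATH = "/usr/sbin:/usr/bin"

        def __init__(self, remote_exec=None):
            self.remote_exec = remote_exec
            # The fix: only a policy for the local host touches local PATH.
            if not self.remote_exec:
                self.set_exec_path()

        def set_exec_path(self):
            os.environ['PATH'] = self.PATH

Gating set_exec_path() on remote_exec keeps policy loading free of local side effects for remote nodes.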
+
+Related: #2777
+
+Signed-off-by: Jake Hunsaker
+---
+ sos/policies/__init__.py         | 8 +++++---
+ sos/policies/distros/__init__.py | 6 ++++--
+ sos/policies/distros/debian.py   | 3 ++-
+ sos/policies/distros/redhat.py   | 6 ++++--
+ sos/policies/distros/suse.py     | 3 ++-
+ 5 files changed, 17 insertions(+), 9 deletions(-)
+
+diff --git a/sos/policies/__init__.py b/sos/policies/__init__.py
+index ef9188de..826d03a1 100644
+--- a/sos/policies/__init__.py
++++ b/sos/policies/__init__.py
+@@ -45,7 +45,7 @@ def load(cache={}, sysroot=None, init=None, probe_runtime=True,
+     return cache['policy']
+
+
+-class Policy(object):
++class Policy():
+     """Policies represent distributions that sos supports, and define the way
+     in which sos behaves on those distributions. A policy should define at
+     minimum a way to identify the distribution, and a package manager to allow
+@@ -111,7 +111,7 @@ any third party.
+     presets_path = PRESETS_PATH
+     _in_container = False
+
+-    def __init__(self, sysroot=None, probe_runtime=True):
++    def __init__(self, sysroot=None, probe_runtime=True, remote_exec=None):
+         """Subclasses that choose to override this initializer should call
+         super() to ensure that they get the required platform bits attached.
+         super(SubClass, self).__init__(). Policies that require runtime
+@@ -122,7 +122,9 @@ any third party.
+         self.probe_runtime = probe_runtime
+         self.package_manager = PackageManager()
+         self.valid_subclasses = [IndependentPlugin]
+-        self.set_exec_path()
++        self.remote_exec = remote_exec
++        if not self.remote_exec:
++            self.set_exec_path()
+         self.sysroot = sysroot
+         self.register_presets(GENERIC_PRESETS)
+
+diff --git a/sos/policies/distros/__init__.py b/sos/policies/distros/__init__.py
+index c69fc1e7..9c91a918 100644
+--- a/sos/policies/distros/__init__.py
++++ b/sos/policies/distros/__init__.py
+@@ -68,9 +68,11 @@ class LinuxPolicy(Policy):
+     container_version_command = None
+     container_authfile = None
+
+-    def __init__(self, sysroot=None, init=None, probe_runtime=True):
++    def __init__(self, sysroot=None, init=None, probe_runtime=True,
++                 remote_exec=None):
+         super(LinuxPolicy, self).__init__(sysroot=sysroot,
+-                                          probe_runtime=probe_runtime)
++                                          probe_runtime=probe_runtime,
++                                          remote_exec=remote_exec)
+
+         if sysroot:
+             self.sysroot = sysroot
+diff --git a/sos/policies/distros/debian.py b/sos/policies/distros/debian.py
+index 639fd5eb..41f09428 100644
+--- a/sos/policies/distros/debian.py
++++ b/sos/policies/distros/debian.py
+@@ -26,7 +26,8 @@ class DebianPolicy(LinuxPolicy):
+     def __init__(self, sysroot=None, init=None, probe_runtime=True,
+                  remote_exec=None):
+         super(DebianPolicy, self).__init__(sysroot=sysroot, init=init,
+-                                           probe_runtime=probe_runtime)
++                                           probe_runtime=probe_runtime,
++                                           remote_exec=remote_exec)
+         self.package_manager = DpkgPackageManager(chroot=self.sysroot,
+                                                   remote_exec=remote_exec)
+         self.valid_subclasses += [DebianPlugin]
+diff --git a/sos/policies/distros/redhat.py b/sos/policies/distros/redhat.py
+index 4b14abaf..eb75e15b 100644
+--- a/sos/policies/distros/redhat.py
++++ b/sos/policies/distros/redhat.py
+@@ -53,7 +53,8 @@ class RedHatPolicy(LinuxPolicy):
+     def __init__(self, sysroot=None, init=None, probe_runtime=True,
+                  remote_exec=None):
+         super(RedHatPolicy, self).__init__(sysroot=sysroot, init=init,
+-                                           probe_runtime=probe_runtime)
++                                           probe_runtime=probe_runtime,
++                                           remote_exec=remote_exec)
+         self.usrmove = False
+
+         self.package_manager = RpmPackageManager(chroot=self.sysroot,
+@@ -76,7 +77,8 @@ class RedHatPolicy(LinuxPolicy):
+         self.PATH = "/sbin:/bin:/usr/sbin:/usr/bin:/root/bin"
+         self.PATH += os.pathsep + "/usr/local/bin"
+         self.PATH += os.pathsep + "/usr/local/sbin"
+-        self.set_exec_path()
++        if not self.remote_exec:
++            self.set_exec_path()
+         self.load_presets()
+
+     @classmethod
+diff --git a/sos/policies/distros/suse.py b/sos/policies/distros/suse.py
+index 1c1feff5..b9d4a3b1 100644
+--- a/sos/policies/distros/suse.py
++++ b/sos/policies/distros/suse.py
+@@ -25,7 +25,8 @@ class SuSEPolicy(LinuxPolicy):
+     def __init__(self, sysroot=None, init=None, probe_runtime=True,
+                  remote_exec=None):
+         super(SuSEPolicy, self).__init__(sysroot=sysroot, init=init,
+-                                         probe_runtime=probe_runtime)
++                                         probe_runtime=probe_runtime,
++                                         remote_exec=remote_exec)
+         self.valid_subclasses += [SuSEPlugin, RedHatPlugin]
+
+         self.usrmove = False
+--
+2.31.1
+
+From d4383fec5f8a80121aa4f5a37575b37988c51663 Mon Sep 17 00:00:00 2001
+From: Nadia Pinaeva
+Date: Wed, 1 Dec 2021 12:23:34 +0100
+Subject: [PATCH] Add crio runtime and openshift_ovn plugin openshift_ovn
+ plugin collects logs from crio containers Fix get_container_by_name function
+ returning container_id and not name
+
+Signed-off-by: Nadia Pinaeva
+---
+ sos/policies/distros/__init__.py    |  4 +-
+ sos/policies/runtimes/__init__.py   |  2 +-
+ sos/policies/runtimes/crio.py       | 79 +++++++++++++++++++++++++++++
+ sos/report/plugins/openshift_ovn.py | 41 +++++++++++++++
+ 4 files changed, 124 insertions(+), 2 deletions(-)
+ create mode 100644 sos/policies/runtimes/crio.py
+ create mode 100644 sos/report/plugins/openshift_ovn.py
+
+diff --git a/sos/policies/distros/__init__.py b/sos/policies/distros/__init__.py
+index 9c91a918..7acc7e49 100644
+--- a/sos/policies/distros/__init__.py
++++ b/sos/policies/distros/__init__.py
+@@ -17,6 +17,7 @@ from sos import _sos as _
+ from sos.policies import Policy
+ from sos.policies.init_systems import InitSystem
+ from sos.policies.init_systems.systemd import SystemdInit
++from sos.policies.runtimes.crio import CrioContainerRuntime
+ from sos.policies.runtimes.podman import PodmanContainerRuntime
+ from sos.policies.runtimes.docker import DockerContainerRuntime
+
+@@ -92,7 +93,8 @@ class LinuxPolicy(Policy):
+         if self.probe_runtime:
+             _crun = [
+                 PodmanContainerRuntime(policy=self),
+-                DockerContainerRuntime(policy=self)
++                DockerContainerRuntime(policy=self),
++                CrioContainerRuntime(policy=self)
+             ]
+             for runtime in _crun:
+                 if runtime.check_is_active():
+diff --git a/sos/policies/runtimes/__init__.py b/sos/policies/runtimes/__init__.py
+index 2e60ad23..4e9a45c1 100644
+--- a/sos/policies/runtimes/__init__.py
++++ b/sos/policies/runtimes/__init__.py
+@@ -100,7 +100,7 @@ class ContainerRuntime():
+             return None
+         for c in self.containers:
+             if re.match(name, c[1]):
+-                return c[1]
++                return c[0]
+         return None
+
+     def get_images(self):
+diff --git a/sos/policies/runtimes/crio.py b/sos/policies/runtimes/crio.py
+new file mode 100644
+index 00000000..980c3ea1
+--- /dev/null
++++ b/sos/policies/runtimes/crio.py
+@@ -0,0 +1,79 @@
++# Copyright (C) 2021 Red Hat, Inc., Nadia Pinaeva
++
++# This file is part of the sos project: https://github.com/sosreport/sos
++#
++# This copyrighted material is made available to anyone wishing to use,
++# modify, copy, or redistribute it subject to the terms and conditions of
++# version 2 of the GNU General Public License.
++#
++# See the LICENSE file in the source distribution for further information.
++
++from sos.policies.runtimes import ContainerRuntime
++from sos.utilities import sos_get_command_output
++from pipes import quote
++
++
++class CrioContainerRuntime(ContainerRuntime):
++    """Runtime class to use for systems running crio"""
++
++    name = 'crio'
++    binary = 'crictl'
++
++    def get_containers(self, get_all=False):
++        """Get a list of containers present on the system.
++
++        :param get_all: If set, include stopped containers as well
++        :type get_all: ``bool``
++        """
++        containers = []
++        _cmd = "%s ps %s" % (self.binary, '-a' if get_all else '')
++        if self.active:
++            out = sos_get_command_output(_cmd, chroot=self.policy.sysroot)
++            if out['status'] == 0:
++                for ent in out['output'].splitlines()[1:]:
++                    ent = ent.split()
++                    # takes the form (container_id, container_name)
++                    containers.append((ent[0], ent[-3]))
++        return containers
++
++    def get_images(self):
++        """Get a list of images present on the system
++
++        :returns: A list of 2-tuples containing (image_name, image_id)
++        :rtype: ``list``
++        """
++        images = []
++        if self.active:
++            out = sos_get_command_output("%s images" % self.binary,
++                                         chroot=self.policy.sysroot)
++            if out['status'] == 0:
++                for ent in out['output'].splitlines():
++                    ent = ent.split()
++                    # takes the form (image_name, image_id)
++                    images.append((ent[0] + ':' + ent[1], ent[2]))
++        return images
++
++    def fmt_container_cmd(self, container, cmd, quotecmd):
++        """Format a command to run inside a container using the runtime
++
++        :param container: The name or ID of the container in which to run
++        :type container: ``str``
++
++        :param cmd: The command to run inside `container`
++        :type cmd: ``str``
++
++        :param quotecmd: Whether the cmd should be quoted.
++        :type quotecmd: ``bool``
++
++        :returns: Formatted string to run `cmd` inside `container`
++        :rtype: ``str``
++        """
++        if quotecmd:
++            quoted_cmd = quote(cmd)
++        else:
++            quoted_cmd = cmd
++        container_id = self.get_container_by_name(container)
++        return "%s %s %s" % (self.run_cmd, container_id,
++                             quoted_cmd) if container_id is not None else ''
++
++# vim: set et ts=4 sw=4 :
+diff --git a/sos/report/plugins/openshift_ovn.py b/sos/report/plugins/openshift_ovn.py
+new file mode 100644
+index 00000000..168f1dd3
+--- /dev/null
++++ b/sos/report/plugins/openshift_ovn.py
+@@ -0,0 +1,41 @@
++# Copyright (C) 2021 Nadia Pinaeva
++
++# This file is part of the sos project: https://github.com/sosreport/sos
++#
++# This copyrighted material is made available to anyone wishing to use,
++# modify, copy, or redistribute it subject to the terms and conditions of
++# version 2 of the GNU General Public License.
++#
++# See the LICENSE file in the source distribution for further information.
++
++from sos.report.plugins import Plugin, RedHatPlugin
++
++
++class OpenshiftOVN(Plugin, RedHatPlugin):
++    """This plugin is used to collect OCP 4.x OVN logs.
++ """ ++ short_desc = 'Openshift OVN' ++ plugin_name = "openshift_ovn" ++ containers = ('ovnkube-master', 'ovn-ipsec') ++ profiles = ('openshift',) ++ ++ def setup(self): ++ self.add_copy_spec([ ++ "/var/lib/ovn/etc/ovnnb_db.db", ++ "/var/lib/ovn/etc/ovnsb_db.db", ++ "/var/lib/openvswitch/etc/keys", ++ "/var/log/openvswitch/libreswan.log", ++ "/var/log/openvswitch/ovs-monitor-ipsec.log" ++ ]) ++ ++ self.add_cmd_output([ ++ 'ovn-appctl -t /var/run/ovn/ovnnb_db.ctl ' + ++ 'cluster/status OVN_Northbound', ++ 'ovn-appctl -t /var/run/ovn/ovnsb_db.ctl ' + ++ 'cluster/status OVN_Southbound'], ++ container='ovnkube-master') ++ self.add_cmd_output([ ++ 'ovs-appctl -t ovs-monitor-ipsec tunnels/show', ++ 'ipsec status', ++ 'certutil -L -d sql:/etc/ipsec.d'], ++ container='ovn-ipsec') +-- +2.31.1 + +From 17218ca17e49cb8491c688095b56503d041c1ae9 Mon Sep 17 00:00:00 2001 +From: Jake Hunsaker +Date: Thu, 9 Dec 2021 15:07:23 -0500 +Subject: [PATCH 1/3] [ocp] Skip project setup whenever oc transport is not + used + +Fixes a corner case where we would still attempt to create a new project +within the OCP cluster even if we weren't using the `oc` transport. + +Signed-off-by: Jake Hunsaker +--- + sos/collector/clusters/ocp.py | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +diff --git a/sos/collector/clusters/ocp.py b/sos/collector/clusters/ocp.py +index 2ce4e977..56f8cc47 100644 +--- a/sos/collector/clusters/ocp.py ++++ b/sos/collector/clusters/ocp.py +@@ -123,7 +123,9 @@ class ocp(Cluster): + return nodes + + def set_transport_type(self): +- if is_executable('oc') or self.opts.transport == 'oc': ++ if self.opts.transport != 'auto': ++ return self.opts.transport ++ if is_executable('oc'): + return 'oc' + self.log_info("Local installation of 'oc' not found or is not " + "correctly configured. Will use ControlPersist.") +-- +2.31.1 + + +From 9faabdc3df08516a91c1adb3326bbf43db155f71 Mon Sep 17 00:00:00 2001 +From: Jake Hunsaker +Date: Thu, 9 Dec 2021 16:04:39 -0500 +Subject: [PATCH 2/3] [crio] Put inspect output in the containers subdir + +Given the environments where crio is run, having `crictl inspect` output +in the main plugin directory can be a bit overwhelming. As such, put +this output into a `containers` subdir, and nest container log output in +a `containers/logs/` subdir. + +Signed-off-by: Jake Hunsaker +--- + sos/report/plugins/crio.py | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +diff --git a/sos/report/plugins/crio.py b/sos/report/plugins/crio.py +index cb2c9796..56cf64a7 100644 +--- a/sos/report/plugins/crio.py ++++ b/sos/report/plugins/crio.py +@@ -79,10 +79,11 @@ class CRIO(Plugin, RedHatPlugin, UbuntuPlugin): + pods = self._get_crio_list(pod_cmd) + + for container in containers: +- self.add_cmd_output("crictl inspect %s" % container) ++ self.add_cmd_output("crictl inspect %s" % container, ++ subdir="containers") + if self.get_option('logs'): + self.add_cmd_output("crictl logs -t %s" % container, +- subdir="containers", priority=100) ++ subdir="containers/logs", priority=100) + + for image in images: + self.add_cmd_output("crictl inspecti %s" % image, subdir="images") +-- +2.31.1 + + +From 9118562c47fb521da3eeeed1a8746d45aaa769e7 Mon Sep 17 00:00:00 2001 +From: Jake Hunsaker +Date: Thu, 9 Dec 2021 16:06:06 -0500 +Subject: [PATCH 3/3] [networking] Put namespaced commands into subdirs + +Where networking namespaces are used, there tend to be large numbers of +namespaces used. 
This in turn results in sos running and collecting very +large numbers of namespaced commands. + +To aid in consumability, place these collections under a subdir for the +namespace under another "namespaces" subdir within the plugin directory. + +Signed-off-by: Jake Hunsaker +--- + sos/report/plugins/networking.py | 27 ++++++++++++--------------- + 1 file changed, 12 insertions(+), 15 deletions(-) + +diff --git a/sos/report/plugins/networking.py b/sos/report/plugins/networking.py +index 80e24abb..bcb5e6ae 100644 +--- a/sos/report/plugins/networking.py ++++ b/sos/report/plugins/networking.py +@@ -198,6 +198,7 @@ class Networking(Plugin): + pred=SoSPredicate(self, cmd_outputs=co6)) + else None) + for namespace in namespaces: ++ _subdir = "namespaces/%s" % namespace + ns_cmd_prefix = cmd_prefix + namespace + " " + self.add_cmd_output([ + ns_cmd_prefix + "ip address show", +@@ -213,29 +214,27 @@ class Networking(Plugin): + ns_cmd_prefix + "netstat -s", + ns_cmd_prefix + "netstat %s -agn" % self.ns_wide, + ns_cmd_prefix + "nstat -zas", +- ], priority=50) ++ ], priority=50, subdir=_subdir) + self.add_cmd_output([ns_cmd_prefix + "iptables-save"], + pred=iptables_with_nft, ++ subdir=_subdir, + priority=50) + self.add_cmd_output([ns_cmd_prefix + "ip6tables-save"], + pred=ip6tables_with_nft, ++ subdir=_subdir, + priority=50) + + ss_cmd = ns_cmd_prefix + "ss -peaonmi" + # --allow-system-changes is handled directly in predicate + # evaluation, so plugin code does not need to separately + # check for it +- self.add_cmd_output(ss_cmd, pred=ss_pred) +- +- # Collect ethtool commands only when ethtool_namespaces +- # is set to true. +- if self.get_option("ethtool_namespaces"): +- # Devices that exist in a namespace use less ethtool +- # parameters. Run this per namespace. +- for namespace in self.get_network_namespaces( +- self.get_option("namespace_pattern"), +- self.get_option("namespaces")): +- ns_cmd_prefix = cmd_prefix + namespace + " " ++ self.add_cmd_output(ss_cmd, pred=ss_pred, subdir=_subdir) ++ ++ # Collect ethtool commands only when ethtool_namespaces ++ # is set to true. ++ if self.get_option("ethtool_namespaces"): ++ # Devices that exist in a namespace use less ethtool ++ # parameters. Run this per namespace. + netns_netdev_list = self.exec_cmd( + ns_cmd_prefix + "ls -1 /sys/class/net/" + ) +@@ -250,9 +249,7 @@ class Networking(Plugin): + ns_cmd_prefix + "ethtool -i " + eth, + ns_cmd_prefix + "ethtool -k " + eth, + ns_cmd_prefix + "ethtool -S " + eth +- ], priority=50) +- +- return ++ ], priority=50, subdir=_subdir) + + + class RedHatNetworking(Networking, RedHatPlugin): +-- +2.31.1 + +From 4bf5f9143c962c839c1d27217ba74127551a5c00 Mon Sep 17 00:00:00 2001 +From: Jake Hunsaker +Date: Fri, 17 Dec 2021 11:10:15 -0500 +Subject: [PATCH] [transport] Detect retrieval failures and automatically retry + +If a paritcular attempt to retrieve a remote file fails, we should +automatically retry that collection up to a certain point. This provides +`sos collect` more resiliency for the collection of sos report archives. + +This change necessitates a change in how we handle the SoSNode flow for +failed sos report retrievals, and as such contains minor fixes to +transports to ensure that we do not incorrectly hit exceptions in error +handling that were not previously possible with how we exited the +SoSNode retrieval flow. 
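
The retry behaviour being added can be summarized with this standalone sketch (simplified; the real method lives on RemoteTransport and logs through sos's logger):

    # Python sketch of the bounded-retry pattern, assuming copy_func and log
    # are callables standing in for _retrieve_file() and log_info().
    def retrieve_file(fname, dest, copy_func, log, max_attempts=5):
        """Try copy_func up to max_attempts times, returning True on success."""
        for attempt in range(1, max_attempts + 1):
            if copy_func(fname, dest):
                return True
            log("File retrieval attempt %s failed" % attempt)
        log("File retrieval failed after %s attempts" % max_attempts)
        return False

A fixed attempt cap keeps a persistently failing node from stalling the whole collection, while transient copy errors no longer abort it.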
+
+Closes: #2777
+
+Signed-off-by: Jake Hunsaker
+---
+ sos/collector/__init__.py            |  5 +++--
+ sos/collector/clusters/ocp.py        |  1 +
+ sos/collector/sosnode.py             | 17 ++++++++++-------
+ sos/collector/transports/__init__.py | 15 ++++++++++++++-
+ sos/collector/transports/local.py    |  1 +
+ sos/collector/transports/oc.py       |  3 ++-
+ 6 files changed, 31 insertions(+), 11 deletions(-)
+
+diff --git a/sos/collector/__init__.py b/sos/collector/__init__.py
+index b825d8fc..a25e794e 100644
+--- a/sos/collector/__init__.py
++++ b/sos/collector/__init__.py
+@@ -1221,8 +1221,9 @@ this utility or remote systems that it connects to.
+     def close_all_connections(self):
+         """Close all sessions for nodes"""
+         for client in self.client_list:
+-            self.log_debug('Closing connection to %s' % client.address)
+-            client.disconnect()
++            if client.connected:
++                self.log_debug('Closing connection to %s' % client.address)
++                client.disconnect()
+
+     def create_cluster_archive(self):
+         """Calls for creation of tar archive then cleans up the temporary
+diff --git a/sos/collector/clusters/ocp.py b/sos/collector/clusters/ocp.py
+index 56f8cc47..ae93ad58 100644
+--- a/sos/collector/clusters/ocp.py
++++ b/sos/collector/clusters/ocp.py
+@@ -92,6 +92,7 @@ class ocp(Cluster):
+                                % ret['output'])
+             # don't leave the config on a non-existing project
+             self.exec_master_cmd("oc project default")
++            self.project = None
+         return True
+
+     def _build_dict(self, nodelist):
+diff --git a/sos/collector/sosnode.py b/sos/collector/sosnode.py
+index 1341e39f..925f2790 100644
+--- a/sos/collector/sosnode.py
++++ b/sos/collector/sosnode.py
+@@ -751,12 +751,11 @@ class SosNode():
+             if self.file_exists(path):
+                 self.log_info("Copying remote %s to local %s" %
+                               (path, destdir))
+-                self._transport.retrieve_file(path, dest)
++                return self._transport.retrieve_file(path, dest)
+             else:
+                 self.log_debug("Attempting to copy remote file %s, but it "
+                                "does not exist on filesystem" % path)
+                 return False
+-            return True
+         except Exception as err:
+             self.log_debug("Failed to retrieve %s: %s" % (path, err))
+             return False
+@@ -793,16 +792,20 @@ class SosNode():
+                 except Exception:
+                     self.log_error('Failed to make archive readable')
+                     return False
+-            self.soslog.info('Retrieving sos report from %s' % self.address)
++            self.log_info('Retrieving sos report from %s' % self.address)
+             self.ui_msg('Retrieving sos report...')
+-            ret = self.retrieve_file(self.sos_path)
++            try:
++                ret = self.retrieve_file(self.sos_path)
++            except Exception as err:
++                self.log_error(err)
++                return False
+             if ret:
+                 self.ui_msg('Successfully collected sos report')
+                 self.file_list.append(self.sos_path.split('/')[-1])
++                return True
+             else:
+-                self.log_error('Failed to retrieve sos report')
+-                raise SystemExit
+-            return True
++                self.ui_msg('Failed to retrieve sos report')
++                return False
+         else:
+             # sos sometimes fails but still returns a 0 exit code
+             if self.stderr.read():
+diff --git a/sos/collector/transports/__init__.py b/sos/collector/transports/__init__.py
+index 33f2f66d..dcdebdde 100644
+--- a/sos/collector/transports/__init__.py
++++ b/sos/collector/transports/__init__.py
+@@ -303,7 +303,20 @@ class RemoteTransport():
+         :returns: True if file was successfully copied from remote, or False
+         :rtype: ``bool``
+         """
+-        return self._retrieve_file(fname, dest)
++        attempts = 0
++        try:
++            while attempts < 5:
++                attempts += 1
++                ret = self._retrieve_file(fname, dest)
++                if ret:
++                    return True
++                self.log_info("File retrieval attempt %s failed" % attempts)
++            self.log_info("File retrieval failed after 5 attempts")
++            return False
++        except Exception as err:
++            self.log_error("Exception encountered during retrieval attempt %s "
++                           "for %s: %s" % (attempts, fname, err))
++            raise err
+
+     def _retrieve_file(self, fname, dest):
+         raise NotImplementedError("Transport %s does not support file copying"
+diff --git a/sos/collector/transports/local.py b/sos/collector/transports/local.py
+index a4897f19..2996d524 100644
+--- a/sos/collector/transports/local.py
++++ b/sos/collector/transports/local.py
+@@ -35,6 +35,7 @@ class LocalTransport(RemoteTransport):
+     def _retrieve_file(self, fname, dest):
+         self.log_debug("Moving %s to %s" % (fname, dest))
+         shutil.copy(fname, dest)
++        return True
+
+     def _format_cmd_for_exec(self, cmd):
+         return cmd
+diff --git a/sos/collector/transports/oc.py b/sos/collector/transports/oc.py
+index de044ccb..720dd61d 100644
+--- a/sos/collector/transports/oc.py
++++ b/sos/collector/transports/oc.py
+@@ -202,7 +202,8 @@ class OCTransport(RemoteTransport):
+                                env, False)
+
+     def _disconnect(self):
+-        os.unlink(self.pod_tmp_conf)
++        if os.path.exists(self.pod_tmp_conf):
++            os.unlink(self.pod_tmp_conf)
+         removed = self.run_oc("delete pod %s" % self.pod_name)
+         if "deleted" not in removed['output']:
+             self.log_debug("Calling delete on pod '%s' failed: %s"
+--
+2.31.1
+
+From 304c9ef6c1015f1ebe1a8d569c3e16bada4d23f1 Mon Sep 17 00:00:00 2001
+From: Nadia Pinaeva
+Date: Tue, 4 Jan 2022 16:37:09 +0100
+Subject: [PATCH] Add cluster cleanup for all exit() calls
+
+Signed-off-by: Nadia Pinaeva
+---
+ sos/collector/__init__.py | 3 +--
+ 1 file changed, 1 insertion(+), 2 deletions(-)
+
+diff --git a/sos/collector/__init__.py b/sos/collector/__init__.py
+index a25e794e1..ffd63bc63 100644
+--- a/sos/collector/__init__.py
++++ b/sos/collector/__init__.py
+@@ -443,6 +443,7 @@ def add_parser_options(cls, parser):
+
+     def exit(self, msg, error=1):
+         """Used to safely terminate if sos-collector encounters an error"""
++        self.cluster.cleanup()
+         self.log_error(msg)
+         try:
+             self.close_all_connections()
+@@ -858,8 +858,9 @@ class SoSCollector(SoSComponent):
+                                  "CTRL-C to quit\n")
+                 self.ui_log.info("")
+             except KeyboardInterrupt:
+-                self.cluster.cleanup()
+                 self.exit("Exiting on user cancel", 130)
++            except Exception as e:
++                self.exit(repr(e), 1)
+
+     def configure_sos_cmd(self):
+         """Configures the sosreport command that is run on the nodes"""
+@@ -1185,7 +1185,6 @@ def collect(self):
+             arc_name = self.create_cluster_archive()
+         else:
+             msg = 'No sosreports were collected, nothing to archive...'
+-            self.cluster.cleanup()
+             self.exit(msg, 1)
+
+         if self.opts.upload and self.policy.get_upload_url():
+From 2c3a647817dfbac36be3768acf6026e91d1a6e8f Mon Sep 17 00:00:00 2001
+From: Jake Hunsaker
+Date: Tue, 21 Dec 2021 14:20:19 -0500
+Subject: [PATCH] [options] Allow spaces in --keywords values in sos.conf
+
+The `--keywords` option supports spaces to allow for obfuscated phrases,
+not just words. This however breaks if a phrase is added to the config
+file *before* a run with the phrase in the cmdline option, due to the
+safeguards we have for all other values that do not support spaces.
+
+Add a check in our flow for updating options from the config file to not
+replace illegal spaces if we're checking the `keywords` option, for
+which spaces are legal.
+
+Signed-off-by: Jake Hunsaker
+---
+ sos/options.py | 5 ++++-
+ 1 file changed, 4 insertions(+), 1 deletion(-)
+
+diff --git a/sos/options.py b/sos/options.py
+index 7bea3ffc1..4846a5096 100644
+--- a/sos/options.py
++++ b/sos/options.py
+@@ -200,7 +200,10 @@ def _update_from_section(section, config):
+                     odict[rename_opts[key]] = odict.pop(key)
+             # set the values according to the config file
+             for key, val in odict.items():
+-                if isinstance(val, str):
++                # most option values do not tolerate spaces, special
++                # exception however for --keywords which we do want to
++                # support phrases, and thus spaces, for
++                if isinstance(val, str) and key != 'keywords':
+                     val = val.replace(' ', '')
+                 if key not in self.arg_defaults:
+                     # read an option that is not loaded by the current
+From f912fc9e31b406a24b7a9c012e12cda920632051 Mon Sep 17 00:00:00 2001
+From: Pavel Moravec
+Date: Mon, 10 Jan 2022 14:13:42 +0100
+Subject: [PATCH] [collect] Deal with None sos_version properly
+
+In case the collector cluster hits an error during init, sos_version
+is None, which LooseVersion can't compare properly and so raises the
+exception:
+
+'LooseVersion' object has no attribute 'version'
+
+Related: #2822
+
+Signed-off-by: Pavel Moravec
+---
+ sos/collector/sosnode.py | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/sos/collector/sosnode.py b/sos/collector/sosnode.py
+index 925f27909..7bbe0cd1f 100644
+--- a/sos/collector/sosnode.py
++++ b/sos/collector/sosnode.py
+@@ -382,7 +382,8 @@ def check_sos_version(self, ver):
+         given ver. This means that if the installed version is greater than
+         ver, this will still return True
+         """
+-        return LooseVersion(self.sos_info['version']) >= ver
++        return self.sos_info['version'] is not None and \
++            LooseVersion(self.sos_info['version']) >= ver
+
+     def is_installed(self, pkg):
+         """Checks if a given package is installed on the node"""
+From 0c67e8ebaeef17dac3b5b9e42a59b4e673e4403b Mon Sep 17 00:00:00 2001
+From: Pavel Moravec
+Date: Mon, 10 Jan 2022 14:17:13 +0100
+Subject: [PATCH] [collector] Cleanup cluster only if defined
+
+In case cluster init fails, self.cluster = None and its cleanup
+must be skipped.
+
+Resolves: #2822
+
+Signed-off-by: Pavel Moravec
+---
+ sos/collector/__init__.py | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/sos/collector/__init__.py b/sos/collector/__init__.py
+index ffd63bc63..3e22bca3e 100644
+--- a/sos/collector/__init__.py
++++ b/sos/collector/__init__.py
+@@ -443,7 +443,8 @@ def add_parser_options(cls, parser):
+
+     def exit(self, msg, error=1):
+         """Used to safely terminate if sos-collector encounters an error"""
+-        self.cluster.cleanup()
++        if self.cluster:
++            self.cluster.cleanup()
+         self.log_error(msg)
+         try:
+             self.close_all_connections()
+From ef27a6ee6737c23b3beda1437768a91679024697 Mon Sep 17 00:00:00 2001
+From: Nadia Pinaeva
+Date: Fri, 3 Dec 2021 15:41:35 +0100
+Subject: [PATCH] Add journal logs for NetworkManager plugin
+
+Signed-off-by: Nadia Pinaeva
+---
+ sos/report/plugins/networkmanager.py | 2 ++
+ 1 file changed, 2 insertions(+)
+
+diff --git a/sos/report/plugins/networkmanager.py b/sos/report/plugins/networkmanager.py
+index 30f99a1140..3aca0c7460 100644
+--- a/sos/report/plugins/networkmanager.py
++++ b/sos/report/plugins/networkmanager.py
+@@ -25,6 +25,8 @@ def setup(self):
+             "/etc/NetworkManager/dispatcher.d"
+         ])
+
++        self.add_journal(units="NetworkManager")
++
+         # There are some incompatible changes in nmcli since
+         # the release of NetworkManager >= 0.9.9. In addition,
+         # NetworkManager >= 0.9.9 will use the long names of
+
diff --git a/SPECS/sos.spec b/SPECS/sos.spec
index b1cd45d..5094ccc 100644
--- a/SPECS/sos.spec
+++ b/SPECS/sos.spec
@@ -5,7 +5,7 @@ Summary: A set of tools to gather troubleshooting information from a system
 Name: sos
 Version: 4.2
-Release: 8%{?dist}
+Release: 11%{?dist}
 Group: Applications/System
 Source0: https://github.com/sosreport/sos/archive/%{version}/sos-%{version}.tar.gz
 Source1: sos-audit-%{auditversion}.tgz
@@ -41,6 +41,7 @@ Patch14: sos-bz2024893-cleaner-hostnames-improvements.patch
 Patch15: sos-bz2025611-RHTS-api-change.patch
 Patch16: sos-bz2034001-nvidia-GPU-info.patch
 Patch17: sos-bz2031777-rhui-logs.patch
+Patch18: sos-bz2037350-ocp-backports.patch
 
 %description
 Sos is a set of tools that gathers information about system
@@ -68,6 +69,7 @@ support technicians and developers.
 %patch15 -p1
 %patch16 -p1
 %patch17 -p1
+%patch18 -p1
 
 %build
 %py3_build
@@ -135,6 +137,18 @@ of the system. Currently storage and filesystem commands are audited.
 
 %changelog
+* Mon Jan 10 2022 Pavel Moravec = 4.2-11
+- [report] Add journal logs for NetworkManager plugin
+  Resolves: bz2037350
+
+* Fri Jan 07 2022 Pavel Moravec = 4.2-9
+- add oc transport, backport various PRs for OCP
+  Resolves: bz2037350
+- [report] Provide better warning about estimate-mode
+  Resolves: bz2011537
+- [hostname] Fix loading and detection of long base domains
+  Resolves: bz2024893
+
 * Sun Dec 19 2021 Pavel Moravec = 4.2-8
 - [rhui] New log folder
   Resolves: bz2031777
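
As a footnote to the collector fixes in this update, the None-guard pattern used for the node version check can be sketched standalone (simplified; it relies on the same distutils LooseVersion as the patched code, and the helper shown here is illustrative, not the sos API):

    # Python sketch: a node whose init failed reports version None, which
    # must never reach LooseVersion, as it cannot compare against None.
    from distutils.version import LooseVersion

    def check_sos_version(installed, required):
        return installed is not None and \
            LooseVersion(installed) >= LooseVersion(required)

    assert check_sos_version(None, '4.2') is False
    assert check_sos_version('4.2-11', '4.2') is True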