Blame SOURCES/sos-bz2071825-merged-8.6.z.patch

0a180f
From cc60fa5ee25bffed9203a4f786256185b7fe0115 Mon Sep 17 00:00:00 2001
0a180f
From: Nadia Pinaeva <npinaeva@redhat.com>
0a180f
Date: Tue, 15 Mar 2022 11:49:57 +0100
0a180f
Subject: [PATCH] Add ovs datapath and groups collection commands Add
0a180f
 ct-zone-list command for openshift-ovn
0a180f
0a180f
Signed-off-by: Nadia Pinaeva <npinaeva@redhat.com>
0a180f
---
0a180f
 sos/report/plugins/openshift_ovn.py | 4 ++++
0a180f
 sos/report/plugins/openvswitch.py   | 3 +++
0a180f
 2 files changed, 7 insertions(+)
0a180f
0a180f
diff --git a/sos/report/plugins/openshift_ovn.py b/sos/report/plugins/openshift_ovn.py
0a180f
index 168f1dd3..b4787b8e 100644
0a180f
--- a/sos/report/plugins/openshift_ovn.py
0a180f
+++ b/sos/report/plugins/openshift_ovn.py
0a180f
@@ -34,6 +34,10 @@ class OpenshiftOVN(Plugin, RedHatPlugin):
0a180f
             'ovn-appctl -t /var/run/ovn/ovnsb_db.ctl ' +
0a180f
             'cluster/status OVN_Southbound'],
0a180f
             container='ovnkube-master')
0a180f
+        self.add_cmd_output([
0a180f
+            'ovs-appctl -t /var/run/ovn/ovn-controller.*.ctl ' +
0a180f
+            'ct-zone-list'],
0a180f
+            container='ovnkube-node')
0a180f
         self.add_cmd_output([
0a180f
             'ovs-appctl -t ovs-monitor-ipsec tunnels/show',
0a180f
             'ipsec status',
0a180f
diff --git a/sos/report/plugins/openvswitch.py b/sos/report/plugins/openvswitch.py
0a180f
index 179d1532..159b0bd2 100644
0a180f
--- a/sos/report/plugins/openvswitch.py
0a180f
+++ b/sos/report/plugins/openvswitch.py
0a180f
@@ -124,6 +124,8 @@ class OpenVSwitch(Plugin):
0a180f
             "ovs-vsctl -t 5 list interface",
0a180f
             # Capture OVS detailed information from all the bridges
0a180f
             "ovs-vsctl -t 5 list bridge",
0a180f
+            # Capture OVS datapath list
0a180f
+            "ovs-vsctl -t 5 list datapath",
0a180f
             # Capture DPDK queue to pmd mapping
0a180f
             "ovs-appctl dpif-netdev/pmd-rxq-show",
0a180f
             # Capture DPDK pmd stats
0a180f
@@ -229,6 +231,7 @@ class OpenVSwitch(Plugin):
0a180f
                     "ovs-ofctl queue-get-config %s" % br,
0a180f
                     "ovs-ofctl queue-stats %s" % br,
0a180f
                     "ovs-ofctl show %s" % br,
0a180f
+                    "ovs-ofctl dump-groups %s" % br,
0a180f
                 ])
0a180f
 
0a180f
                 # Flow protocols currently supported
0a180f
-- 
0a180f
2.27.0
0a180f
0a180f
From af40be92f502b35fa9d39ce4d4fea7d80c367830 Mon Sep 17 00:00:00 2001
0a180f
From: Nadia Pinaeva <npinaeva@redhat.com>
0a180f
Date: Tue, 15 Mar 2022 13:09:55 +0100
0a180f
Subject: [PATCH] Improve sos collect for OCP: 1. wait for sos tmp project to
0a180f
 be deleted (just calling delete changes project state to Terminating, and
0a180f
 running a new sos collect is not possible before this project is fully
0a180f
 deleted) 2. use --retries flag to copy sos reports from the nodes more
0a180f
 reliably. The flag has been recently added to kubectl, and the most reliable
0a180f
 way to check if it's available or not is to check command error output for
0a180f
 "unknown flag" substring
0a180f
0a180f
Signed-off-by: Nadia Pinaeva <npinaeva@redhat.com>
0a180f
---
0a180f
 sos/collector/clusters/ocp.py  | 5 +++++
0a180f
 sos/collector/transports/oc.py | 6 +++++-
0a180f
 2 files changed, 10 insertions(+), 1 deletion(-)
0a180f
0a180f
diff --git a/sos/collector/clusters/ocp.py b/sos/collector/clusters/ocp.py
0a180f
index f1714239..9beb2f9b 100644
0a180f
--- a/sos/collector/clusters/ocp.py
0a180f
+++ b/sos/collector/clusters/ocp.py
0a180f
@@ -128,6 +128,11 @@
0a180f
             if not ret['status'] == 0:
0a180f
                 self.log_error("Error deleting temporary project: %s"
0a180f
                                % ret['output'])
0a180f
+            ret = self.exec_primary_cmd("oc wait namespace/%s --for=delete "
0a180f
+                                        "--timeout=30s" % self.project)
0a180f
+            if not ret['status'] == 0:
0a180f
+                self.log_error("Error waiting for temporary project to be "
0a180f
+                               "deleted: %s" % ret['output'])
0a180f
             # don't leave the config on a non-existing project
0a180f
             self.exec_master_cmd("oc project default")
0a180f
             self.project = None
0a180f
diff --git a/sos/collector/transports/oc.py b/sos/collector/transports/oc.py
0a180f
index 0fc9eee8..90a802b2 100644
0a180f
--- a/sos/collector/transports/oc.py
0a180f
+++ b/sos/collector/transports/oc.py
0a180f
@@ -231,5 +231,9 @@ class OCTransport(RemoteTransport):
0a180f
                 % (self.project, self.pod_name))
0a180f
 
0a180f
     def _retrieve_file(self, fname, dest):
0a180f
-        cmd = self.run_oc("cp %s:%s %s" % (self.pod_name, fname, dest))
0a180f
+        # check if --retries flag is available for given version of oc
0a180f
+        result = self.run_oc("cp --retries", stderr=True)
0a180f
+        flags = '' if "unknown flag" in result["output"] else '--retries=5'
0a180f
+        cmd = self.run_oc("cp %s %s:%s %s"
0a180f
+                          % (flags, self.pod_name, fname, dest))
0a180f
         return cmd['status'] == 0
0a180f
-- 
0a180f
2.27.0
0a180f
0a180f
From 3b0676b90ff65f20eaba3062775ff72b89386ffc Mon Sep 17 00:00:00 2001
0a180f
From: Jake Hunsaker <jhunsake@redhat.com>
0a180f
Date: Tue, 22 Mar 2022 14:25:24 -0400
0a180f
Subject: [PATCH] [Plugin] Allow plugins to define default command environment
0a180f
 vars
0a180f
0a180f
Adds the ability for plugins to define a default set of environment vars
0a180f
to pass to all commands executed by the plugin. This may be done either
0a180f
via the new `set_default_cmd_environment()` or
0a180f
`add_default_cmd_environment()` methods. The former will override any
0a180f
previously set values, whereas the latter will add/update/modify any
0a180f
existing values.
0a180f
0a180f
Signed-off-by: Jake Hunsaker <jhunsake@redhat.com>
0a180f
---
0a180f
 sos/report/plugins/__init__.py                | 55 ++++++++++++++++++-
0a180f
 .../plugin_tests/plugin_environment.py        | 44 +++++++++++++++
0a180f
 .../fake_plugins/default_env_test.py          | 28 ++++++++++
0a180f
 tests/unittests/plugin_tests.py               | 15 +++++
0a180f
 4 files changed, 140 insertions(+), 2 deletions(-)
0a180f
 create mode 100644 tests/report_tests/plugin_tests/plugin_environment.py
0a180f
 create mode 100644 tests/test_data/fake_plugins/default_env_test.py
0a180f
0a180f
diff --git a/sos/report/plugins/__init__.py b/sos/report/plugins/__init__.py
0a180f
index 336b4d22..74b4f4be 100644
0a180f
--- a/sos/report/plugins/__init__.py
0a180f
+++ b/sos/report/plugins/__init__.py
0a180f
@@ -571,6 +571,7 @@ class Plugin():
0a180f
         self.manifest = None
0a180f
         self.skip_files = commons['cmdlineopts'].skip_files
0a180f
         self.skip_commands = commons['cmdlineopts'].skip_commands
0a180f
+        self.default_environment = {}
0a180f
 
0a180f
         self.soslog = self.commons['soslog'] if 'soslog' in self.commons \
0a180f
             else logging.getLogger('sos')
0a180f
@@ -542,6 +542,52 @@
0a180f
         self.manifest.add_list('files', [])
0a180f
         self.manifest.add_field('containers', {})
0a180f
 
0a180f
+    def set_default_cmd_environment(self, env_vars):
0a180f
+        """
0a180f
+        Specify a collection of environment variables that should always be
0a180f
+        passed to commands being executed by this plugin.
0a180f
+
0a180f
+        :param env_vars:    The environment variables and their values to set
0a180f
+        :type env_vars:     ``dict{ENV_VAR_NAME: ENV_VAR_VALUE}``
0a180f
+        """
0a180f
+        if not isinstance(env_vars, dict):
0a180f
+            raise TypeError(
0a180f
+                "Environment variables for Plugin must be specified by dict"
0a180f
+            )
0a180f
+        self.default_environment = env_vars
0a180f
+        self._log_debug("Default environment for all commands now set to %s"
0a180f
+                        % self.default_environment)
0a180f
+
0a180f
+    def add_default_cmd_environment(self, env_vars):
0a180f
+        """
0a180f
+        Add or modify a specific environment variable in the set of default
0a180f
+        environment variables used by this Plugin.
0a180f
+
0a180f
+        :param env_vars:    The environment variables to add to the current
0a180f
+                            set of env vars in use
0a180f
+        :type env_vars:     ``dict``
0a180f
+        """
0a180f
+        if not isinstance(env_vars, dict):
0a180f
+            raise TypeError("Environment variables must be added via dict")
0a180f
+        self._log_debug("Adding %s to default environment" % env_vars)
0a180f
+        self.default_environment.update(env_vars)
0a180f
+
0a180f
+    def _get_cmd_environment(self, env=None):
0a180f
+        """
0a180f
+        Get the merged set of environment variables for a command about to be
0a180f
+        executed by this plugin.
0a180f
+
0a180f
+        :returns: The set of env vars to use for a command
0a180f
+        :rtype: ``dict``
0a180f
+        """
0a180f
+        if env is None:
0a180f
+            return self.default_environment
0a180f
+        if not isinstance(env, dict):
0a180f
+            raise TypeError("Command env vars must be passed as dict")
0a180f
+        _env = self.default_environment.copy()
0a180f
+        _env.update(env)
0a180f
+        return _env
0a180f
+
0a180f
     def timeout_from_options(self, optname, plugoptname, default_timeout):
0a180f
         """Returns either the default [plugin|cmd] timeout value, the value as
0a180f
         provided on the commandline via -k plugin.[|cmd-]timeout=value, or the
0a180f
@@ -2258,6 +2305,8 @@ class Plugin():
0a180f
 
0a180f
         _tags = list(set(_tags))
0a180f
 
0a180f
+        _env = self._get_cmd_environment(env)
0a180f
+
0a180f
         if chroot or self.commons['cmdlineopts'].chroot == 'always':
0a180f
             root = self.sysroot
0a180f
         else:
0a180f
@@ -2068,7 +2068,7 @@
0a180f
 
0a180f
         result = sos_get_command_output(
0a180f
             cmd, timeout=timeout, stderr=stderr, chroot=root,
0a180f
-            chdir=runat, env=env, binary=binary, sizelimit=sizelimit,
0a180f
+            chdir=runat, env=_env, binary=binary, sizelimit=sizelimit,
0a180f
             poller=self.check_timeout, foreground=foreground
0a180f
         )
0a180f
 
0a180f
@@ -2510,6 +2559,8 @@ class Plugin():
0a180f
         else:
0a180f
             root = None
0a180f
 
0a180f
+        _env = self._get_cmd_environment(env)
0a180f
+
0a180f
         if container:
0a180f
             if self._get_container_runtime() is None:
0a180f
                 self._log_info("Cannot run cmd '%s' in container %s: no "
0a180f
@@ -2522,7 +2573,7 @@ class Plugin():
0a180f
                                "container is running." % (cmd, container))
0a180f
 
0a180f
         return sos_get_command_output(cmd, timeout=timeout, chroot=root,
0a180f
-                                      chdir=runat, binary=binary, env=env,
0a180f
+                                      chdir=runat, binary=binary, env=_env,
0a180f
                                       foreground=foreground, stderr=stderr)
0a180f
 
0a180f
     def _add_container_file_to_manifest(self, container, path, arcpath, tags):
0a180f
diff --git a/tests/report_tests/plugin_tests/plugin_environment.py b/tests/report_tests/plugin_tests/plugin_environment.py
0a180f
new file mode 100644
0a180f
index 00000000..3158437a
0a180f
--- /dev/null
0a180f
+++ b/tests/report_tests/plugin_tests/plugin_environment.py
0a180f
@@ -0,0 +1,44 @@
0a180f
+# This file is part of the sos project: https://github.com/sosreport/sos
0a180f
+#
0a180f
+# This copyrighted material is made available to anyone wishing to use,
0a180f
+# modify, copy, or redistribute it subject to the terms and conditions of
0a180f
+# version 2 of the GNU General Public License.
0a180f
+#
0a180f
+# See the LICENSE file in the source distribution for further information.
0a180f
+
0a180f
+import os
0a180f
+
0a180f
+from sos_tests import StageTwoReportTest
0a180f
+
0a180f
+
0a180f
+class PluginDefaultEnvironmentTest(StageTwoReportTest):
0a180f
+    """
0a180f
+    Ensure that being able to set a default set of environment variables is
0a180f
+    working correctly and does not leave a lingering env var on the system
0a180f
+
0a180f
+    :avocado: tags=stageone
0a180f
+    """
0a180f
+
0a180f
+    install_plugins = ['default_env_test']
0a180f
+    sos_cmd = '-o default_env_test'
0a180f
+
0a180f
+    def test_environment_used_in_cmd(self):
0a180f
+        self.assertFileHasContent(
0a180f
+            'sos_commands/default_env_test/env_var_test',
0a180f
+            'Does Linus play hockey?'
0a180f
+        )
0a180f
+
0a180f
+    def test_environment_setting_logged(self):
0a180f
+        self.assertSosLogContains(
0a180f
+            'Default environment for all commands now set to'
0a180f
+        )
0a180f
+
0a180f
+    def test_environment_not_set_on_host(self):
0a180f
+        self.assertTrue('TORVALDS' not in os.environ)
0a180f
+        self.assertTrue('GREATESTSPORT' not in os.environ)
0a180f
+
0a180f
+    def test_environment_not_captured(self):
0a180f
+        # we should still have an empty environment file
0a180f
+        self.assertFileCollected('environment')
0a180f
+        self.assertFileNotHasContent('environment', 'TORVALDS')
0a180f
+        self.assertFileNotHasContent('environment', 'GREATESTSPORT')
0a180f
diff --git a/tests/test_data/fake_plugins/default_env_test.py b/tests/test_data/fake_plugins/default_env_test.py
0a180f
new file mode 100644
0a180f
index 00000000..d1d1fb78
0a180f
--- /dev/null
0a180f
+++ b/tests/test_data/fake_plugins/default_env_test.py
0a180f
@@ -0,0 +1,28 @@
0a180f
+# This file is part of the sos project: https://github.com/sosreport/sos
0a180f
+#
0a180f
+# This copyrighted material is made available to anyone wishing to use,
0a180f
+# modify, copy, or redistribute it subject to the terms and conditions of
0a180f
+# version 2 of the GNU General Public License.
0a180f
+#
0a180f
+# See the LICENSE file in the source distribution for further information.
0a180f
+
0a180f
+from sos.report.plugins import Plugin, IndependentPlugin
0a180f
+
0a180f
+
0a180f
+class DefaultEnv(Plugin, IndependentPlugin):
0a180f
+
0a180f
+    plugin_name = 'default_env_test'
0a180f
+    short_desc = 'Fake plugin to test default env var handling'
0a180f
+
0a180f
+    def setup(self):
0a180f
+        self.set_default_cmd_environment({
0a180f
+            'TORVALDS': 'Linus',
0a180f
+            'GREATESTSPORT': 'hockey'
0a180f
+        })
0a180f
+
0a180f
+        self.add_cmd_output(
0a180f
+            "sh -c 'echo Does '$TORVALDS' play '$GREATESTSPORT'?'",
0a180f
+            suggest_filename='env_var_test'
0a180f
+        )
0a180f
+
0a180f
+        self.add_env_var(['TORVALDS', 'GREATESTSPORT'])
0a180f
diff --git a/tests/unittests/plugin_tests.py b/tests/unittests/plugin_tests.py
0a180f
index 0dfa243d..e469b78e 100644
0a180f
--- a/tests/unittests/plugin_tests.py
0a180f
+++ b/tests/unittests/plugin_tests.py
0a180f
@@ -305,6 +305,21 @@ class PluginTests(unittest.TestCase):
0a180f
         p.postproc()
0a180f
         self.assertTrue(p.did_postproc)
0a180f
 
0a180f
+    def test_set_default_cmd_env(self):
0a180f
+        p = MockPlugin({
0a180f
+            'sysroot': self.sysroot,
0a180f
+            'policy': LinuxPolicy(init=InitSystem(), probe_runtime=False),
0a180f
+            'cmdlineopts': MockOptions(),
0a180f
+            'devices': {}
0a180f
+        })
0a180f
+        e = {'TORVALDS': 'Linus'}
0a180f
+        p.set_default_cmd_environment(e)
0a180f
+        self.assertEquals(p.default_environment, e)
0a180f
+        add_e = {'GREATESTSPORT': 'hockey'}
0a180f
+        p.add_default_cmd_environment(add_e)
0a180f
+        self.assertEquals(p.default_environment['GREATESTSPORT'], 'hockey')
0a180f
+        self.assertEquals(p.default_environment['TORVALDS'], 'Linus')
0a180f
+
0a180f
 
0a180f
 class AddCopySpecTests(unittest.TestCase):
0a180f
 
0a180f
-- 
0a180f
2.27.0
0a180f
0a180f
From 1e12325efaa500d304dcbfbeeb50e72ed0f938f5 Mon Sep 17 00:00:00 2001
0a180f
From: Vladislav Walek <22072258+vwalek@users.noreply.github.com>
0a180f
Date: Thu, 17 Mar 2022 14:10:26 -0700
0a180f
Subject: [PATCH] [openshift] Adding ability to use the localhost.kubeconfig
0a180f
 and KUBECONFIG env to use system:admin
0a180f
0a180f
Signed-off-by: Vladislav Walek <22072258+vwalek@users.noreply.github.com>
0a180f
---
0a180f
 sos/report/plugins/openshift.py | 45 +++++++++++++++++++++++++++++++--
0a180f
 1 file changed, 43 insertions(+), 2 deletions(-)
0a180f
0a180f
diff --git a/sos/report/plugins/openshift.py b/sos/report/plugins/openshift.py
0a180f
index 5ae38178..d643f04c 100644
0a180f
--- a/sos/report/plugins/openshift.py
0a180f
+++ b/sos/report/plugins/openshift.py
0a180f
@@ -60,11 +60,18 @@
0a180f
     profiles = ('openshift',)
0a180f
     packages = ('openshift-hyperkube',)
0a180f
 
0a180f
+    master_localhost_kubeconfig = (
0a180f
+        '/etc/kubernetes/static-pod-resources/'
0a180f
+        'kube-apiserver-certs/secrets/node-kubeconfigs/localhost.kubeconfig'
0a180f
+        )
0a180f
+
0a180f
     option_list = [
0a180f
         ('token', 'admin token to allow API queries', 'fast', None),
0a180f
+        ('kubeconfig', 'Path to a locally available kubeconfig file',
0a180f
+         'fast', None),
0a180f
         ('host', 'host address to use for oc login, including port', 'fast',
0a180f
          'https://localhost:6443'),
0a180f
-        ('no-oc', 'do not collect `oc` command output', 'fast', False),
0a180f
+        ('no-oc', 'do not collect `oc` command output', 'fast', True),
0a180f
         ('podlogs', 'collect logs from each pod', 'fast', True),
0a180f
         ('podlogs-filter', ('limit podlogs collection to pods matching this '
0a180f
          'regex'), 'fast', ''),
0a180f
@@ -73,6 +80,10 @@ class Openshift(Plugin, RedHatPlugin):
0a180f
         """Check to see if we can run `oc` commands"""
0a180f
         return self.exec_cmd('oc whoami')['status'] == 0
0a180f
 
0a180f
+    def _check_localhost_kubeconfig(self):
0a180f
+        """Check if the localhost.kubeconfig exists with system:admin user"""
0a180f
+        return self.path_exists(self.get_option('kubeconfig'))
0a180f
+
0a180f
     def _check_oc_logged_in(self):
0a180f
         """See if we're logged in to the API service, and if not attempt to do
0a180f
         so using provided plugin options
0a180f
@@ -80,8 +91,38 @@ class Openshift(Plugin, RedHatPlugin):
0a180f
         if self._check_oc_function():
0a180f
             return True
0a180f
 
0a180f
-        # Not logged in currently, attempt to do so
0a180f
+        if self.get_option('kubeconfig') is None:
0a180f
+            # If admin doesn't add the kubeconfig
0a180f
+            # use default localhost.kubeconfig
0a180f
+            self.set_option(
0a180f
+                'kubeconfig',
0a180f
+                self.master_localhost_kubeconfig
0a180f
+            )
0a180f
+
0a180f
+        # Check first if we can use the localhost.kubeconfig before
0a180f
+        # using token. We don't want to use 'host' option due we use
0a180f
+        # cluster url from kubeconfig. Default is localhost.
0a180f
+        if self._check_localhost_kubeconfig():
0a180f
+            self.set_default_cmd_environment({
0a180f
+                'KUBECONFIG': self.get_option('kubeconfig')
0a180f
+            })
0a180f
+
0a180f
+            oc_res = self.exec_cmd(
0a180f
+                "oc login -u system:admin "
0a180f
+                "--insecure-skip-tls-verify=True"
0a180f
+            )
0a180f
+            if oc_res['status'] == 0 and self._check_oc_function():
0a180f
+                return True
0a180f
+
0a180f
+            self._log_warn(
0a180f
+                "The login command failed with status: %s and error: %s"
0a180f
+                % (oc_res['status'], oc_res['output'])
0a180f
+            )
0a180f
+            return False
0a180f
+
0a180f
+        # If kubeconfig is not defined, check if token is provided.
0a180f
         token = self.get_option('token') or os.getenv('SOSOCPTOKEN', None)
0a180f
+
0a180f
         if token:
0a180f
             oc_res = self.exec_cmd("oc login %s --token=%s "
0a180f
                                    "--insecure-skip-tls-verify=True"
0a180f
-- 
0a180f
2.27.0
0a180f
0a180f
From 3b84b4ccfa9e4924a5a3829d3810568dfb69bf63 Mon Sep 17 00:00:00 2001
0a180f
From: Jake Hunsaker <jhunsake@redhat.com>
0a180f
Date: Fri, 18 Mar 2022 16:25:35 -0400
0a180f
Subject: [PATCH 1/2] [pacemaker] Redesign node enumeration logic
0a180f
0a180f
It has been found that `pcs status` output is liable to change, which
0a180f
ends up breaking our parsing of node lists when using it on newer
0a180f
versions.
0a180f
0a180f
Instead, first try to parse through `crm_mon` output, which is what `pcs
0a180f
status` uses under the hood, but as a stable and reliable xml format.
0a180f
0a180f
Failing that, for example if the `--primary` node is not functioning as
0a180f
part of the cluster, source `/etc/corosync/corosync.conf` instead.
0a180f
0a180f
Related: RHBZ2065805
0a180f
Related: RHBZ2065811
0a180f
0a180f
Signed-off-by: Jake Hunsaker <jhunsake@redhat.com>
0a180f
---
0a180f
 sos/collector/clusters/pacemaker.py | 110 +++++++++++++++++++---------
0a180f
 1 file changed, 76 insertions(+), 34 deletions(-)
0a180f
0a180f
diff --git a/sos/collector/clusters/pacemaker.py b/sos/collector/clusters/pacemaker.py
0a180f
index 55024314..49d0ce51 100644
0a180f
--- a/sos/collector/clusters/pacemaker.py
0a180f
+++ b/sos/collector/clusters/pacemaker.py
0a180f
@@ -8,7 +8,11 @@
0a180f
 #
0a180f
 # See the LICENSE file in the source distribution for further information.
0a180f
 
0a180f
+import re
0a180f
+
0a180f
 from sos.collector.clusters import Cluster
0a180f
+from setuptools._vendor.packaging import version
0a180f
+from xml.etree import ElementTree
0a180f
 
0a180f
 
0a180f
 class pacemaker(Cluster):
0a180f
@@ -18,42 +22,80 @@ class pacemaker(Cluster):
0a180f
     packages = ('pacemaker',)
0a180f
     option_list = [
0a180f
         ('online', True, 'Collect nodes listed as online'),
0a180f
-        ('offline', True, 'Collect nodes listed as offline')
0a180f
+        ('offline', True, 'Collect nodes listed as offline'),
0a180f
+        ('only-corosync', False, 'Only use corosync.conf to enumerate nodes')
0a180f
     ]
0a180f
 
0a180f
     def get_nodes(self):
0a180f
-        self.res = self.exec_primary_cmd('pcs status')
0a180f
-        if self.res['status'] != 0:
0a180f
-            self.log_error('Cluster status could not be determined. Is the '
0a180f
-                           'cluster running on this node?')
0a180f
-            return []
0a180f
-        if 'node names do not match' in self.res['output']:
0a180f
-            self.log_warn('Warning: node name mismatch reported. Attempts to '
0a180f
-                          'connect to some nodes may fail.\n')
0a180f
-        return self.parse_pcs_output()
0a180f
-
0a180f
-    def parse_pcs_output(self):
0a180f
-        nodes = []
0a180f
-        if self.get_option('online'):
0a180f
-            nodes += self.get_online_nodes()
0a180f
-        if self.get_option('offline'):
0a180f
-            nodes += self.get_offline_nodes()
0a180f
-        return nodes
0a180f
-
0a180f
-    def get_online_nodes(self):
0a180f
-        for line in self.res['output'].splitlines():
0a180f
-            if line.startswith('Online:'):
0a180f
-                nodes = line.split('[')[1].split(']')[0]
0a180f
-                return [n for n in nodes.split(' ') if n]
0a180f
-
0a180f
-    def get_offline_nodes(self):
0a180f
-        offline = []
0a180f
-        for line in self.res['output'].splitlines():
0a180f
-            if line.startswith('Node') and line.endswith('(offline)'):
0a180f
-                offline.append(line.split()[1].replace(':', ''))
0a180f
-            if line.startswith('OFFLINE:'):
0a180f
-                nodes = line.split('[')[1].split(']')[0]
0a180f
-                offline.extend([n for n in nodes.split(' ') if n])
0a180f
-        return offline
0a180f
+        self.nodes = []
0a180f
+        # try crm_mon first
0a180f
+        try:
0a180f
+            if not self.get_option('only-corosync'):
0a180f
+                try:
0a180f
+                    self.get_nodes_from_crm()
0a180f
+                except Exception as err:
0a180f
+                    self.log_warn("Falling back to sourcing corosync.conf. "
0a180f
+                                  "Could not parse crm_mon output: %s" % err)
0a180f
+            if not self.nodes:
0a180f
+                # fallback to corosync.conf, in case the node we're inspecting
0a180f
+                # is offline from the cluster
0a180f
+                self.get_nodes_from_corosync()
0a180f
+        except Exception as err:
0a180f
+            self.log_error("Could not determine nodes from cluster: %s" % err)
0a180f
+
0a180f
+        _shorts = [n for n in self.nodes if '.' not in n]
0a180f
+        if _shorts:
0a180f
+            self.log_warn(
0a180f
+                "WARNING: Node addresses '%s' may not resolve locally if you "
0a180f
+                "are not running on a node in the cluster. Try using option "
0a180f
+                "'-c pacemaker.only-corosync' if these connections fail."
0a180f
+                % ','.join(_shorts)
0a180f
+            )
0a180f
+        return self.nodes
0a180f
+
0a180f
+    def get_nodes_from_crm(self):
0a180f
+        """
0a180f
+        Try to parse crm_mon output for node list and status.
0a180f
+        """
0a180f
+        xmlopt = '--output-as=xml'
0a180f
+        # older pacemaker had a different option for xml output
0a180f
+        _ver = self.exec_primary_cmd('crm_mon --version')
0a180f
+        if _ver['status'] == 0:
0a180f
+            cver = _ver['output'].split()[1].split('-')[0]
0a180f
+            if not version.parse(cver) > version.parse('2.0.3'):
0a180f
+                xmlopt = '--as-xml'
0a180f
+        else:
0a180f
+            return
0a180f
+        _out = self.exec_primary_cmd(
0a180f
+            "crm_mon --one-shot --inactive %s" % xmlopt,
0a180f
+            need_root=True
0a180f
+        )
0a180f
+        if _out['status'] == 0:
0a180f
+            self.parse_crm_xml(_out['output'])
0a180f
+
0a180f
+    def parse_crm_xml(self, xmlstring):
0a180f
+        """
0a180f
+        Parse the xml output string provided by crm_mon
0a180f
+        """
0a180f
+        _xml = ElementTree.fromstring(xmlstring)
0a180f
+        nodes = _xml.find('nodes')
0a180f
+        for node in nodes:
0a180f
+            _node = node.attrib
0a180f
+            if self.get_option('online') and _node['online'] == 'true':
0a180f
+                self.nodes.append(_node['name'])
0a180f
+            elif self.get_option('offline') and _node['online'] == 'false':
0a180f
+                self.nodes.append(_node['name'])
0a180f
+
0a180f
+    def get_nodes_from_corosync(self):
0a180f
+        """
0a180f
+        As a fallback measure, read corosync.conf to get the node list. Note
0a180f
+        that this prevents us from separating online nodes from offline nodes.
0a180f
+        """
0a180f
+        self.log_warn("WARNING: unable to distinguish online nodes from "
0a180f
+                      "offline nodes when sourcing from corosync.conf")
0a180f
+        cc = self.primary.read_file('/etc/corosync/corosync.conf')
0a180f
+        nodes = re.findall(r'((\sring0_addr:)(.*))', cc)
0a180f
+        for node in nodes:
0a180f
+            self.nodes.append(node[-1].strip())
0a180f
 
0a180f
 # vim: set et ts=4 sw=4 :
0a180f
-- 
0a180f
2.27.0
0a180f
0a180f
0a180f
From 6701a7d77ecc998b018b54ecc00f9fd102ae9518 Mon Sep 17 00:00:00 2001
0a180f
From: Jake Hunsaker <jhunsake@redhat.com>
0a180f
Date: Mon, 21 Mar 2022 12:05:59 -0400
0a180f
Subject: [PATCH 2/2] [clusters] Allow clusters to not add localhost to node
0a180f
 list
0a180f
0a180f
For most of our supported clusters, we end up needing to add the
0a180f
local host executing `sos collect` to the node list (unless `--no-local`
0a180f
is used) as that accounts for the primary node that may otherwise be
0a180f
left off. However, this is not helpful for clusters that may reports
0a180f
node names as something other than resolveable names. In those cases,
0a180f
such as with pacemaker, adding the local hostname may result in
0a180f
duplicate collections.
0a180f
0a180f
Add a toggle to cluster profiles via a new `strict_node_list` class attr
0a180f
that, if True, will skip this addition. This toggle is default `False`
0a180f
to preserve existing behavior, and is now enabled for `pacemaker`
0a180f
specifically.
0a180f
0a180f
Related: RHBZ#2065821
0a180f
0a180f
Signed-off-by: Jake Hunsaker <jhunsake@redhat.com>
0a180f
---
0a180f
 sos/collector/__init__.py           | 3 ++-
0a180f
 sos/collector/clusters/__init__.py  | 4 ++++
0a180f
 sos/collector/clusters/pacemaker.py | 1 +
0a180f
 3 files changed, 7 insertions(+), 1 deletion(-)
0a180f
0a180f
diff --git a/sos/collector/__init__.py b/sos/collector/__init__.py
0a180f
index a8bb0064..d898ca34 100644
0a180f
--- a/sos/collector/__init__.py
0a180f
+++ b/sos/collector/__init__.py
0a180f
@@ -1073,7 +1073,8 @@ class SoSCollector(SoSComponent):
0a180f
             for node in self.node_list:
0a180f
                 if host == node.split('.')[0]:
0a180f
                     self.node_list.remove(node)
0a180f
-            self.node_list.append(self.hostname)
0a180f
+            if not self.cluster.strict_node_list:
0a180f
+                self.node_list.append(self.hostname)
0a180f
         self.reduce_node_list()
0a180f
         try:
0a180f
             _node_max = len(max(self.node_list, key=len))
0a180f
diff --git a/sos/collector/clusters/__init__.py b/sos/collector/clusters/__init__.py
0a180f
index f3f550ad..f00677b8 100644
0a180f
--- a/sos/collector/clusters/__init__.py
0a180f
+++ b/sos/collector/clusters/__init__.py
0a180f
@@ -56,6 +56,10 @@
0a180f
     sos_plugin_options = {}
0a180f
     sos_preset = ''
0a180f
     cluster_name = None
0a180f
+    # set this to True if the local host running collect should *not* be
0a180f
+    # forcibly added to the node list. This can be helpful in situations where
0a180f
+    # the host's fqdn and the name the cluster uses are different
0a180f
+    strict_node_list = False
0a180f
 
0a180f
     def __init__(self, commons):
0a180f
         self.master = None
0a180f
diff --git a/sos/collector/clusters/pacemaker.py b/sos/collector/clusters/pacemaker.py
0a180f
index 49d0ce51..bebcb265 100644
0a180f
--- a/sos/collector/clusters/pacemaker.py
0a180f
+++ b/sos/collector/clusters/pacemaker.py
0a180f
@@ -20,6 +20,7 @@ class pacemaker(Cluster):
0a180f
     cluster_name = 'Pacemaker High Availability Cluster Manager'
0a180f
     sos_plugins = ['pacemaker']
0a180f
     packages = ('pacemaker',)
0a180f
+    strict_node_list = True
0a180f
     option_list = [
0a180f
         ('online', True, 'Collect nodes listed as online'),
0a180f
         ('offline', True, 'Collect nodes listed as offline'),
0a180f
-- 
0a180f
2.27.0
0a180f
0a180f
From 61765992812afb785e9552e01e3b5579118a6963 Mon Sep 17 00:00:00 2001
0a180f
From: Nadia Pinaeva <npinaeva@redhat.com>
0a180f
Date: Fri, 1 Apr 2022 12:05:36 +0200
0a180f
Subject: [PATCH] Add one more container for plugin enablement
0a180f
0a180f
Signed-off-by: Nadia Pinaeva <npinaeva@redhat.com>
0a180f
---
0a180f
 sos/report/plugins/openshift_ovn.py | 2 +-
0a180f
 1 file changed, 1 insertion(+), 1 deletion(-)
0a180f
0a180f
diff --git a/sos/report/plugins/openshift_ovn.py b/sos/report/plugins/openshift_ovn.py
0a180f
index b4787b8e..98522b1e 100644
0a180f
--- a/sos/report/plugins/openshift_ovn.py
0a180f
+++ b/sos/report/plugins/openshift_ovn.py
0a180f
@@ -16,7 +16,7 @@ class OpenshiftOVN(Plugin, RedHatPlugin):
0a180f
     """
0a180f
     short_desc = 'Openshift OVN'
0a180f
     plugin_name = "openshift_ovn"
0a180f
-    containers = ('ovnkube-master', 'ovn-ipsec')
0a180f
+    containers = ('ovnkube-master', 'ovnkube-node', 'ovn-ipsec')
0a180f
     profiles = ('openshift',)
0a180f
 
0a180f
     def setup(self):
0a180f
-- 
0a180f
2.27.0
0a180f
0a180f
From d3aa071efc85507341cf65dd61414a734654f50a Mon Sep 17 00:00:00 2001
0a180f
From: Jake Hunsaker <jhunsake@redhat.com>
0a180f
Date: Mon, 28 Mar 2022 14:47:09 -0400
0a180f
Subject: [PATCH] [presets] Adjust OCP preset options
0a180f
0a180f
Adjust the options used by the 'ocp' preset to better reflect the
0a180f
current collection needs and approach.
0a180f
0a180f
This includes disabling the `cgroups` plugin due to the large amount of
0a180f
mostly irrelevant data captured due to the high number of containers
0a180f
present on OCP nodes, ensuring the `--container-runtime` option is set
0a180f
to `crio` to align container-based collections, disabling HTML report
0a180f
generation and increasing the base log size rather than blindly enabling
0a180f
all-logs.
0a180f
0a180f
Signed-off-by: Jake Hunsaker <jhunsake@redhat.com>
0a180f
---
0a180f
 sos/presets/redhat/__init__.py | 13 +++++++++----
0a180f
 1 file changed, 9 insertions(+), 4 deletions(-)
0a180f
0a180f
diff --git a/sos/presets/redhat/__init__.py b/sos/presets/redhat/__init__.py
0a180f
index 865c9b6b..0b9f6f11 100644
0a180f
--- a/sos/presets/redhat/__init__.py
0a180f
+++ b/sos/presets/redhat/__init__.py
0a180f
@@ -36,10 +36,15 @@ RHOSP_OPTS = SoSOptions(plugopts=[
0a180f
 
0a180f
 RHOCP = "ocp"
0a180f
 RHOCP_DESC = "OpenShift Container Platform by Red Hat"
0a180f
-RHOCP_OPTS = SoSOptions(all_logs=True, verify=True, plugopts=[
0a180f
-                             'networking.timeout=600',
0a180f
-                             'networking.ethtool_namespaces=False',
0a180f
-                             'networking.namespaces=200'])
0a180f
+RHOCP_OPTS = SoSOptions(
0a180f
+    verify=True, skip_plugins=['cgroups'], container_runtime='crio',
0a180f
+    no_report=True, log_size=100,
0a180f
+    plugopts=[
0a180f
+        'crio.timeout=600',
0a180f
+        'networking.timeout=600',
0a180f
+        'networking.ethtool_namespaces=False',
0a180f
+        'networking.namespaces=200'
0a180f
+    ])
0a180f
 
0a180f
 RH_CFME = "cfme"
0a180f
 RH_CFME_DESC = "Red Hat CloudForms"
0a180f
-- 
0a180f
2.27.0
0a180f
0a180f
From f2b67ab820070063995689fed03492cdaa012d01 Mon Sep 17 00:00:00 2001
0a180f
From: Nadia Pinaeva <npinaeva@redhat.com>
0a180f
Date: Fri, 1 Apr 2022 17:01:35 +0200
0a180f
Subject: [PATCH] Use /etc/os-release instead of /etc/redhat-release as the
0a180f
 most compatible way to find host release
0a180f
0a180f
Signed-off-by: Nadia Pinaeva <npinaeva@redhat.com>
0a180f
---
0a180f
 sos/policies/distros/redhat.py | 5 ++---
0a180f
 1 file changed, 2 insertions(+), 3 deletions(-)
0a180f
0a180f
diff --git a/sos/policies/distros/redhat.py b/sos/policies/distros/redhat.py
0a180f
index 0c72a5e4..2e117f37 100644
0a180f
--- a/sos/policies/distros/redhat.py
0a180f
+++ b/sos/policies/distros/redhat.py
0a180f
@@ -40,7 +40,6 @@ class RedHatPolicy(LinuxPolicy):
0a180f
         ('Distribution Website', 'https://www.redhat.com/'),
0a180f
         ('Commercial Support', 'https://www.access.redhat.com/')
0a180f
     ]
0a180f
-    _redhat_release = '/etc/redhat-release'
0a180f
     _tmp_dir = "/var/tmp"
0a180f
     _in_container = False
0a180f
     default_scl_prefix = '/opt/rh'
0a180f
@@ -471,7 +470,7 @@ support representative.
0a180f
         atomic = False
0a180f
         if ENV_HOST_SYSROOT not in os.environ:
0a180f
             return atomic
0a180f
-        host_release = os.environ[ENV_HOST_SYSROOT] + cls._redhat_release
0a180f
+        host_release = os.environ[ENV_HOST_SYSROOT] + OS_RELEASE
0a180f
         if not os.path.exists(host_release):
0a180f
             return False
0a180f
         try:
0a180f
@@ -558,7 +557,7 @@ support representative.
0a180f
         coreos = False
0a180f
         if ENV_HOST_SYSROOT not in os.environ:
0a180f
             return coreos
0a180f
-        host_release = os.environ[ENV_HOST_SYSROOT] + cls._redhat_release
0a180f
+        host_release = os.environ[ENV_HOST_SYSROOT] + OS_RELEASE
0a180f
         try:
0a180f
             for line in open(host_release, 'r').read().splitlines():
0a180f
                 coreos |= 'Red Hat Enterprise Linux CoreOS' in line
0a180f
-- 
0a180f
2.27.0
0a180f
0a180f
From ee0dd68199a2c9296eafe64ead5b2263c8270e4a Mon Sep 17 00:00:00 2001
0a180f
From: Nadia Pinaeva <npinaeva@redhat.com>
0a180f
Date: Wed, 6 Apr 2022 11:56:41 +0200
0a180f
Subject: [PATCH] Use --force-pull-image option for pods created with oc. Set
0a180f
 --force-pull-image=True by default, can be turned off with
0a180f
 --force-pull-image=False
0a180f
0a180f
Signed-off-by: Nadia Pinaeva <npinaeva@redhat.com>
0a180f
---
0a180f
 man/en/sos-collect.1           | 16 +++++++++++-----
0a180f
 sos/collector/__init__.py      |  9 +++++----
0a180f
 sos/collector/transports/oc.py |  2 ++
0a180f
 sos/options.py                 | 20 ++++++++++++++------
0a180f
 4 files changed, 32 insertions(+), 15 deletions(-)
0a180f
0a180f
diff --git a/man/en/sos-collect.1 b/man/en/sos-collect.1
0a180f
index 9b0a5d7b..2f60332b 100644
0a180f
--- a/man/en/sos-collect.1
0a180f
+++ b/man/en/sos-collect.1
0a180f
@@ -28,7 +28,7 @@
0a180f
     [\-\-no\-local]
0a180f
     [\-\-master MASTER]
0a180f
     [\-\-image IMAGE]
0a180f
-    [\-\-force-pull-image]
0a180f
+    [\-\-force-pull-image TOGGLE, --pull TOGGLE]
0a180f
     [\-\-registry-user USER]
0a180f
     [\-\-registry-password PASSWORD]
0a180f
     [\-\-registry-authfile FILE]
0a180f
@@ -262,10 +262,16 @@ Specify an image to use for the temporary container created for collections on
0a180f
 containerized host, if you do not want to use the default image specifed by the
0a180f
 host's policy. Note that this should include the registry.
0a180f
 .TP
0a180f
-\fB\-\-force-pull-image\fR
0a180f
-Use this option to force the container runtime to pull the specified image (even
0a180f
-if it is the policy default image) even if the image already exists on the host.
0a180f
-This may be useful to update an older container image on containerized hosts.
0a180f
+\fB\-\-force-pull-image TOGGLE, \-\-pull TOGGLE\fR
0a180f
+When collecting an sos report from a containerized host, force the host to always
0a180f
+pull the specified image, even if that image already exists on the host.
0a180f
+This is useful to ensure that the latest version of that image is always in use.
0a180f
+Disabling this option will use whatever version of the image is present on the node,
0a180f
+and only attempt a pull if there is no copy of the image present at all.
0a180f
+
0a180f
+Enable with true/on/yes or disable with false/off/no
0a180f
+
0a180f
+Default: true
0a180f
 .TP
0a180f
 \fB\-\-registry-user USER\fR
0a180f
 Specify the username to authenticate to the registry with in order to pull the container
0a180f
diff --git a/sos/collector/__init__.py b/sos/collector/__init__.py
0a180f
index d898ca34..66c3d932 100644
0a180f
--- a/sos/collector/__init__.py
0a180f
+++ b/sos/collector/__init__.py
0a180f
@@ -27,7 +27,7 @@
0a180f
 from textwrap import fill
0a180f
 from sos.cleaner import SoSCleaner
0a180f
 from sos.collector.sosnode import SosNode
0a180f
-from sos.options import ClusterOption
0a180f
+from sos.options import ClusterOption, str_to_bool
0a180f
 from sos.component import SoSComponent
0a180f
 from sos import __version__
0a180f
 
0a180f
@@ -85,7 +85,7 @@ class SoSCollector(SoSComponent):
0a180f
         'encrypt_pass': '',
0a180f
         'group': None,
0a180f
         'image': '',
0a180f
-        'force_pull_image': False,
0a180f
+        'force_pull_image': True,
0a180f
         'jobs': 4,
0a180f
         'keywords': [],
0a180f
         'keyword_file': None,
0a180f
@@ -357,8 +357,9 @@ class SoSCollector(SoSComponent):
0a180f
         collect_grp.add_argument('--image',
0a180f
                                  help=('Specify the container image to use for'
0a180f
                                        ' containerized hosts.'))
0a180f
-        collect_grp.add_argument('--force-pull-image', '--pull', default=False,
0a180f
-                                 action='store_true',
0a180f
+        collect_grp.add_argument('--force-pull-image', '--pull',
0a180f
+                                 default=True, choices=(True, False),
0a180f
+                                 type=str_to_bool,
0a180f
                                  help='Force pull the container image even if '
0a180f
                                       'it already exists on the host')
0a180f
         collect_grp.add_argument('--registry-user', default=None,
0a180f
diff --git a/sos/collector/transports/oc.py b/sos/collector/transports/oc.py
0a180f
index 90a802b2..8f6aa9b4 100644
0a180f
--- a/sos/collector/transports/oc.py
0a180f
+++ b/sos/collector/transports/oc.py
0a180f
@@ -147,6 +147,8 @@ class OCTransport(RemoteTransport):
0a180f
                         "tty": True
0a180f
                     }
0a180f
                 ],
0a180f
+                "imagePullPolicy":
0a180f
+                    "Always" if self.opts.force_pull_image else "IfNotPresent",
0a180f
                 "restartPolicy": "Never",
0a180f
                 "nodeName": self.address,
0a180f
                 "hostNetwork": True,
0a180f
diff --git a/sos/options.py b/sos/options.py
0a180f
index 4846a509..2d5a5135 100644
0a180f
--- a/sos/options.py
0a180f
+++ b/sos/options.py
0a180f
@@ -18,6 +18,16 @@ def _is_seq(val):
0a180f
     return val_type is list or val_type is tuple
0a180f
 
0a180f
 
0a180f
+def str_to_bool(val):
0a180f
+    _val = val.lower()
0a180f
+    if _val in ['true', 'on', 'yes']:
0a180f
+        return True
0a180f
+    elif _val in ['false', 'off', 'no']:
0a180f
+        return False
0a180f
+    else:
0a180f
+        return None
0a180f
+
0a180f
+
0a180f
 class SoSOptions():
0a180f
 
0a180f
     def _merge_opt(self, opt, src, is_default):
0a180f
@@ -153,15 +163,13 @@ class SoSOptions():
0a180f
         if isinstance(self.arg_defaults[key], list):
0a180f
             return [v for v in val.split(',')]
0a180f
         if isinstance(self.arg_defaults[key], bool):
0a180f
-            _val = val.lower()
0a180f
-            if _val in ['true', 'on', 'yes']:
0a180f
-                return True
0a180f
-            elif _val in ['false', 'off', 'no']:
0a180f
-                return False
0a180f
-            else:
0a180f
+            val = str_to_bool(val)
0a180f
+            if val is None:
0a180f
                 raise Exception(
0a180f
                     "Value of '%s' in %s must be True or False or analagous"
0a180f
                     % (key, conf))
0a180f
+            else:
0a180f
+                return val
0a180f
         if isinstance(self.arg_defaults[key], int):
0a180f
             try:
0a180f
                 return int(val)
0a180f
-- 
0a180f
2.27.0
0a180f
0a180f
From ade857c82926bb7de2c45232f00a3f346db4e59a Mon Sep 17 00:00:00 2001
0a180f
From: Jake Hunsaker <jhunsake@redhat.com>
0a180f
Date: Wed, 3 Nov 2021 16:28:10 -0400
0a180f
Subject: [PATCH] [collect] Update docstrings for collect for help command
0a180f
0a180f
Updates the docstrings for collect components for the new `help`
0a180f
command, including minor formatting changes to make the help output more
0a180f
human-friendly.
0a180f
0a180f
This includes docstring updates and changes to transports and cluster
0a180f
profiles.
0a180f
0a180f
Signed-off-by: Jake Hunsaker <jhunsake@redhat.com>
0a180f
---
0a180f
 sos/collector/clusters/jbon.py              | 13 +++++---
0a180f
 sos/collector/clusters/kubernetes.py        |  7 ++++-
0a180f
 sos/collector/clusters/ocp.py               | 35 ++++++++++++++++++++-
0a180f
 sos/collector/clusters/ovirt.py             | 27 ++++++++++++++++
0a180f
 sos/collector/clusters/satellite.py         |  9 +++++-
0a180f
 sos/collector/transports/control_persist.py | 12 +++++--
0a180f
 sos/collector/transports/local.py           |  7 +++--
0a180f
 sos/collector/transports/oc.py              | 20 ++++++++++--
0a180f
 8 files changed, 114 insertions(+), 16 deletions(-)
0a180f
0a180f
diff --git a/sos/collector/clusters/jbon.py b/sos/collector/clusters/jbon.py
0a180f
index 8f083ac6..219e1901 100644
0a180f
--- a/sos/collector/clusters/jbon.py
0a180f
+++ b/sos/collector/clusters/jbon.py
0a180f
@@ -12,11 +12,14 @@ from sos.collector.clusters import Cluster
0a180f
 
0a180f
 
0a180f
 class jbon(Cluster):
0a180f
-    '''Just a Bunch of Nodes
0a180f
-
0a180f
-    Used when --cluster-type=none (or jbon), to avoid cluster checks, and just
0a180f
-    use the provided --nodes list
0a180f
-    '''
0a180f
+    """
0a180f
+    Used when --cluster-type=none (or jbon) to avoid cluster checks, and just
0a180f
+    use the provided --nodes list.
0a180f
+
0a180f
+    Using this profile will skip any and all operations that a cluster profile
0a180f
+    normally performs, and will not set any plugins, plugin options, or presets
0a180f
+    for the sos report generated on the nodes provided by --nodes.
0a180f
+    """
0a180f
 
0a180f
     cluster_name = 'Just a Bunch Of Nodes (no cluster)'
0a180f
     packages = None
0a180f
diff --git a/sos/collector/clusters/kubernetes.py b/sos/collector/clusters/kubernetes.py
0a180f
index 99f788dc..04752977 100644
0a180f
--- a/sos/collector/clusters/kubernetes.py
0a180f
+++ b/sos/collector/clusters/kubernetes.py
0a180f
@@ -13,7 +13,12 @@ from sos.collector.clusters import Cluster
0a180f
 
0a180f
 
0a180f
 class kubernetes(Cluster):
0a180f
-
0a180f
+    """
0a180f
+    The kubernetes cluster profile is intended to be used on kubernetes
0a180f
+    clusters built from the upstream/source kubernetes (k8s) project. It is
0a180f
+    not intended for use with other projects or platforms that are built on top
0a180f
+    of kubernetes.
0a180f
+    """
0a180f
     cluster_name = 'Community Kubernetes'
0a180f
     packages = ('kubernetes-master',)
0a180f
     sos_plugins = ['kubernetes']
0a180f
diff --git a/sos/collector/clusters/ocp.py b/sos/collector/clusters/ocp.py
0a180f
index ae93ad58..f1714239 100644
0a180f
--- a/sos/collector/clusters/ocp.py
0a180f
+++ b/sos/collector/clusters/ocp.py
0a180f
@@ -16,7 +16,40 @@ from sos.utilities import is_executable
0a180f
 
0a180f
 
0a180f
 class ocp(Cluster):
0a180f
-    """OpenShift Container Platform v4"""
0a180f
+    """
0a180f
+    This profile is for use with OpenShift Container Platform (v4) clusters
0a180f
+    instead of the kubernetes profile.
0a180f
+
0a180f
+    This profile will favor using the `oc` transport type, which means it will
0a180f
+    leverage a locally installed `oc` binary. This is also how node enumeration
0a180f
+    is done. To instead use SSH to connect to the nodes, use the
0a180f
+    '--transport=control_persist' option.
0a180f
+
0a180f
+    Thus, a functional `oc` binary for the user executing sos collect is
0a180f
+    required. Functional meaning that the user can run `oc` commands with
0a180f
+    clusterAdmin privileges.
0a180f
+
0a180f
+    If this requires the use of a secondary configuration file, specify that
0a180f
+    path with the 'kubeconfig' cluster option.
0a180f
+
0a180f
+    Alternatively, provide a clusterAdmin access token either via the 'token'
0a180f
+    cluster option or, preferably, the SOSOCPTOKEN environment variable.
0a180f
+
0a180f
+    By default, this profile will enumerate only master nodes within the
0a180f
+    cluster, and this may be changed by overriding the 'role' cluster option.
0a180f
+    To collect from all nodes in the cluster regardless of role, use the form
0a180f
+    -c ocp.role=''.
0a180f
+
0a180f
+    Filtering nodes by a label applied to that node is also possible via the
0a180f
+    label cluster option, though be aware that this is _combined_ with the role
0a180f
+    option mentioned above.
0a180f
+
0a180f
+    To avoid redundant collections of OCP API information (e.g. 'oc get'
0a180f
+    commands), this profile will attempt to enable the openshift plugin on only
0a180f
+    a single master node. If the none of the master nodes have a functional
0a180f
+    'oc' binary available, *and* the --no-local option is used, that means that
0a180f
+    no API data will be collected.
0a180f
+    """
0a180f
 
0a180f
     cluster_name = 'OpenShift Container Platform v4'
0a180f
     packages = ('openshift-hyperkube', 'openshift-clients')
0a180f
diff --git a/sos/collector/clusters/ovirt.py b/sos/collector/clusters/ovirt.py
0a180f
index bd2d0c74..4587a1ca 100644
0a180f
--- a/sos/collector/clusters/ovirt.py
0a180f
+++ b/sos/collector/clusters/ovirt.py
0a180f
@@ -17,6 +17,33 @@ ENGINE_KEY = '/etc/pki/ovirt-engine/keys/engine_id_rsa'
0a180f
 
0a180f
 
0a180f
 class ovirt(Cluster):
0a180f
+    """
0a180f
+    This cluster profile is for the oVirt/RHV project which provides for a
0a180f
+    virtualization cluster built on top of KVM.
0a180f
+
0a180f
+    Nodes enumerated will be hypervisors within the environment, not virtual
0a180f
+    machines running on those hypervisors. By default, ALL hypervisors within
0a180f
+    the environment are returned. This may be influenced by the 'cluster' and
0a180f
+    'datacenter' cluster options, which will limit enumeration to hypervisors
0a180f
+    within the specific cluster and/or datacenter. The spm-only cluster option
0a180f
+    may also be used to only collect from hypervisors currently holding the
0a180f
+    SPM role.
0a180f
+
0a180f
+    Optionally, to only collect an archive from manager and the postgresql
0a180f
+    database, use the no-hypervisors cluster option.
0a180f
+
0a180f
+    By default, a second archive from the manager will be collected that is
0a180f
+    just the postgresql plugin configured in such a way that a dump of the
0a180f
+    manager's database that can be explored and restored to other systems will
0a180f
+    be collected.
0a180f
+
0a180f
+    The ovirt profile focuses on the upstream, community ovirt project.
0a180f
+
0a180f
+    The rhv profile is for Red Hat customers running RHV (formerly RHEV).
0a180f
+
0a180f
+    The rhhi_virt profile is for Red Hat customers running RHV in a
0a180f
+    hyper-converged setup and enables gluster collections.
0a180f
+    """
0a180f
 
0a180f
     cluster_name = 'Community oVirt'
0a180f
     packages = ('ovirt-engine',)
0a180f
diff --git a/sos/collector/clusters/satellite.py b/sos/collector/clusters/satellite.py
0a180f
index 7c21e553..5e28531d 100644
0a180f
--- a/sos/collector/clusters/satellite.py
0a180f
+++ b/sos/collector/clusters/satellite.py
0a180f
@@ -13,7 +13,14 @@ from sos.collector.clusters import Cluster
0a180f
 
0a180f
 
0a180f
 class satellite(Cluster):
0a180f
-    """Red Hat Satellite 6"""
0a180f
+    """
0a180f
+    This profile is specifically for Red Hat Satellite 6, and not earlier
0a180f
+    releases of Satellite.
0a180f
+
0a180f
+    While not technically a 'cluster' in the traditional sense, Satellite
0a180f
+    does provide for 'capsule' nodes which is what this profile aims to
0a180f
+    enumerate beyond the 'primary' Satellite system.
0a180f
+    """
0a180f
 
0a180f
     cluster_name = 'Red Hat Satellite 6'
0a180f
     packages = ('satellite', 'satellite-installer')
0a180f
diff --git a/sos/collector/transports/control_persist.py b/sos/collector/transports/control_persist.py
0a180f
index 3e848b41..ae6c7e43 100644
0a180f
--- a/sos/collector/transports/control_persist.py
0a180f
+++ b/sos/collector/transports/control_persist.py
0a180f
@@ -26,10 +26,18 @@ from sos.utilities import sos_get_command_output
0a180f
 
0a180f
 
0a180f
 class SSHControlPersist(RemoteTransport):
0a180f
-    """A transport for collect that leverages OpenSSH's Control Persist
0a180f
+    """
0a180f
+    A transport for collect that leverages OpenSSH's ControlPersist
0a180f
     functionality which uses control sockets to transparently keep a connection
0a180f
     open to the remote host without needing to rebuild the SSH connection for
0a180f
-    each and every command executed on the node
0a180f
+    each and every command executed on the node.
0a180f
+
0a180f
+    This transport will by default assume the use of SSH keys, meaning keys
0a180f
+    have already been distributed to target nodes. If this is not the case,
0a180f
+    users will need to provide a password using the --password or
0a180f
+    --password-per-node option, depending on if the password to connect to all
0a180f
+    nodes is the same or not. Note that these options prevent the use of the
0a180f
+    --batch option, as they require user input.
0a180f
     """
0a180f
 
0a180f
     name = 'control_persist'
0a180f
diff --git a/sos/collector/transports/local.py b/sos/collector/transports/local.py
0a180f
index 2996d524..dd72053c 100644
0a180f
--- a/sos/collector/transports/local.py
0a180f
+++ b/sos/collector/transports/local.py
0a180f
@@ -15,9 +15,10 @@ from sos.collector.transports import RemoteTransport
0a180f
 
0a180f
 
0a180f
 class LocalTransport(RemoteTransport):
0a180f
-    """A 'transport' to represent a local node. This allows us to more easily
0a180f
-    extend SoSNode() without having a ton of 'if local' or similar checks in
0a180f
-    more places than we actually need them
0a180f
+    """
0a180f
+    A 'transport' to represent a local node. No remote connection is actually
0a180f
+    made, and all commands set to be run by this transport are executed locally
0a180f
+    without any wrappers.
0a180f
     """
0a180f
 
0a180f
     name = 'local_node'
0a180f
diff --git a/sos/collector/transports/oc.py b/sos/collector/transports/oc.py
0a180f
index 720dd61d..0fc9eee8 100644
0a180f
--- a/sos/collector/transports/oc.py
0a180f
+++ b/sos/collector/transports/oc.py
0a180f
@@ -18,15 +18,29 @@ from sos.utilities import (is_executable, sos_get_command_output,
0a180f
 
0a180f
 
0a180f
 class OCTransport(RemoteTransport):
0a180f
-    """This transport leverages the execution of commands via a locally
0a180f
+    """
0a180f
+    This transport leverages the execution of commands via a locally
0a180f
     available and configured ``oc`` binary for OCPv4 environments.
0a180f
 
0a180f
+    The location of the oc binary MUST be in the $PATH used by the locally
0a180f
+    loaded SoS policy. Specifically this means that the binary cannot be in the
0a180f
+    running user's home directory, such as ~/.local/bin.
0a180f
+
0a180f
     OCPv4 clusters generally discourage the use of SSH, so this transport may
0a180f
     be used to remove our use of SSH in favor of the environment provided
0a180f
     method of connecting to nodes and executing commands via debug pods.
0a180f
 
0a180f
-    Note that this approach will generate multiple debug pods over the course
0a180f
-    of our execution
0a180f
+    The debug pod created will be a privileged pod that mounts the host's
0a180f
+    filesystem internally so that sos report collections reflect the host, and
0a180f
+    not the container in which it runs.
0a180f
+
0a180f
+    This transport will execute within a temporary 'sos-collect-tmp' project
0a180f
+    created by the OCP cluster profile. The project will be removed at the end
0a180f
+    of execution.
0a180f
+
0a180f
+    In the event of failures due to a misbehaving OCP API or oc binary, it is
0a180f
+    recommended to fallback to the control_persist transport by manually
0a180f
+    setting the --transport option.
0a180f
     """
0a180f
 
0a180f
     name = 'oc'
0a180f
-- 
0a180f
2.27.0
0a180f
0a180f
From ce289a3ae7101a898efdb84ddfd575576ba5819b Mon Sep 17 00:00:00 2001
0a180f
From: Jake Hunsaker <jhunsake@redhat.com>
0a180f
Date: Tue, 5 Apr 2022 11:32:11 -0400
0a180f
Subject: [PATCH] [ocp, openshift] Re-align API collection options and rename
0a180f
 option
0a180f
0a180f
Previously, in #2888, the `openshift` plugin was extended to allow API
0a180f
collections by using a default-available kubeconfig file rather than
0a180f
relying on user-provided tokens. This also included flipping the default
0a180f
value of the `no-oc` plugin option to `True` (meaning do not collect API
0a180f
output by default).
0a180f
0a180f
This worked for the plugin, but it introduced a gap in `sos collect`
0a180f
whereby the cluster profile could no longer reliably enable API
0a180f
collections when trying to leverage the new functionality of not
0a180f
requiring a user token.
0a180f
0a180f
Fix this by updating the cluster profile to align with the new
0a180f
default-off approach of API collections.
0a180f
0a180f
Along with this, add a toggle to the cluster profile directly to allow
0a180f
users to toggle API collections on or off (default off) directly. This
0a180f
is done via a new `with-api` cluster option (e.g. `-c ocp.with-api`).
0a180f
Further, rename the `openshift` plugin option from `no-oc` to
0a180f
`with-api`. This change not only makes the option use case far more
0a180f
obvious, it will also align the use of the option to both `collect` and
0a180f
`report` so that users need only be aware of a single option for either
0a180f
method.
0a180f
0a180f
The cluster profile also has logic to detect which plugin option,
0a180f
`no-oc` or `with-api` to use based on the (RHEL) sos version installed
0a180f
on the nodes being inspected by the `ocp` cluster profile.
0a180f
0a180f
Signed-off-by: Jake Hunsaker <jhunsake@redhat.com>
0a180f
---
0a180f
 sos/collector/clusters/ocp.py   | 72 +++++++++++++++++++++++++++------
0a180f
 sos/report/plugins/openshift.py | 26 +++++++-----
0a180f
 2 files changed, 77 insertions(+), 21 deletions(-)
0a180f
0a180f
diff --git a/sos/collector/clusters/ocp.py b/sos/collector/clusters/ocp.py
0a180f
index 9beb2f9b..e31d1903 100644
0a180f
--- a/sos/collector/clusters/ocp.py
0a180f
+++ b/sos/collector/clusters/ocp.py
0a180f
@@ -30,7 +30,11 @@ class ocp(Cluster):
0a180f
     clusterAdmin privileges.
0a180f
 
0a180f
     If this requires the use of a secondary configuration file, specify that
0a180f
-    path with the 'kubeconfig' cluster option.
0a180f
+    path with the 'kubeconfig' cluster option. This config file will also be
0a180f
+    used on a single master node to perform API collections if the `with-api`
0a180f
+    option is enabled (default disabled). If no `kubeconfig` option is given,
0a180f
+    but `with-api` is enabled, the cluster profile will attempt to use a
0a180f
+    well-known default kubeconfig file if it is available on the host.
0a180f
 
0a180f
     Alternatively, provide a clusterAdmin access token either via the 'token'
0a180f
     cluster option or, preferably, the SOSOCPTOKEN environment variable.
0a180f
@@ -45,7 +49,7 @@ class ocp(Cluster):
0a180f
     option mentioned above.
0a180f
 
0a180f
     To avoid redundant collections of OCP API information (e.g. 'oc get'
0a180f
-    commands), this profile will attempt to enable the openshift plugin on only
0a180f
+    commands), this profile will attempt to enable the API collections on only
0a180f
     a single master node. If the none of the master nodes have a functional
0a180f
     'oc' binary available, *and* the --no-local option is used, that means that
0a180f
     no API data will be collected.
0a180f
@@ -63,7 +67,8 @@ class ocp(Cluster):
0a180f
         ('label', '', 'Colon delimited list of labels to select nodes with'),
0a180f
         ('role', 'master', 'Colon delimited list of roles to filter on'),
0a180f
         ('kubeconfig', '', 'Path to the kubeconfig file'),
0a180f
-        ('token', '', 'Service account token to use for oc authorization')
0a180f
+        ('token', '', 'Service account token to use for oc authorization'),
0a180f
+        ('with-api', False, 'Collect OCP API data from a master node')
0a180f
     ]
0a180f
 
0a180f
     def fmt_oc_cmd(self, cmd):
0a180f
@@ -219,13 +219,52 @@
0a180f
             return False
0a180f
         return 'master' in self.node_dict[sosnode.address]['roles']
0a180f
 
0a180f
+    def _toggle_api_opt(self, node, use_api):
0a180f
+        """In earlier versions of sos, the openshift plugin option that is
0a180f
+        used to toggle the API collections was called `no-oc` rather than
0a180f
+        `with-api`. This older plugin option had the inverse logic of the
0a180f
+        current `with-api` option.
0a180f
+
0a180f
+        Use this to toggle the correct plugin option given the node's sos
0a180f
+        version. Note that the use of version 4.2 here is tied to the RHEL
0a180f
+        release (the only usecase for this cluster profile) rather than
0a180f
+        the upstream version given the backports for that downstream.
0a180f
+
0a180f
+        :param node:    The node being inspected for API collections
0a180f
+        :type node:     ``SoSNode``
0a180f
+
0a180f
+        :param use_api: Should this node enable API collections?
0a180f
+        :type use_api:  ``bool``
0a180f
+        """
0a180f
+        if node.check_sos_version('4.2-16'):
0a180f
+            _opt = 'with-api'
0a180f
+            _val = 'on' if use_api else 'off'
0a180f
+        else:
0a180f
+            _opt = 'no-oc'
0a180f
+            _val = 'off' if use_api else 'on'
0a180f
+        node.plugopts.append("openshift.%s=%s" % (_opt, _val))
0a180f
+
0a180f
     def set_master_options(self, node):
0a180f
+
0a180f
         node.enable_plugins.append('openshift')
0a180f
+        if not self.get_option('with-api'):
0a180f
+            self._toggle_api_opt(node, False)
0a180f
+            return
0a180f
         if self.api_collect_enabled:
0a180f
             # a primary has already been enabled for API collection, disable
0a180f
             # it among others
0a180f
-            node.plugopts.append('openshift.no-oc=on')
0a180f
+            self._toggle_api_opt(node, False)
0a180f
         else:
0a180f
+            # running in a container, so reference the /host mount point
0a180f
+            master_kube = (
0a180f
+                '/host/etc/kubernetes/static-pod-resources/'
0a180f
+                'kube-apiserver-certs/secrets/node-kubeconfigs/'
0a180f
+                'localhost.kubeconfig'
0a180f
+            )
0a180f
+            _optconfig = self.get_option('kubeconfig')
0a180f
+            if _optconfig and not _optconfig.startswith('/host'):
0a180f
+                _optconfig = '/host/' + _optconfig
0a180f
+            _kubeconfig = _optconfig or master_kube
0a180f
             _oc_cmd = 'oc'
0a180f
             if node.host.containerized:
0a180f
                 _oc_cmd = '/host/bin/oc'
0a180f
@@ -244,17 +288,21 @@ class ocp(Cluster):
0a180f
                                       need_root=True)
0a180f
             if can_oc['status'] == 0:
0a180f
                 # the primary node can already access the API
0a180f
+                self._toggle_api_opt(node, True)
0a180f
                 self.api_collect_enabled = True
0a180f
             elif self.token:
0a180f
                 node.sos_env_vars['SOSOCPTOKEN'] = self.token
0a180f
+                self._toggle_api_opt(node, True)
0a180f
+                self.api_collect_enabled = True
0a180f
+            elif node.file_exists(_kubeconfig):
0a180f
+                # if the file exists, then the openshift sos plugin will use it
0a180f
+                # if the with-api option is turned on
0a180f
+                if not _kubeconfig == master_kube:
0a180f
+                    node.plugopts.append(
0a180f
+                        "openshift.kubeconfig=%s" % _kubeconfig
0a180f
+                    )
0a180f
+                self._toggle_api_opt(node, True)
0a180f
                 self.api_collect_enabled = True
0a180f
-            elif self.get_option('kubeconfig'):
0a180f
-                kc = self.get_option('kubeconfig')
0a180f
-                if node.file_exists(kc):
0a180f
-                    if node.host.containerized:
0a180f
-                        kc = "/host/%s" % kc
0a180f
-                    node.sos_env_vars['KUBECONFIG'] = kc
0a180f
-                    self.api_collect_enabled = True
0a180f
             if self.api_collect_enabled:
0a180f
                 msg = ("API collections will be performed on %s\nNote: API "
0a180f
                        "collections may extend runtime by 10s of minutes\n"
0a180f
@@ -264,6 +312,6 @@ class ocp(Cluster):
0a180f
 
0a180f
     def set_node_options(self, node):
0a180f
         # don't attempt OC API collections on non-primary nodes
0a180f
-        node.plugopts.append('openshift.no-oc=on')
0a180f
+        self._toggle_api_opt(node, False)
0a180f
 
0a180f
 # vim: set et ts=4 sw=4 :
0a180f
diff --git a/sos/report/plugins/openshift.py b/sos/report/plugins/openshift.py
0a180f
index d643f04c..a41ab62b 100644
0a180f
--- a/sos/report/plugins/openshift.py
0a180f
+++ b/sos/report/plugins/openshift.py
0a180f
@@ -19,7 +19,10 @@ class Openshift(Plugin, RedHatPlugin):
0a180f
     further extending the kubernetes plugin (or the OCP 3.x extensions included
0a180f
     in the Red Hat version of the kube plugin).
0a180f
 
0a180f
-    By default, this plugin will collect cluster information and inspect the
0a180f
+    This plugin may collect OCP API information when the `with-api` option is
0a180f
+    enabled. This option is disabled by default.
0a180f
+
0a180f
+    When enabled, this plugin will collect cluster information and inspect the
0a180f
     default namespaces/projects that are created during deployment - i.e. the
0a180f
     namespaces of the cluster projects matching openshift.* and kube.*. At the
0a180f
     time of this plugin's creation that number of default projects is already
0a180f
@@ -34,16 +37,20 @@ class Openshift(Plugin, RedHatPlugin):
0a180f
 
0a180f
     Users will need to either:
0a180f
 
0a180f
-        1) Provide the bearer token via the `-k openshift.token` option
0a180f
-        2) Provide the bearer token via the `SOSOCPTOKEN` environment variable
0a180f
-        3) Otherwise ensure that the root user can successfully run `oc` and
0a180f
+        1) Accept the use of a well-known stock kubeconfig file provided via a
0a180f
+           static pod resource for the kube-apiserver
0a180f
+        2) Provide the bearer token via the `-k openshift.token` option
0a180f
+        3) Provide the bearer token via the `SOSOCPTOKEN` environment variable
0a180f
+        4) Otherwise ensure that the root user can successfully run `oc` and
0a180f
            get proper output prior to running this plugin
0a180f
 
0a180f
 
0a180f
-    It is highly suggested that option #2 be used first, as this will prevent
0a180f
-    the token from being recorded in output saved to the archive. Option #1 may
0a180f
+    It is highly suggested that option #1 be used first, as this uses well
0a180f
+    known configurations and requires the least information from the user. If
0a180f
+    using a token, it is recommended to use option #3 as this will prevent
0a180f
+    the token from being recorded in output saved to the archive. Option #2 may
0a180f
     be used if this is considered an acceptable risk. It is not recommended to
0a180f
-    rely on option #3, though it will provide the functionality needed.
0a180f
+    rely on option #4, though it will provide the functionality needed.
0a180f
     """
0a180f
 
0a180f
     short_desc = 'Openshift Container Platform 4.x'
0a180f
@@ -71,6 +71,7 @@
0a180f
          'fast', None),
0a180f
         ('host', 'host address to use for oc login, including port', 'fast',
0a180f
          'https://localhost:6443'),
0a180f
+        ('with-api', 'collect output from the OCP API', 'fast', False),
0a180f
         ('no-oc', 'do not collect `oc` command output', 'fast', True),
0a180f
         ('podlogs', 'collect logs from each pod', 'fast', True),
0a180f
         ('podlogs-filter', ('limit podlogs collection to pods matching this '
0a180f
@@ -212,7 +220,7 @@ class Openshift(Plugin, RedHatPlugin):
0a180f
         self.add_copy_spec('/etc/kubernetes/*')
0a180f
 
0a180f
         # see if we run `oc` commands
0a180f
-        if not self.get_option('no-oc'):
0a180f
+        if self.get_option('with-api'):
0a180f
             can_run_oc = self._check_oc_logged_in()
0a180f
         else:
0a180f
             can_run_oc = False
0a180f
-- 
0a180f
2.27.0
0a180f
0a180f
From 2b2ad04884cfdda78c02a33a73603d249c0a4dd5 Mon Sep 17 00:00:00 2001
0a180f
From: Jake Hunsaker <jhunsake@redhat.com>
0a180f
Date: Tue, 19 Oct 2021 11:51:56 -0400
0a180f
Subject: [PATCH 1/2] [Plugin] Allow writing command output directly to disk
0a180f
0a180f
This commit addresses a long standing ask in sos, regarding resource
0a180f
consumption when sos is run with `--all-logs`.
0a180f
0a180f
For executions where `--all-logs` is used, or for specific commands
0a180f
where `sizelimit` has been set to 0, `add_cmd_output()` and
0a180f
`add_journal()` will now instruct `sos_get_command_output()` to write
0a180f
output directly to a file rather than saving the output in memory.
0a180f
0a180f
When this occurs, the `output` key in the returned dict will be empty.
0a180f
0a180f
Note that this does extend to `collect_cmd_output()` or `exec_cmd()`.
0a180f
0a180f
Resolves: #1506
0a180f
0a180f
Signed-off-by: Jake Hunsaker <jhunsake@redhat.com>
0a180f
---
0a180f
 sos/archive.py                 | 14 +++----
0a180f
 sos/report/__init__.py         | 10 ++++-
0a180f
 sos/report/plugins/__init__.py | 70 +++++++++++++++++++++++-----------
0a180f
 sos/utilities.py               | 69 ++++++++++++++++++++++++++++-----
0a180f
 4 files changed, 123 insertions(+), 40 deletions(-)
0a180f
0a180f
diff --git a/sos/archive.py b/sos/archive.py
0a180f
index e3c68b77..e92d5d60 100644
0a180f
--- a/sos/archive.py
0a180f
+++ b/sos/archive.py
0a180f
@@ -251,7 +251,7 @@ class FileCacheArchive(Archive):
0a180f
 
0a180f
         return dest
0a180f
 
0a180f
-    def _check_path(self, src, path_type, dest=None, force=False):
0a180f
+    def check_path(self, src, path_type, dest=None, force=False):
0a180f
         """Check a new destination path in the archive.
0a180f
 
0a180f
             Since it is possible for multiple plugins to collect the same
0a180f
@@ -345,7 +345,7 @@ class FileCacheArchive(Archive):
0a180f
             if not dest:
0a180f
                 dest = src
0a180f
 
0a180f
-            dest = self._check_path(dest, P_FILE)
0a180f
+            dest = self.check_path(dest, P_FILE)
0a180f
             if not dest:
0a180f
                 return
0a180f
 
0a180f
@@ -384,7 +384,7 @@ class FileCacheArchive(Archive):
0a180f
             # over any exixting content in the archive, since it is used by
0a180f
             # the Plugin postprocessing hooks to perform regex substitution
0a180f
             # on file content.
0a180f
-            dest = self._check_path(dest, P_FILE, force=True)
0a180f
+            dest = self.check_path(dest, P_FILE, force=True)
0a180f
 
0a180f
             f = codecs.open(dest, mode, encoding='utf-8')
0a180f
             if isinstance(content, bytes):
0a180f
@@ -397,7 +397,7 @@ class FileCacheArchive(Archive):
0a180f
 
0a180f
     def add_binary(self, content, dest):
0a180f
         with self._path_lock:
0a180f
-            dest = self._check_path(dest, P_FILE)
0a180f
+            dest = self.check_path(dest, P_FILE)
0a180f
             if not dest:
0a180f
                 return
0a180f
 
0a180f
@@ -409,7 +409,7 @@ class FileCacheArchive(Archive):
0a180f
     def add_link(self, source, link_name):
0a180f
         self.log_debug("adding symlink at '%s' -> '%s'" % (link_name, source))
0a180f
         with self._path_lock:
0a180f
-            dest = self._check_path(link_name, P_LINK)
0a180f
+            dest = self.check_path(link_name, P_LINK)
0a180f
             if not dest:
0a180f
                 return
0a180f
 
0a180f
@@ -484,10 +484,10 @@ class FileCacheArchive(Archive):
0a180f
         """
0a180f
         # Establish path structure
0a180f
         with self._path_lock:
0a180f
-            self._check_path(path, P_DIR)
0a180f
+            self.check_path(path, P_DIR)
0a180f
 
0a180f
     def add_node(self, path, mode, device):
0a180f
-        dest = self._check_path(path, P_NODE)
0a180f
+        dest = self.check_path(path, P_NODE)
0a180f
         if not dest:
0a180f
             return
0a180f
 
0a180f
diff --git a/sos/report/__init__.py b/sos/report/__init__.py
0a180f
index 249ff119..46c7f219 100644
0a180f
--- a/sos/report/__init__.py
0a180f
+++ b/sos/report/__init__.py
0a180f
@@ -476,7 +476,8 @@ class SoSReport(SoSComponent):
0a180f
             'verbosity': self.opts.verbosity,
0a180f
             'cmdlineopts': self.opts,
0a180f
             'devices': self.devices,
0a180f
-            'namespaces': self.namespaces
0a180f
+            'namespaces': self.namespaces,
0a180f
+            'tempfile_util': self.tempfile_util
0a180f
         }
0a180f
 
0a180f
     def get_temp_file(self):
0a180f
@@ -1075,7 +1076,12 @@ class SoSReport(SoSComponent):
0a180f
                 _plug.manifest.add_field('end_time', end)
0a180f
                 _plug.manifest.add_field('run_time', end - start)
0a180f
             except TimeoutError:
0a180f
-                self.ui_log.error("\n Plugin %s timed out\n" % plugin[1])
0a180f
+                msg = "Plugin %s timed out" % plugin[1]
0a180f
+                # log to ui_log.error to show the user, log to soslog.info
0a180f
+                # so that someone investigating the sos execution has it all
0a180f
+                # in one place, but without double notifying the user.
0a180f
+                self.ui_log.error("\n %s\n" % msg)
0a180f
+                self.soslog.info(msg)
0a180f
                 self.running_plugs.remove(plugin[1])
0a180f
                 self.loaded_plugins[plugin[0]-1][1].set_timeout_hit()
0a180f
                 pool.shutdown(wait=True)
0a180f
diff --git a/sos/report/plugins/__init__.py b/sos/report/plugins/__init__.py
0a180f
index 3ff7c191..38529a9d 100644
0a180f
--- a/sos/report/plugins/__init__.py
0a180f
+++ b/sos/report/plugins/__init__.py
0a180f
@@ -15,6 +15,7 @@ from sos.utilities import (sos_get_command_output, import_module, grep,
0a180f
                            path_exists, path_isdir, path_isfile, path_islink,
0a180f
                            listdir, path_join)
0a180f
 
0a180f
+from sos.archive import P_FILE
0a180f
 import os
0a180f
 import glob
0a180f
 import re
0a180f
@@ -1686,6 +1687,8 @@ class Plugin():
0a180f
             kwargs['priority'] = 10
0a180f
         if 'changes' not in kwargs:
0a180f
             kwargs['changes'] = False
0a180f
+        if self.get_option('all_logs') or kwargs['sizelimit'] == 0:
0a180f
+            kwargs['to_file'] = True
0a180f
         soscmd = SoSCommand(**kwargs)
0a180f
         self._log_debug("packed command: " + soscmd.__str__())
0a180f
         for _skip_cmd in self.skip_commands:
0a180f
@@ -1707,7 +1710,8 @@ class Plugin():
0a180f
                        chroot=True, runat=None, env=None, binary=False,
0a180f
                        sizelimit=None, pred=None, subdir=None,
0a180f
                        changes=False, foreground=False, tags=[],
0a180f
-                       priority=10, cmd_as_tag=False, container=None):
0a180f
+                       priority=10, cmd_as_tag=False, container=None,
0a180f
+                       to_file=False):
0a180f
         """Run a program or a list of programs and collect the output
0a180f
 
0a180f
         Output will be limited to `sizelimit`, collecting the last X amount
0a180f
@@ -1776,6 +1780,10 @@ class Plugin():
0a180f
         :param container: Run the specified `cmds` inside a container with this
0a180f
                           ID or name
0a180f
         :type container:  ``str``
0a180f
+
0a180f
+        :param to_file: Should command output be written directly to a new
0a180f
+                        file rather than stored in memory?
0a180f
+        :type to_file:  ``bool``
0a180f
         """
0a180f
         if isinstance(cmds, str):
0a180f
             cmds = [cmds]
0a180f
@@ -1863,7 +1863,7 @@
0a180f
                                  pred=pred, subdir=subdir, tags=tags,
0a180f
                                  changes=changes, foreground=foreground,
0a180f
                                  priority=priority, cmd_as_tag=cmd_as_tag,
0a180f
-                                 container_cmd=container_cmd)
0a180f
+                                 to_file=to_file, container_cmd=container_cmd)
0a180f
 
0a180f
     def add_cmd_tags(self, tagdict):
0a180f
         """Retroactively add tags to any commands that have been run by this
0a180f
@@ -2014,7 +2014,7 @@
0a180f
                             stderr=True, chroot=True, runat=None, env=None,
0a180f
                             binary=False, sizelimit=None, subdir=None,
0a180f
                             changes=False, foreground=False, tags=[],
0a180f
-                            priority=10, cmd_as_tag=False,
0a180f
+                            priority=10, cmd_as_tag=False, to_file=False,
0a180f
                             container_cmd=False):
0a180f
         """Execute a command and save the output to a file for inclusion in the
0a180f
         report.
0a180f
@@ -2041,6 +2041,8 @@
0a180f
                                         TTY
0a180f
             :param tags:                Add tags in the archive manifest
0a180f
             :param cmd_as_tag:          Format command string to tag
0a180f
+            :param to_file:             Write output directly to file instead
0a180f
+                                        of saving in memory
0a180f
 
0a180f
         :returns:       dict containing status, output, and filename in the
0a180f
                         archive for the executed cmd
0a180f
@@ -2074,27 +2074,46 @@
0a180f
         else:
0a180f
             root = None
0a180f
 
0a180f
+        if suggest_filename:
0a180f
+            outfn = self._make_command_filename(suggest_filename, subdir)
0a180f
+        else:
0a180f
+            outfn = self._make_command_filename(cmd, subdir)
0a180f
+
0a180f
+        outfn_strip = outfn[len(self.commons['cmddir'])+1:]
0a180f
+
0a180f
+        if to_file:
0a180f
+            self._log_debug("collecting '%s' output directly to disk"
0a180f
+                            % cmd)
0a180f
+            self.archive.check_path(outfn, P_FILE)
0a180f
+            out_file = os.path.join(self.archive.get_archive_path(), outfn)
0a180f
+        else:
0a180f
+            out_file = False
0a180f
+
0a180f
         start = time()
0a180f
 
0a180f
         result = sos_get_command_output(
0a180f
             cmd, timeout=timeout, stderr=stderr, chroot=root,
0a180f
             chdir=runat, env=_env, binary=binary, sizelimit=sizelimit,
0a180f
-            poller=self.check_timeout, foreground=foreground
0a180f
+            poller=self.check_timeout, foreground=foreground,
0a180f
+            to_file=out_file
0a180f
         )
0a180f
 
0a180f
         end = time()
0a180f
         run_time = end - start
0a180f
 
0a180f
         if result['status'] == 124:
0a180f
-            self._log_warn(
0a180f
-                "command '%s' timed out after %ds" % (cmd, timeout)
0a180f
-            )
0a180f
+            warn = "command '%s' timed out after %ds" % (cmd, timeout)
0a180f
+            self._log_warn(warn)
0a180f
+            if to_file:
0a180f
+                msg = (" - output up until the timeout may be available at "
0a180f
+                       "%s" % outfn)
0a180f
+                self._log_debug("%s%s" % (warn, msg))
0a180f
 
0a180f
         manifest_cmd = {
0a180f
             'command': cmd.split(' ')[0],
0a180f
             'parameters': cmd.split(' ')[1:],
0a180f
             'exec': cmd,
0a180f
-            'filepath': None,
0a180f
+            'filepath': outfn if to_file else None,
0a180f
             'truncated': result['truncated'],
0a180f
             'return_code': result['status'],
0a180f
             'priority': priority,
0a180f
@@ -2060,7 +2090,7 @@ class Plugin():
0a180f
                     result = sos_get_command_output(
0a180f
                         cmd, timeout=timeout, chroot=False, chdir=runat,
0a180f
                         env=env, binary=binary, sizelimit=sizelimit,
0a180f
-                        poller=self.check_timeout
0a180f
+                        poller=self.check_timeout, to_file=out_file
0a180f
                     )
0a180f
                     run_time = time() - start
0a180f
             self._log_debug("could not run '%s': command not found" % cmd)
0a180f
@@ -2077,22 +2107,15 @@ class Plugin():
0a180f
         if result['truncated']:
0a180f
             self._log_info("collected output of '%s' was truncated"
0a180f
                            % cmd.split()[0])
0a180f
-
0a180f
-        if suggest_filename:
0a180f
-            outfn = self._make_command_filename(suggest_filename, subdir)
0a180f
-        else:
0a180f
-            outfn = self._make_command_filename(cmd, subdir)
0a180f
-
0a180f
-        outfn_strip = outfn[len(self.commons['cmddir'])+1:]
0a180f
-
0a180f
-        if result['truncated']:
0a180f
             linkfn = outfn
0a180f
             outfn = outfn.replace('sos_commands', 'sos_strings') + '.tailed'
0a180f
 
0a180f
-        if binary:
0a180f
-            self.archive.add_binary(result['output'], outfn)
0a180f
-        else:
0a180f
-            self.archive.add_string(result['output'], outfn)
0a180f
+        if not to_file:
0a180f
+            if binary:
0a180f
+                self.archive.add_binary(result['output'], outfn)
0a180f
+            else:
0a180f
+                self.archive.add_string(result['output'], outfn)
0a180f
+
0a180f
         if result['truncated']:
0a180f
             # we need to manually build the relative path from the paths that
0a180f
             # exist within the build dir to properly drop these symlinks
0a180f
@@ -2543,6 +2566,9 @@ class Plugin():
0a180f
         all_logs = self.get_option("all_logs")
0a180f
         log_size = sizelimit or self.get_option("log_size")
0a180f
         log_size = max(log_size, journal_size) if not all_logs else 0
0a180f
+        if sizelimit == 0:
0a180f
+            # allow for specific sizelimit overrides in plugins
0a180f
+            log_size = 0
0a180f
 
0a180f
         if isinstance(units, str):
0a180f
             units = [units]
0a180f
diff --git a/sos/utilities.py b/sos/utilities.py
0a180f
index 70d5c0ab..f422d7a0 100644
0a180f
--- a/sos/utilities.py
0a180f
+++ b/sos/utilities.py
0a180f
@@ -110,7 +110,8 @@ def is_executable(command, sysroot=None):
0a180f
 
0a180f
 def sos_get_command_output(command, timeout=TIMEOUT_DEFAULT, stderr=False,
0a180f
                            chroot=None, chdir=None, env=None, foreground=False,
0a180f
-                           binary=False, sizelimit=None, poller=None):
0a180f
+                           binary=False, sizelimit=None, poller=None,
0a180f
+                           to_file=False):
0a180f
     """Execute a command and return a dictionary of status and output,
0a180f
     optionally changing root or current working directory before
0a180f
     executing command.
0a180f
@@ -124,6 +125,12 @@ def sos_get_command_output(command, timeout=TIMEOUT_DEFAULT, stderr=False,
0a180f
         if (chdir):
0a180f
             os.chdir(chdir)
0a180f
 
0a180f
+    def _check_poller(proc):
0a180f
+        if poller():
0a180f
+            proc.terminate()
0a180f
+            raise SoSTimeoutError
0a180f
+        time.sleep(0.01)
0a180f
+
0a180f
     cmd_env = os.environ.copy()
0a180f
     # ensure consistent locale for collected command output
0a180f
     cmd_env['LC_ALL'] = 'C'
0a180f
@@ -158,24 +158,46 @@
0a180f
             expanded_args.extend(expanded_arg)
0a180f
         else:
0a180f
             expanded_args.append(arg)
0a180f
+    if to_file:
0a180f
+        _output = open(to_file, 'w')
0a180f
+    else:
0a180f
+        _output = PIPE
0a180f
     try:
0a180f
-        p = Popen(expanded_args, shell=False, stdout=PIPE,
0a180f
+        p = Popen(expanded_args, shell=False, stdout=_output,
0a180f
                   stderr=STDOUT if stderr else PIPE,
0a180f
                   bufsize=-1, env=cmd_env, close_fds=True,
0a180f
                   preexec_fn=_child_prep_fn)
0a180f
 
0a180f
-        reader = AsyncReader(p.stdout, sizelimit, binary)
0a180f
+        if not to_file:
0a180f
+            reader = AsyncReader(p.stdout, sizelimit, binary)
0a180f
+        else:
0a180f
+            reader = FakeReader(p, binary)
0a180f
+
0a180f
         if poller:
0a180f
             while reader.running:
0a180f
-                if poller():
0a180f
-                    p.terminate()
0a180f
-                    raise SoSTimeoutError
0a180f
-                time.sleep(0.01)
0a180f
-        stdout = reader.get_contents()
0a180f
-        truncated = reader.is_full
0a180f
+                _check_poller(p)
0a180f
+        else:
0a180f
+            try:
0a180f
+                # override timeout=0 to timeout=None, as Popen will treat the
0a180f
+                # former as a literal 0-second timeout
0a180f
+                p.wait(timeout if timeout else None)
0a180f
+            except Exception:
0a180f
+                p.terminate()
0a180f
+                _output.close()
0a180f
+                # until we separate timeouts from the `timeout` command
0a180f
+                # handle per-cmd timeouts via Plugin status checks
0a180f
+                return {'status': 124, 'output': reader.get_contents(),
0a180f
+                        'truncated': reader.is_full}
0a180f
+        if to_file:
0a180f
+            _output.close()
0a180f
+
0a180f
+        # wait for Popen to set the returncode
0a180f
         while p.poll() is None:
0a180f
             pass
0a180f
 
0a180f
+        stdout = reader.get_contents()
0a180f
+        truncated = reader.is_full
0a180f
+
0a180f
     except OSError as e:
0a180f
         if e.errno == errno.ENOENT:
0a180f
             return {'status': 127, 'output': "", 'truncated': ''}
0a180f
@@ -256,6 +285,28 @@ def path_join(path, *p, sysroot=os.sep):
0a180f
     return os.path.join(path, *p)
0a180f
 
0a180f
 
0a180f
+class FakeReader():
0a180f
+    """Used as a replacement AsyncReader for when we are writing directly to
0a180f
+    disk, and allows us to keep more simplified flows for executing,
0a180f
+    monitoring, and collecting command output.
0a180f
+    """
0a180f
+
0a180f
+    def __init__(self, process, binary):
0a180f
+        self.process = process
0a180f
+        self.binary = binary
0a180f
+
0a180f
+    @property
0a180f
+    def is_full(self):
0a180f
+        return False
0a180f
+
0a180f
+    def get_contents(self):
0a180f
+        return '' if not self.binary else b''
0a180f
+
0a180f
+    @property
0a180f
+    def running(self):
0a180f
+        return self.process.poll() is None
0a180f
+
0a180f
+
0a180f
 class AsyncReader(threading.Thread):
0a180f
     """Used to limit command output to a given size without deadlocking
0a180f
     sos.
0a180f
-- 
0a180f
2.27.0
0a180f