Blame SOURCES/sos-bz2082914-collect-pacemaker-cluster.patch

4b82b4
From 3b84b4ccfa9e4924a5a3829d3810568dfb69bf63 Mon Sep 17 00:00:00 2001
4b82b4
From: Jake Hunsaker <jhunsake@redhat.com>
4b82b4
Date: Fri, 18 Mar 2022 16:25:35 -0400
4b82b4
Subject: [PATCH 1/2] [pacemaker] Redesign node enumeration logic
4b82b4
4b82b4
It has been found that `pcs status` output is liable to change, which
4b82b4
ends up breaking our parsing of node lists when using it on newer
4b82b4
versions.
4b82b4
4b82b4
Instead, first try to parse through `crm_mon` output, which is what `pcs
4b82b4
status` uses under the hood, but as a stable and reliable xml format.
4b82b4
4b82b4
Failing that, for example if the `--primary` node is not functioning as
4b82b4
part of the cluster, source `/etc/corosync/corosync.conf` instead.
4b82b4
4b82b4
Related: RHBZ2065805
4b82b4
Related: RHBZ2065811
4b82b4
4b82b4
Signed-off-by: Jake Hunsaker <jhunsake@redhat.com>
4b82b4
---
4b82b4
 sos/collector/clusters/pacemaker.py | 110 +++++++++++++++++++---------
4b82b4
 1 file changed, 76 insertions(+), 34 deletions(-)
4b82b4
4b82b4
diff --git a/sos/collector/clusters/pacemaker.py b/sos/collector/clusters/pacemaker.py
4b82b4
index 55024314..49d0ce51 100644
4b82b4
--- a/sos/collector/clusters/pacemaker.py
4b82b4
+++ b/sos/collector/clusters/pacemaker.py
4b82b4
@@ -8,7 +8,11 @@
4b82b4
 #
4b82b4
 # See the LICENSE file in the source distribution for further information.
4b82b4
 
4b82b4
+import re
4b82b4
+
4b82b4
 from sos.collector.clusters import Cluster
4b82b4
+from setuptools._vendor.packaging import version
4b82b4
+from xml.etree import ElementTree
4b82b4
 
4b82b4
 
4b82b4
 class pacemaker(Cluster):
4b82b4
@@ -18,42 +22,80 @@ class pacemaker(Cluster):
4b82b4
     packages = ('pacemaker',)
4b82b4
     option_list = [
4b82b4
         ('online', True, 'Collect nodes listed as online'),
4b82b4
-        ('offline', True, 'Collect nodes listed as offline')
4b82b4
+        ('offline', True, 'Collect nodes listed as offline'),
4b82b4
+        ('only-corosync', False, 'Only use corosync.conf to enumerate nodes')
4b82b4
     ]
4b82b4
 
4b82b4
     def get_nodes(self):
4b82b4
-        self.res = self.exec_primary_cmd('pcs status')
4b82b4
-        if self.res['status'] != 0:
4b82b4
-            self.log_error('Cluster status could not be determined. Is the '
4b82b4
-                           'cluster running on this node?')
4b82b4
-            return []
4b82b4
-        if 'node names do not match' in self.res['output']:
4b82b4
-            self.log_warn('Warning: node name mismatch reported. Attempts to '
4b82b4
-                          'connect to some nodes may fail.\n')
4b82b4
-        return self.parse_pcs_output()
4b82b4
-
4b82b4
-    def parse_pcs_output(self):
4b82b4
-        nodes = []
4b82b4
-        if self.get_option('online'):
4b82b4
-            nodes += self.get_online_nodes()
4b82b4
-        if self.get_option('offline'):
4b82b4
-            nodes += self.get_offline_nodes()
4b82b4
-        return nodes
4b82b4
-
4b82b4
-    def get_online_nodes(self):
4b82b4
-        for line in self.res['output'].splitlines():
4b82b4
-            if line.startswith('Online:'):
4b82b4
-                nodes = line.split('[')[1].split(']')[0]
4b82b4
-                return [n for n in nodes.split(' ') if n]
4b82b4
-
4b82b4
-    def get_offline_nodes(self):
4b82b4
-        offline = []
4b82b4
-        for line in self.res['output'].splitlines():
4b82b4
-            if line.startswith('Node') and line.endswith('(offline)'):
4b82b4
-                offline.append(line.split()[1].replace(':', ''))
4b82b4
-            if line.startswith('OFFLINE:'):
4b82b4
-                nodes = line.split('[')[1].split(']')[0]
4b82b4
-                offline.extend([n for n in nodes.split(' ') if n])
4b82b4
-        return offline
4b82b4
+        self.nodes = []
4b82b4
+        # try crm_mon first
4b82b4
+        try:
4b82b4
+            if not self.get_option('only-corosync'):
4b82b4
+                try:
4b82b4
+                    self.get_nodes_from_crm()
4b82b4
+                except Exception as err:
4b82b4
+                    self.log_warn("Falling back to sourcing corosync.conf. "
4b82b4
+                                  "Could not parse crm_mon output: %s" % err)
4b82b4
+            if not self.nodes:
4b82b4
+                # fallback to corosync.conf, in case the node we're inspecting
4b82b4
+                # is offline from the cluster
4b82b4
+                self.get_nodes_from_corosync()
4b82b4
+        except Exception as err:
4b82b4
+            self.log_error("Could not determine nodes from cluster: %s" % err)
4b82b4
+
4b82b4
+        _shorts = [n for n in self.nodes if '.' not in n]
4b82b4
+        if _shorts:
4b82b4
+            self.log_warn(
4b82b4
+                "WARNING: Node addresses '%s' may not resolve locally if you "
4b82b4
+                "are not running on a node in the cluster. Try using option "
4b82b4
+                "'-c pacemaker.only-corosync' if these connections fail."
4b82b4
+                % ','.join(_shorts)
4b82b4
+            )
4b82b4
+        return self.nodes
4b82b4
+
4b82b4
+    def get_nodes_from_crm(self):
4b82b4
+        """
4b82b4
+        Try to parse crm_mon output for node list and status.
4b82b4
+        """
4b82b4
+        xmlopt = '--output-as=xml'
4b82b4
+        # older pacemaker had a different option for xml output
4b82b4
+        _ver = self.exec_primary_cmd('crm_mon --version')
4b82b4
+        if _ver['status'] == 0:
4b82b4
+            cver = _ver['output'].split()[1].split('-')[0]
4b82b4
+            if not version.parse(cver) > version.parse('2.0.3'):
4b82b4
+                xmlopt = '--as-xml'
4b82b4
+        else:
4b82b4
+            return
4b82b4
+        _out = self.exec_primary_cmd(
4b82b4
+            "crm_mon --one-shot --inactive %s" % xmlopt,
4b82b4
+            need_root=True
4b82b4
+        )
4b82b4
+        if _out['status'] == 0:
4b82b4
+            self.parse_crm_xml(_out['output'])
4b82b4
+
4b82b4
+    def parse_crm_xml(self, xmlstring):
4b82b4
+        """
4b82b4
+        Parse the xml output string provided by crm_mon
4b82b4
+        """
4b82b4
+        _xml = ElementTree.fromstring(xmlstring)
4b82b4
+        nodes = _xml.find('nodes')
4b82b4
+        for node in nodes:
4b82b4
+            _node = node.attrib
4b82b4
+            if self.get_option('online') and _node['online'] == 'true':
4b82b4
+                self.nodes.append(_node['name'])
4b82b4
+            elif self.get_option('offline') and _node['online'] == 'false':
4b82b4
+                self.nodes.append(_node['name'])
4b82b4
+
4b82b4
+    def get_nodes_from_corosync(self):
4b82b4
+        """
4b82b4
+        As a fallback measure, read corosync.conf to get the node list. Note
4b82b4
+        that this prevents us from separating online nodes from offline nodes.
4b82b4
+        """
4b82b4
+        self.log_warn("WARNING: unable to distinguish online nodes from "
4b82b4
+                      "offline nodes when sourcing from corosync.conf")
4b82b4
+        cc = self.primary.read_file('/etc/corosync/corosync.conf')
4b82b4
+        nodes = re.findall(r'((\sring0_addr:)(.*))', cc)
4b82b4
+        for node in nodes:
4b82b4
+            self.nodes.append(node[-1].strip())
4b82b4
 
4b82b4
 # vim: set et ts=4 sw=4 :
4b82b4
-- 
4b82b4
2.34.3
4b82b4
4b82b4
4b82b4
From 6701a7d77ecc998b018b54ecc00f9fd102ae9518 Mon Sep 17 00:00:00 2001
4b82b4
From: Jake Hunsaker <jhunsake@redhat.com>
4b82b4
Date: Mon, 21 Mar 2022 12:05:59 -0400
4b82b4
Subject: [PATCH 2/2] [clusters] Allow clusters to not add localhost to node
4b82b4
 list
4b82b4
4b82b4
For most of our supported clusters, we end up needing to add the
4b82b4
local host executing `sos collect` to the node list (unless `--no-local`
4b82b4
is used) as that accounts for the primary node that may otherwise be
4b82b4
left off. However, this is not helpful for clusters that may report
4b82b4
node names as something other than resolvable names. In those cases,
4b82b4
such as with pacemaker, adding the local hostname may result in
4b82b4
duplicate collections.
4b82b4
4b82b4
Add a toggle to cluster profiles via a new `strict_node_list` class attr
4b82b4
that, if True, will skip this addition. This toggle defaults to `False`
4b82b4
to preserve existing behavior, and is now enabled for `pacemaker`
4b82b4
specifically.
4b82b4
4b82b4
Related: RHBZ#2065821
4b82b4
4b82b4
Signed-off-by: Jake Hunsaker <jhunsake@redhat.com>
4b82b4
---
4b82b4
 sos/collector/__init__.py           | 3 ++-
4b82b4
 sos/collector/clusters/__init__.py  | 4 ++++
4b82b4
 sos/collector/clusters/pacemaker.py | 1 +
4b82b4
 3 files changed, 7 insertions(+), 1 deletion(-)
4b82b4
4b82b4
diff --git a/sos/collector/__init__.py b/sos/collector/__init__.py
4b82b4
index a8bb0064..d898ca34 100644
4b82b4
--- a/sos/collector/__init__.py
4b82b4
+++ b/sos/collector/__init__.py
4b82b4
@@ -1073,7 +1073,8 @@ class SoSCollector(SoSComponent):
4b82b4
             for node in self.node_list:
4b82b4
                 if host == node.split('.')[0]:
4b82b4
                     self.node_list.remove(node)
4b82b4
-            self.node_list.append(self.hostname)
4b82b4
+            if not self.cluster.strict_node_list:
4b82b4
+                self.node_list.append(self.hostname)
4b82b4
         self.reduce_node_list()
4b82b4
         try:
4b82b4
             _node_max = len(max(self.node_list, key=len))
4b82b4
diff --git a/sos/collector/clusters/__init__.py b/sos/collector/clusters/__init__.py
4b82b4
index f3f550ad..f00677b8 100644
4b82b4
--- a/sos/collector/clusters/__init__.py
4b82b4
+++ b/sos/collector/clusters/__init__.py
4b82b4
@@ -57,6 +57,10 @@ class Cluster():
4b82b4
     sos_plugin_options = {}
4b82b4
     sos_preset = ''
4b82b4
     cluster_name = None
4b82b4
+    # set this to True if the local host running collect should *not* be
4b82b4
+    # forcibly added to the node list. This can be helpful in situations where
4b82b4
+    # the host's fqdn and the name the cluster uses are different
4b82b4
+    strict_node_list = False
4b82b4
 
4b82b4
     def __init__(self, commons):
4b82b4
         self.primary = None
4b82b4
diff --git a/sos/collector/clusters/pacemaker.py b/sos/collector/clusters/pacemaker.py
4b82b4
index 49d0ce51..bebcb265 100644
4b82b4
--- a/sos/collector/clusters/pacemaker.py
4b82b4
+++ b/sos/collector/clusters/pacemaker.py
4b82b4
@@ -20,6 +20,7 @@ class pacemaker(Cluster):
4b82b4
     cluster_name = 'Pacemaker High Availability Cluster Manager'
4b82b4
     sos_plugins = ['pacemaker']
4b82b4
     packages = ('pacemaker',)
4b82b4
+    strict_node_list = True
4b82b4
     option_list = [
4b82b4
         ('online', True, 'Collect nodes listed as online'),
4b82b4
         ('offline', True, 'Collect nodes listed as offline'),
4b82b4
-- 
4b82b4
2.34.3
4b82b4