Blame SOURCES/sos-bz2065805-collect-pacemaker-cluster.patch

46fb0a
From 3b84b4ccfa9e4924a5a3829d3810568dfb69bf63 Mon Sep 17 00:00:00 2001
46fb0a
From: Jake Hunsaker <jhunsake@redhat.com>
46fb0a
Date: Fri, 18 Mar 2022 16:25:35 -0400
46fb0a
Subject: [PATCH 1/2] [pacemaker] Redesign node enumeration logic
46fb0a
46fb0a
It has been found that `pcs status` output is liable to change, which
46fb0a
ends up breaking our parsing of node lists when using it on newer
46fb0a
versions.
46fb0a
46fb0a
Instead, first try to parse through `crm_mon` output, which is what `pcs
46fb0a
status` uses under the hood, but as a stable and reliable xml format.
46fb0a
46fb0a
Failing that, for example if the `--primary` node is not functioning as
46fb0a
part of the cluster, source `/etc/corosync/corosync.conf` instead.
46fb0a
46fb0a
Related: RHBZ2065805
46fb0a
Related: RHBZ2065811
46fb0a
46fb0a
Signed-off-by: Jake Hunsaker <jhunsake@redhat.com>
46fb0a
---
46fb0a
 sos/collector/clusters/pacemaker.py | 110 +++++++++++++++++++---------
46fb0a
 1 file changed, 76 insertions(+), 34 deletions(-)
46fb0a
46fb0a
diff --git a/sos/collector/clusters/pacemaker.py b/sos/collector/clusters/pacemaker.py
46fb0a
index 55024314..49d0ce51 100644
46fb0a
--- a/sos/collector/clusters/pacemaker.py
46fb0a
+++ b/sos/collector/clusters/pacemaker.py
46fb0a
@@ -8,7 +8,11 @@
46fb0a
 #
46fb0a
 # See the LICENSE file in the source distribution for further information.
46fb0a
 
46fb0a
+import re
46fb0a
+
46fb0a
 from sos.collector.clusters import Cluster
46fb0a
+from setuptools._vendor.packaging import version
46fb0a
+from xml.etree import ElementTree
46fb0a
 
46fb0a
 
46fb0a
 class pacemaker(Cluster):
46fb0a
@@ -18,42 +22,80 @@ class pacemaker(Cluster):
46fb0a
     packages = ('pacemaker',)
46fb0a
     option_list = [
46fb0a
         ('online', True, 'Collect nodes listed as online'),
46fb0a
-        ('offline', True, 'Collect nodes listed as offline')
46fb0a
+        ('offline', True, 'Collect nodes listed as offline'),
46fb0a
+        ('only-corosync', False, 'Only use corosync.conf to enumerate nodes')
46fb0a
     ]
46fb0a
 
46fb0a
     def get_nodes(self):
46fb0a
-        self.res = self.exec_primary_cmd('pcs status')
46fb0a
-        if self.res['status'] != 0:
46fb0a
-            self.log_error('Cluster status could not be determined. Is the '
46fb0a
-                           'cluster running on this node?')
46fb0a
-            return []
46fb0a
-        if 'node names do not match' in self.res['output']:
46fb0a
-            self.log_warn('Warning: node name mismatch reported. Attempts to '
46fb0a
-                          'connect to some nodes may fail.\n')
46fb0a
-        return self.parse_pcs_output()
46fb0a
-
46fb0a
-    def parse_pcs_output(self):
46fb0a
-        nodes = []
46fb0a
-        if self.get_option('online'):
46fb0a
-            nodes += self.get_online_nodes()
46fb0a
-        if self.get_option('offline'):
46fb0a
-            nodes += self.get_offline_nodes()
46fb0a
-        return nodes
46fb0a
-
46fb0a
-    def get_online_nodes(self):
46fb0a
-        for line in self.res['output'].splitlines():
46fb0a
-            if line.startswith('Online:'):
46fb0a
-                nodes = line.split('[')[1].split(']')[0]
46fb0a
-                return [n for n in nodes.split(' ') if n]
46fb0a
-
46fb0a
-    def get_offline_nodes(self):
46fb0a
-        offline = []
46fb0a
-        for line in self.res['output'].splitlines():
46fb0a
-            if line.startswith('Node') and line.endswith('(offline)'):
46fb0a
-                offline.append(line.split()[1].replace(':', ''))
46fb0a
-            if line.startswith('OFFLINE:'):
46fb0a
-                nodes = line.split('[')[1].split(']')[0]
46fb0a
-                offline.extend([n for n in nodes.split(' ') if n])
46fb0a
-        return offline
46fb0a
+        self.nodes = []
46fb0a
+        # try crm_mon first
46fb0a
+        try:
46fb0a
+            if not self.get_option('only-corosync'):
46fb0a
+                try:
46fb0a
+                    self.get_nodes_from_crm()
46fb0a
+                except Exception as err:
46fb0a
+                    self.log_warn("Falling back to sourcing corosync.conf. "
46fb0a
+                                  "Could not parse crm_mon output: %s" % err)
46fb0a
+            if not self.nodes:
46fb0a
+                # fallback to corosync.conf, in case the node we're inspecting
46fb0a
+                # is offline from the cluster
46fb0a
+                self.get_nodes_from_corosync()
46fb0a
+        except Exception as err:
46fb0a
+            self.log_error("Could not determine nodes from cluster: %s" % err)
46fb0a
+
46fb0a
+        _shorts = [n for n in self.nodes if '.' not in n]
46fb0a
+        if _shorts:
46fb0a
+            self.log_warn(
46fb0a
+                "WARNING: Node addresses '%s' may not resolve locally if you "
46fb0a
+                "are not running on a node in the cluster. Try using option "
46fb0a
+                "'-c pacemaker.only-corosync' if these connections fail."
46fb0a
+                % ','.join(_shorts)
46fb0a
+            )
46fb0a
+        return self.nodes
46fb0a
+
46fb0a
+    def get_nodes_from_crm(self):
46fb0a
+        """
46fb0a
+        Try to parse crm_mon output for node list and status.
46fb0a
+        """
46fb0a
+        xmlopt = '--output-as=xml'
46fb0a
+        # older pacemaker had a different option for xml output
46fb0a
+        _ver = self.exec_primary_cmd('crm_mon --version')
46fb0a
+        if _ver['status'] == 0:
46fb0a
+            cver = _ver['output'].split()[1].split('-')[0]
46fb0a
+            if not version.parse(cver) > version.parse('2.0.3'):
46fb0a
+                xmlopt = '--as-xml'
46fb0a
+        else:
46fb0a
+            return
46fb0a
+        _out = self.exec_primary_cmd(
46fb0a
+            "crm_mon --one-shot --inactive %s" % xmlopt,
46fb0a
+            need_root=True
46fb0a
+        )
46fb0a
+        if _out['status'] == 0:
46fb0a
+            self.parse_crm_xml(_out['output'])
46fb0a
+
46fb0a
+    def parse_crm_xml(self, xmlstring):
46fb0a
+        """
46fb0a
+        Parse the xml output string provided by crm_mon
46fb0a
+        """
46fb0a
+        _xml = ElementTree.fromstring(xmlstring)
46fb0a
+        nodes = _xml.find('nodes')
46fb0a
+        for node in nodes:
46fb0a
+            _node = node.attrib
46fb0a
+            if self.get_option('online') and _node['online'] == 'true':
46fb0a
+                self.nodes.append(_node['name'])
46fb0a
+            elif self.get_option('offline') and _node['online'] == 'false':
46fb0a
+                self.nodes.append(_node['name'])
46fb0a
+
46fb0a
+    def get_nodes_from_corosync(self):
46fb0a
+        """
46fb0a
+        As a fallback measure, read corosync.conf to get the node list. Note
46fb0a
+        that this prevents us from separating online nodes from offline nodes.
46fb0a
+        """
46fb0a
+        self.log_warn("WARNING: unable to distinguish online nodes from "
46fb0a
+                      "offline nodes when sourcing from corosync.conf")
46fb0a
+        cc = self.primary.read_file('/etc/corosync/corosync.conf')
46fb0a
+        nodes = re.findall(r'((\sring0_addr:)(.*))', cc)
46fb0a
+        for node in nodes:
46fb0a
+            self.nodes.append(node[-1].strip())
46fb0a
 
46fb0a
 # vim: set et ts=4 sw=4 :
46fb0a
-- 
46fb0a
2.34.3
46fb0a
46fb0a
46fb0a
From 6701a7d77ecc998b018b54ecc00f9fd102ae9518 Mon Sep 17 00:00:00 2001
46fb0a
From: Jake Hunsaker <jhunsake@redhat.com>
46fb0a
Date: Mon, 21 Mar 2022 12:05:59 -0400
46fb0a
Subject: [PATCH 2/2] [clusters] Allow clusters to not add localhost to node
46fb0a
 list
46fb0a
46fb0a
For most of our supported clusters, we end up needing to add the
46fb0a
local host executing `sos collect` to the node list (unless `--no-local`
46fb0a
is used) as that accounts for the primary node that may otherwise be
46fb0a
left off. However, this is not helpful for clusters that may report
46fb0a
node names as something other than resolvable names. In those cases,
46fb0a
such as with pacemaker, adding the local hostname may result in
46fb0a
duplicate collections.
46fb0a
46fb0a
Add a toggle to cluster profiles via a new `strict_node_list` class attr
46fb0a
that, if True, will skip this addition. This toggle is default `False`
46fb0a
to preserve existing behavior, and is now enabled for `pacemaker`
46fb0a
specifically.
46fb0a
46fb0a
Related: RHBZ#2065821
46fb0a
46fb0a
Signed-off-by: Jake Hunsaker <jhunsake@redhat.com>
46fb0a
---
46fb0a
 sos/collector/__init__.py           | 3 ++-
46fb0a
 sos/collector/clusters/__init__.py  | 4 ++++
46fb0a
 sos/collector/clusters/pacemaker.py | 1 +
46fb0a
 3 files changed, 7 insertions(+), 1 deletion(-)
46fb0a
46fb0a
diff --git a/sos/collector/__init__.py b/sos/collector/__init__.py
46fb0a
index a8bb0064..d898ca34 100644
46fb0a
--- a/sos/collector/__init__.py
46fb0a
+++ b/sos/collector/__init__.py
46fb0a
@@ -1073,7 +1073,8 @@ class SoSCollector(SoSComponent):
46fb0a
             for node in self.node_list:
46fb0a
                 if host == node.split('.')[0]:
46fb0a
                     self.node_list.remove(node)
46fb0a
-            self.node_list.append(self.hostname)
46fb0a
+            if not self.cluster.strict_node_list:
46fb0a
+                self.node_list.append(self.hostname)
46fb0a
         self.reduce_node_list()
46fb0a
         try:
46fb0a
             _node_max = len(max(self.node_list, key=len))
46fb0a
diff --git a/sos/collector/clusters/__init__.py b/sos/collector/clusters/__init__.py
46fb0a
index f3f550ad..f00677b8 100644
46fb0a
--- a/sos/collector/clusters/__init__.py
46fb0a
+++ b/sos/collector/clusters/__init__.py
46fb0a
@@ -57,6 +57,10 @@ class Cluster():
46fb0a
     sos_plugin_options = {}
46fb0a
     sos_preset = ''
46fb0a
     cluster_name = None
46fb0a
+    # set this to True if the local host running collect should *not* be
46fb0a
+    # forcibly added to the node list. This can be helpful in situations where
46fb0a
+    # the host's fqdn and the name the cluster uses are different
46fb0a
+    strict_node_list = False
46fb0a
 
46fb0a
     def __init__(self, commons):
46fb0a
         self.primary = None
46fb0a
diff --git a/sos/collector/clusters/pacemaker.py b/sos/collector/clusters/pacemaker.py
46fb0a
index 49d0ce51..bebcb265 100644
46fb0a
--- a/sos/collector/clusters/pacemaker.py
46fb0a
+++ b/sos/collector/clusters/pacemaker.py
46fb0a
@@ -20,6 +20,7 @@ class pacemaker(Cluster):
46fb0a
     cluster_name = 'Pacemaker High Availability Cluster Manager'
46fb0a
     sos_plugins = ['pacemaker']
46fb0a
     packages = ('pacemaker',)
46fb0a
+    strict_node_list = True
46fb0a
     option_list = [
46fb0a
         ('online', True, 'Collect nodes listed as online'),
46fb0a
         ('offline', True, 'Collect nodes listed as offline'),
46fb0a
-- 
46fb0a
2.34.3
46fb0a