From 5d8bab038a7aa64c38b79e5de9579af4c73e70a2 Mon Sep 17 00:00:00 2001
From: Tomas Jelinek <tojeline@redhat.com>
Date: Thu, 14 Jul 2016 17:04:04 +0200
Subject: [PATCH] fixes for pcs cli running on a remote node

---
 pcs/acl.py             |   2 +-
 pcs/cluster.py         |  13 ++++++-
 pcs/config.py          |  37 +++++++++++++-----
 pcs/constraint.py      |   2 +-
 pcs/prop.py            |  16 +-------
 pcs/quorum.py          |   3 +-
 pcs/status.py          |  29 +++++---------
 pcs/stonith.py         |  12 +++++-
 pcs/utils.py           | 104 +++++++++++++++++++++++++++++++------------------
 pcsd/cluster_entity.rb |   4 +-
 pcsd/pcs.rb            |   4 ++
 pcsd/remote.rb         |  14 ++++++-
 12 files changed, 149 insertions(+), 91 deletions(-)

diff --git a/pcs/acl.py b/pcs/acl.py
index 118ceed..0378c10 100644
--- a/pcs/acl.py
+++ b/pcs/acl.py
@@ -55,7 +55,7 @@ def acl_cmd(argv):
 def acl_show(argv):
     dom = utils.get_cib_dom()
 
-    properties = prop.get_set_properties(defaults=prop.get_default_properties())
+    properties = utils.get_set_properties(defaults=prop.get_default_properties())
     acl_enabled = properties.get("enable-acl", "").lower()
     if is_true(acl_enabled):
         print("ACLs are enabled")
diff --git a/pcs/cluster.py b/pcs/cluster.py
index 4155103..13446d4 100644
--- a/pcs/cluster.py
+++ b/pcs/cluster.py
@@ -1157,7 +1157,18 @@ def stop_cluster_corosync():
                 utils.err("unable to stop {0}".format(service))
 
 def kill_cluster(argv):
-    daemons = ["crmd", "pengine", "attrd", "lrmd", "stonithd", "cib", "pacemakerd", "corosync-qdevice", "corosync"]
+    daemons = [
+        "crmd",
+        "pengine",
+        "attrd",
+        "lrmd",
+        "stonithd",
+        "cib",
+        "pacemakerd",
+        "pacemaker_remoted",
+        "corosync-qdevice",
+        "corosync",
+    ]
     dummy_output, dummy_retval = utils.run(["killall", "-9"] + daemons)
 #    if dummy_retval != 0:
 #        print "Error: unable to execute killall -9"
diff --git a/pcs/config.py b/pcs/config.py
index 3d86b39..9119c3c 100644
--- a/pcs/config.py
+++ b/pcs/config.py
@@ -95,14 +95,22 @@ def config_show(argv):
     print()
     config_show_cib()
     if (
-        utils.is_rhel6()
-        or
-        (not utils.usefile and "--corosync_conf" not in utils.pcs_options)
+        utils.hasCorosyncConf()
+        and
+        (
+            utils.is_rhel6()
+            or
+            (not utils.usefile and "--corosync_conf" not in utils.pcs_options)
+        )
     ):
         # with corosync 1 and cman, uid gid is part of cluster.conf file
         # with corosync 2, uid gid is in a separate directory
         cluster.cluster_uidgid([], True)
-    if "--corosync_conf" in utils.pcs_options or not utils.is_rhel6():
+    if (
+        "--corosync_conf" in utils.pcs_options
+        or
+        (not utils.is_rhel6() and utils.hasCorosyncConf())
+    ):
         print()
         print("Quorum:")
         try:
@@ -267,7 +275,16 @@ def config_restore_remote(infile_name, infile_obj):
                 err_msgs.append(output)
                 continue
             status = json.loads(output)
-            if status["corosync"] or status["pacemaker"] or status["cman"]:
+            if (
+                status["corosync"]
+                or
+                status["pacemaker"]
+                or
+                status["cman"]
+                or
+                # not supported by older pcsd, do not fail if not present
+                status.get("pacemaker_remote", False)
+            ):
                 err_msgs.append(
                     "Cluster is currently running on node %s. You need to stop "
                         "the cluster in order to restore the configuration."
@@ -286,7 +303,7 @@ def config_restore_remote(infile_name, infile_obj):
     # If node returns HTTP 404 it does not support config syncing at all.
     for node in node_list:
         retval, output = utils.pauseConfigSyncing(node, 10 * 60)
-        if not (retval == 0 or output.endswith("(HTTP error: 404)")):
+        if not (retval == 0 or "(HTTP error: 404)" in output):
             utils.err(output)
 
     if infile_obj:
@@ -306,11 +323,13 @@ def config_restore_remote(infile_name, infile_obj):
 
 def config_restore_local(infile_name, infile_obj):
     if (
-        status.is_cman_running()
+        status.is_service_running("cman")
+        or
+        status.is_service_running("corosync")
         or
-        status.is_corosyc_running()
+        status.is_service_running("pacemaker")
         or
-        status.is_pacemaker_running()
+        status.is_service_running("pacemaker_remote")
     ):
         utils.err(
             "Cluster is currently running on this node. You need to stop "
diff --git a/pcs/constraint.py b/pcs/constraint.py
index 5d9b0df..e32f1a3 100644
--- a/pcs/constraint.py
+++ b/pcs/constraint.py
@@ -593,7 +593,7 @@ def location_show(argv):
             print("  Node: " + node)
 
             nodehash_label = (
-                (nodehashon, "    Allowed to run:")
+                (nodehashon, "    Allowed to run:"),
                 (nodehashoff, "    Not allowed to run:")
             )
             for nodehash, label in nodehash_label:
diff --git a/pcs/prop.py b/pcs/prop.py
index 3a65990..36eba60 100644
--- a/pcs/prop.py
+++ b/pcs/prop.py
@@ -7,7 +7,6 @@ from __future__ import (
 
 import sys
 import json
-from xml.dom.minidom import parseString
 
 from pcs import usage
 from pcs import utils
@@ -116,7 +115,7 @@ def list_property(argv):
         properties = {}
 
     if "--defaults" not in utils.pcs_options:
-        properties = get_set_properties(
+        properties = utils.get_set_properties(
             None if print_all else argv[0],
             properties
         )
@@ -141,16 +140,3 @@ def get_default_properties():
         parameters[name] = prop["default"]
     return parameters
 
-def get_set_properties(prop_name=None, defaults=None):
-    properties = {} if defaults is None else dict(defaults)
-    (output, retVal) = utils.run(["cibadmin","-Q","--scope", "crm_config"])
-    if retVal != 0:
-        utils.err("unable to get crm_config\n"+output)
-    dom = parseString(output)
-    de = dom.documentElement
-    crm_config_properties = de.getElementsByTagName("nvpair")
-    for prop in crm_config_properties:
-        if prop_name is None or (prop_name == prop.getAttribute("name")):
-            properties[prop.getAttribute("name")] = prop.getAttribute("value")
-    return properties
-
diff --git a/pcs/quorum.py b/pcs/quorum.py
index a849282..1c2d41d 100644
--- a/pcs/quorum.py
+++ b/pcs/quorum.py
@@ -8,7 +8,6 @@ from __future__ import (
 import sys
 
 from pcs import (
-    prop,
     stonith,
     usage,
     utils,
@@ -234,7 +233,7 @@ def quorum_unblock_cmd(argv):
         utils.err("unable to cancel waiting for nodes")
     print("Quorum unblocked")
 
-    startup_fencing = prop.get_set_properties().get("startup-fencing", "")
+    startup_fencing = utils.get_set_properties().get("startup-fencing", "")
     utils.set_cib_property(
         "startup-fencing",
         "false" if startup_fencing.lower() != "false" else "true"
diff --git a/pcs/status.py b/pcs/status.py
index bdfcc85..86216ea 100644
--- a/pcs/status.py
+++ b/pcs/status.py
@@ -103,7 +103,7 @@ def full_status():
     print(output)
 
     if not utils.usefile:
-        if  "--full" in utils.pcs_options:
+        if  "--full" in utils.pcs_options and utils.hasCorosyncConf():
             print_pcsd_daemon_status()
             print()
         utils.serviceStatus("  ")
@@ -121,7 +121,10 @@ def nodes_status(argv):
         return
 
     if len(argv) == 1 and (argv[0] == "config"):
-        corosync_nodes = utils.getNodesFromCorosyncConf()
+        if utils.hasCorosyncConf():
+            corosync_nodes = utils.getNodesFromCorosyncConf()
+        else:
+            corosync_nodes = []
         try:
             pacemaker_nodes = sorted([
                 node.attrs.name for node
@@ -244,7 +247,7 @@ def cluster_status(argv):
         else:
             print("",line)
 
-    if not utils.usefile:
+    if not utils.usefile and utils.hasCorosyncConf():
         print()
         print_pcsd_daemon_status()
 
@@ -262,25 +265,11 @@ def xml_status():
         utils.err("running crm_mon, is pacemaker running?")
     print(output, end="")
 
-def is_cman_running():
-    if utils.is_systemctl():
-        dummy_output, retval = utils.run(["systemctl", "status", "cman.service"])
-    else:
-        dummy_output, retval = utils.run(["service", "cman", "status"])
-    return retval == 0
-
-def is_corosyc_running():
-    if utils.is_systemctl():
-        dummy_output, retval = utils.run(["systemctl", "status", "corosync.service"])
-    else:
-        dummy_output, retval = utils.run(["service", "corosync", "status"])
-    return retval == 0
-
-def is_pacemaker_running():
+def is_service_running(service):
     if utils.is_systemctl():
-        dummy_output, retval = utils.run(["systemctl", "status", "pacemaker.service"])
+        dummy_output, retval = utils.run(["systemctl", "status", service])
     else:
-        dummy_output, retval = utils.run(["service", "pacemaker", "status"])
+        dummy_output, retval = utils.run(["service", service, "status"])
     return retval == 0
 
 def print_pcsd_daemon_status():
diff --git a/pcs/stonith.py b/pcs/stonith.py
index ab9e926..c02f35a 100644
--- a/pcs/stonith.py
+++ b/pcs/stonith.py
@@ -225,7 +225,11 @@ def stonith_level_add(level, node, devices):
         for dev in devices.split(","):
             if not utils.is_stonith_resource(dev):
                 utils.err("%s is not a stonith id (use --force to override)" % dev)
-        if not utils.is_pacemaker_node(node) and not utils.is_corosync_node(node):
+        corosync_nodes = []
+        if utils.hasCorosyncConf():
+            corosync_nodes = utils.getNodesFromCorosyncConf()
+        pacemaker_nodes = utils.getNodesFromPacemaker()
+        if node not in corosync_nodes and node not in pacemaker_nodes:
             utils.err("%s is not currently a node (use --force to override)" % node)
 
     ft = dom.getElementsByTagName("fencing-topology")
@@ -321,6 +325,10 @@ def stonith_level_clear(node = None):
 
 def stonith_level_verify():
     dom = utils.get_cib_dom()
+    corosync_nodes = []
+    if utils.hasCorosyncConf():
+        corosync_nodes = utils.getNodesFromCorosyncConf()
+    pacemaker_nodes = utils.getNodesFromPacemaker()
 
     fls = dom.getElementsByTagName("fencing-level")
     for fl in fls:
@@ -329,7 +337,7 @@ def stonith_level_verify():
         for dev in devices.split(","):
             if not utils.is_stonith_resource(dev):
                 utils.err("%s is not a stonith id" % dev)
-        if not utils.is_corosync_node(node) and not utils.is_pacemaker_node(node):
+        if node not in corosync_nodes and node not in pacemaker_nodes:
             utils.err("%s is not currently a node" % node)
 
 def stonith_level_show():
diff --git a/pcs/utils.py b/pcs/utils.py
index 2cfb693..3970eff 100644
--- a/pcs/utils.py
+++ b/pcs/utils.py
@@ -301,6 +301,8 @@ def canAddNodeToCluster(node):
                 return (False, "unable to authenticate to node")
             if "node_available" in myout and myout["node_available"] == True:
                 return (True, "")
+            elif myout.get("pacemaker_remote", False):
+                return (False, "node is running pacemaker_remote")
             else:
                 return (False, "node is already in a cluster")
         except ValueError:
@@ -465,6 +467,14 @@ def getNodesFromPacemaker():
     except LibraryError as e:
         process_library_reports(e.args)
 
+def hasCorosyncConf(conf=None):
+    if not conf:
+        if is_rhel6():
+            conf = settings.cluster_conf_file
+        else:
+            conf = settings.corosync_conf_file
+    return os.path.isfile(conf)
+
 def getCorosyncConf(conf=None):
     if not conf:
         if is_rhel6():
@@ -1071,18 +1081,6 @@ def does_exist(xpath_query):
         return False
     return True
 
-def is_pacemaker_node(node):
-    p_nodes = getNodesFromPacemaker()
-    if node in p_nodes:
-        return True
-    return False
-
-def is_corosync_node(node):
-    c_nodes = getNodesFromCorosyncConf()
-    if node in c_nodes:
-        return True
-    return False
-
 def get_group_children(group_id):
     child_resources = []
     dom = get_cib_dom()
@@ -1838,7 +1836,7 @@ def getCorosyncNodesID(allow_failure=False):
         err_msgs, retval, output, dummy_std_err = call_local_pcsd(
             ['status', 'nodes', 'corosync-id'], True
         )
-        if err_msgs:
+        if err_msgs and not allow_failure:
             for msg in err_msgs:
                 err(msg, False)
             sys.exit(1)
@@ -1866,6 +1864,7 @@ def getCorosyncNodesID(allow_failure=False):
 
 # Warning, if a node has never started the hostname may be '(null)'
 #TODO This doesn't work on CMAN clusters at all and should be removed completely
+# Doesn't work on pacemaker-remote nodes either
 def getPacemakerNodesID(allow_failure=False):
     if os.getuid() == 0:
         (output, retval) = run(['crm_node', '-l'])
@@ -1873,7 +1872,7 @@ def getPacemakerNodesID(allow_failure=False):
         err_msgs, retval, output, dummy_std_err = call_local_pcsd(
             ['status', 'nodes', 'pacemaker-id'], True
         )
-        if err_msgs:
+        if err_msgs and not allow_failure:
             for msg in err_msgs:
                 err(msg, False)
             sys.exit(1)
@@ -1893,9 +1892,11 @@ def getPacemakerNodesID(allow_failure=False):
     return pm_nodes
 
 def corosyncPacemakerNodeCheck():
-    # does not work on CMAN clusters
-    pm_nodes = getPacemakerNodesID()
-    cs_nodes = getCorosyncNodesID()
+    # does not work on CMAN clusters and pacemaker-remote nodes
+    # we do not want a failure to exit pcs as this is only a minor information
+    # function
+    pm_nodes = getPacemakerNodesID(allow_failure=True)
+    cs_nodes = getCorosyncNodesID(allow_failure=True)
 
     for node_id in pm_nodes:
         if pm_nodes[node_id] == "(null)":
@@ -1920,10 +1921,9 @@ def getClusterName():
     if is_rhel6():
         try:
             dom = parse(settings.cluster_conf_file)
+            return dom.documentElement.getAttribute("name")
         except (IOError,xml.parsers.expat.ExpatError):
-            return ""
-
-        return dom.documentElement.getAttribute("name")
+            pass
     else:
         try:
             f = open(settings.corosync_conf_file,'r')
@@ -1937,7 +1937,15 @@ def getClusterName():
             if cluster_name:
                 return cluster_name
         except (IOError, corosync_conf_parser.CorosyncConfParserException):
-            return ""
+            pass
+
+    # there is no corosync.conf or cluster.conf on remote nodes, we can try to
+    # get cluster name from pacemaker
+    try:
+        return get_set_properties("cluster-name")["cluster-name"]
+    except:
+        # we need to catch SystemExit (from utils.err), parse errors and so on
+        pass
 
     return ""
 
@@ -2024,23 +2032,30 @@ def serviceStatus(prefix):
     if not is_systemctl():
         return
     print("Daemon Status:")
-    for service in ["corosync", "pacemaker", "pcsd"]:
-        print('{0}{1}: {2}/{3}'.format(
-            prefix, service,
-            run(["systemctl", 'is-active', service])[0].strip(),
-            run(["systemctl", 'is-enabled', service])[0].strip()
-        ))
-    try:
-        sbd_running = is_service_running(cmd_runner(), "sbd")
-        sbd_enabled = is_service_enabled(cmd_runner(), "sbd")
-        if sbd_enabled or sbd_running:
-            print("{prefix}sbd: {active}/{enabled}".format(
-                prefix=prefix,
-                active=("active" if sbd_running else "inactive"),
-                enabled=("enabled" if sbd_enabled else "disabled")
-            ))
-    except LibraryError:
-        pass
+    service_def = [
+        # (
+        #     service name,
+        #     display even if not enabled nor running
+        # )
+        ("corosync", True),
+        ("pacemaker", True),
+        ("pacemaker_remote", False),
+        ("pcsd", True),
+        ("sbd", False),
+    ]
+    for service, display_always in service_def:
+        try:
+            running = is_service_running(cmd_runner(), service)
+            enabled = is_service_enabled(cmd_runner(), service)
+            if display_always or enabled or running:
+                print("{prefix}{service}: {active}/{enabled}".format(
+                    prefix=prefix,
+                    service=service,
+                    active=("active" if running else "inactive"),
+                    enabled=("enabled" if enabled else "disabled")
+                ))
+        except LibraryError:
+            pass
 
 def enableServices():
     # do NOT handle SBD in here, it is started by pacemaker not systemd or init
@@ -2677,3 +2692,16 @@ def exit_on_cmdline_input_errror(error, main_name, usage_name):
 
 def get_report_processor():
     return LibraryReportProcessorToConsole(debug=("--debug" in pcs_options))
+
+def get_set_properties(prop_name=None, defaults=None):
+    properties = {} if defaults is None else dict(defaults)
+    (output, retVal) = run(["cibadmin","-Q","--scope", "crm_config"])
+    if retVal != 0:
+        err("unable to get crm_config\n"+output)
+    dom = parseString(output)
+    de = dom.documentElement
+    crm_config_properties = de.getElementsByTagName("nvpair")
+    for prop in crm_config_properties:
+        if prop_name is None or (prop_name == prop.getAttribute("name")):
+            properties[prop.getAttribute("name")] = prop.getAttribute("value")
+    return properties
diff --git a/pcsd/cluster_entity.rb b/pcsd/cluster_entity.rb
index f54cd30..fa56fe2 100644
--- a/pcsd/cluster_entity.rb
+++ b/pcsd/cluster_entity.rb
@@ -1011,7 +1011,9 @@ module ClusterEntity
       @uptime = 'unknown'
       @name = nil
       @services = {}
-      [:pacemaker, :corosync, :pcsd, :cman, :sbd].each do |service|
+      [
+        :pacemaker, :pacemaker_remote, :corosync, :pcsd, :cman, :sbd
+      ].each do |service|
         @services[service] = {
           :installed => nil,
           :running => nil,
diff --git a/pcsd/pcs.rb b/pcsd/pcs.rb
index 57082be..0956de9 100644
--- a/pcsd/pcs.rb
+++ b/pcsd/pcs.rb
@@ -892,6 +892,10 @@ def pacemaker_running?()
   is_service_running?('pacemaker')
 end
 
+def pacemaker_remote_running?()
+  is_service_running?('pacemaker_remote')
+end
+
 def get_pacemaker_version()
   begin
     stdout, stderror, retval = run_cmd(
diff --git a/pcsd/remote.rb b/pcsd/remote.rb
index 75c9465..6a3a692 100644
--- a/pcsd/remote.rb
+++ b/pcsd/remote.rb
@@ -769,9 +769,19 @@ def get_sw_versions(params, request, auth_user)
 end
 
 def remote_node_available(params, request, auth_user)
-  if (not ISRHEL6 and File.exist?(Cfgsync::CorosyncConf.file_path)) or (ISRHEL6 and File.exist?(Cfgsync::ClusterConf.file_path)) or File.exist?("/var/lib/pacemaker/cib/cib.xml")
+  if (
+    (not ISRHEL6 and File.exist?(Cfgsync::CorosyncConf.file_path)) or
+    (ISRHEL6 and File.exist?(Cfgsync::ClusterConf.file_path)) or
+    File.exist?("/var/lib/pacemaker/cib/cib.xml")
+  )
     return JSON.generate({:node_available => false})
   end
+  if pacemaker_remote_running?()
+    return JSON.generate({
+      :node_available => false,
+      :pacemaker_remote => true,
+    })
+  end
   return JSON.generate({:node_available => true})
 end
 
@@ -1038,6 +1048,8 @@ def node_status(params, request, auth_user)
     :cman => node.cman,
     :corosync_enabled => node.corosync_enabled,
     :pacemaker_enabled => node.pacemaker_enabled,
+    :pacemaker_remote => node.services[:pacemaker_remote][:running],
+    :pacemaker_remote_enabled => node.services[:pacemaker_remote][:enabled],
     :pcsd_enabled => node.pcsd_enabled,
     :corosync_online => status[:corosync_online],
     :corosync_offline => status[:corosync_offline],
-- 
1.8.3.1