From 5d8bab038a7aa64c38b79e5de9579af4c73e70a2 Mon Sep 17 00:00:00 2001
From: Tomas Jelinek <tojeline@redhat.com>
Date: Thu, 14 Jul 2016 17:04:04 +0200
Subject: [PATCH] fixes for pcs cli running on a remote node
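
When pcs runs on a pacemaker_remote node there is no corosync.conf
(or cluster.conf on RHEL 6) and no corosync daemon, which broke a
number of CLI commands. This patch:

* adds utils.hasCorosyncConf() and skips corosync-specific parts of
  "pcs config", "pcs status" and "pcs cluster status" when the file
  is missing
* moves get_set_properties() from pcs/prop.py to pcs/utils.py so that
  acl, prop and quorum can share it
* replaces is_cman_running(), is_corosyc_running() and
  is_pacemaker_running() with a generic is_service_running(service)
  and also checks pacemaker_remote before restoring a configuration
* makes "pcs cluster kill" kill pacemaker_remoted as well
* teaches getClusterName() to fall back to the cluster-name pacemaker
  property when neither corosync.conf nor cluster.conf exists
* honors allow_failure in getCorosyncNodesID() and
  getPacemakerNodesID() so corosyncPacemakerNodeCheck() cannot exit pcs
* reports the pacemaker_remote service in daemon status output and in
  pcsd's node status, and makes pcsd refuse to add a node that is
  already running pacemaker_remote
* fixes a missing comma in a tuple in constraint.py location_show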
---
pcs/acl.py | 2 +-
pcs/cluster.py | 13 ++++++-
pcs/config.py | 37 +++++++++++++-----
pcs/constraint.py | 2 +-
pcs/prop.py | 16 +-------
pcs/quorum.py | 3 +-
pcs/status.py | 29 +++++---------
pcs/stonith.py | 12 +++++-
pcs/utils.py | 104 +++++++++++++++++++++++++++++++------------------
pcsd/cluster_entity.rb | 4 +-
pcsd/pcs.rb | 4 ++
pcsd/remote.rb | 14 ++++++-
12 files changed, 149 insertions(+), 91 deletions(-)
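Reviewer note: a minimal smoke-test sketch of the reshuffled helpers,
assuming pcs is installed and cibadmin can reach a CIB. The script
itself is hypothetical; only the pcs functions it calls come from this
patch:

    # Hypothetical sketch; the pcs.utils / pcs.status names match this patch.
    from pcs import status, utils

    # hasCorosyncConf() is the new guard: True only on full cluster nodes
    # where corosync.conf (or cluster.conf on RHEL 6) exists.
    if utils.hasCorosyncConf():
        print("corosync nodes:", utils.getNodesFromCorosyncConf())
    else:
        # on a pacemaker_remote node, getClusterName() now falls back to
        # the cluster-name property from pacemaker
        print("cluster name:", utils.getClusterName())

    # get_set_properties() moved from pcs.prop to pcs.utils, same
    # signature: get_set_properties(prop_name=None, defaults=None)
    print(utils.get_set_properties("cluster-name"))

    # the three is_*_running() helpers collapsed into one
    for service in ("corosync", "pacemaker", "pacemaker_remote"):
        print(service, status.is_service_running(service))
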
diff --git a/pcs/acl.py b/pcs/acl.py
index 118ceed..0378c10 100644
--- a/pcs/acl.py
+++ b/pcs/acl.py
@@ -55,7 +55,7 @@ def acl_cmd(argv):
def acl_show(argv):
dom = utils.get_cib_dom()
- properties = prop.get_set_properties(defaults=prop.get_default_properties())
+ properties = utils.get_set_properties(defaults=prop.get_default_properties())
acl_enabled = properties.get("enable-acl", "").lower()
if is_true(acl_enabled):
print("ACLs are enabled")
diff --git a/pcs/cluster.py b/pcs/cluster.py
index 4155103..13446d4 100644
--- a/pcs/cluster.py
+++ b/pcs/cluster.py
@@ -1157,7 +1157,18 @@ def stop_cluster_corosync():
utils.err("unable to stop {0}".format(service))
def kill_cluster(argv):
- daemons = ["crmd", "pengine", "attrd", "lrmd", "stonithd", "cib", "pacemakerd", "corosync-qdevice", "corosync"]
+ daemons = [
+ "crmd",
+ "pengine",
+ "attrd",
+ "lrmd",
+ "stonithd",
+ "cib",
+ "pacemakerd",
+ "pacemaker_remoted",
+ "corosync-qdevice",
+ "corosync",
+ ]
dummy_output, dummy_retval = utils.run(["killall", "-9"] + daemons)
# if dummy_retval != 0:
# print "Error: unable to execute killall -9"
diff --git a/pcs/config.py b/pcs/config.py
index 3d86b39..9119c3c 100644
--- a/pcs/config.py
+++ b/pcs/config.py
@@ -95,14 +95,22 @@ def config_show(argv):
print()
config_show_cib()
if (
- utils.is_rhel6()
- or
- (not utils.usefile and "--corosync_conf" not in utils.pcs_options)
+ utils.hasCorosyncConf()
+ and
+ (
+ utils.is_rhel6()
+ or
+ (not utils.usefile and "--corosync_conf" not in utils.pcs_options)
+ )
):
# with corosync 1 and cman, uid gid is part of cluster.conf file
# with corosync 2, uid gid is in a separate directory
cluster.cluster_uidgid([], True)
- if "--corosync_conf" in utils.pcs_options or not utils.is_rhel6():
+ if (
+ "--corosync_conf" in utils.pcs_options
+ or
+ (not utils.is_rhel6() and utils.hasCorosyncConf())
+ ):
print()
print("Quorum:")
try:
@@ -267,7 +275,16 @@ def config_restore_remote(infile_name, infile_obj):
err_msgs.append(output)
continue
status = json.loads(output)
- if status["corosync"] or status["pacemaker"] or status["cman"]:
+ if (
+ status["corosync"]
+ or
+ status["pacemaker"]
+ or
+ status["cman"]
+ or
+ # not supported by older pcsd; do not fail if not present
+ status.get("pacemaker_remote", False)
+ ):
err_msgs.append(
"Cluster is currently running on node %s. You need to stop "
"the cluster in order to restore the configuration."
@@ -286,7 +303,7 @@ def config_restore_remote(infile_name, infile_obj):
# If node returns HTTP 404 it does not support config syncing at all.
for node in node_list:
retval, output = utils.pauseConfigSyncing(node, 10 * 60)
- if not (retval == 0 or output.endswith("(HTTP error: 404)")):
+ if not (retval == 0 or "(HTTP error: 404)" in output):
utils.err(output)
if infile_obj:
@@ -306,11 +323,13 @@ def config_restore_remote(infile_name, infile_obj):
def config_restore_local(infile_name, infile_obj):
if (
- status.is_cman_running()
+ status.is_service_running("cman")
+ or
+ status.is_service_running("corosync")
or
- status.is_corosyc_running()
+ status.is_service_running("pacemaker")
or
- status.is_pacemaker_running()
+ status.is_service_running("pacemaker_remote")
):
utils.err(
"Cluster is currently running on this node. You need to stop "
diff --git a/pcs/constraint.py b/pcs/constraint.py
index 5d9b0df..e32f1a3 100644
--- a/pcs/constraint.py
+++ b/pcs/constraint.py
@@ -593,7 +593,7 @@ def location_show(argv):
print(" Node: " + node)
nodehash_label = (
- (nodehashon, " Allowed to run:")
+ (nodehashon, " Allowed to run:"),
(nodehashoff, " Not allowed to run:")
)
for nodehash, label in nodehash_label:
diff --git a/pcs/prop.py b/pcs/prop.py
index 3a65990..36eba60 100644
--- a/pcs/prop.py
+++ b/pcs/prop.py
@@ -7,7 +7,6 @@ from __future__ import (
import sys
import json
-from xml.dom.minidom import parseString
from pcs import usage
from pcs import utils
@@ -116,7 +115,7 @@ def list_property(argv):
properties = {}
if "--defaults" not in utils.pcs_options:
- properties = get_set_properties(
+ properties = utils.get_set_properties(
None if print_all else argv[0],
properties
)
@@ -141,16 +140,3 @@ def get_default_properties():
parameters[name] = prop["default"]
return parameters
-def get_set_properties(prop_name=None, defaults=None):
- properties = {} if defaults is None else dict(defaults)
- (output, retVal) = utils.run(["cibadmin","-Q","--scope", "crm_config"])
- if retVal != 0:
- utils.err("unable to get crm_config\n"+output)
- dom = parseString(output)
- de = dom.documentElement
- crm_config_properties = de.getElementsByTagName("nvpair")
- for prop in crm_config_properties:
- if prop_name is None or (prop_name == prop.getAttribute("name")):
- properties[prop.getAttribute("name")] = prop.getAttribute("value")
- return properties
-
diff --git a/pcs/quorum.py b/pcs/quorum.py
index a849282..1c2d41d 100644
--- a/pcs/quorum.py
+++ b/pcs/quorum.py
@@ -8,7 +8,6 @@ from __future__ import (
import sys
from pcs import (
- prop,
stonith,
usage,
utils,
@@ -234,7 +233,7 @@ def quorum_unblock_cmd(argv):
utils.err("unable to cancel waiting for nodes")
print("Quorum unblocked")
- startup_fencing = prop.get_set_properties().get("startup-fencing", "")
+ startup_fencing = utils.get_set_properties().get("startup-fencing", "")
utils.set_cib_property(
"startup-fencing",
"false" if startup_fencing.lower() != "false" else "true"
diff --git a/pcs/status.py b/pcs/status.py
index bdfcc85..86216ea 100644
--- a/pcs/status.py
+++ b/pcs/status.py
@@ -103,7 +103,7 @@ def full_status():
print(output)
if not utils.usefile:
- if "--full" in utils.pcs_options:
+ if "--full" in utils.pcs_options and utils.hasCorosyncConf():
print_pcsd_daemon_status()
print()
utils.serviceStatus(" ")
@@ -121,7 +121,10 @@ def nodes_status(argv):
return
if len(argv) == 1 and (argv[0] == "config"):
- corosync_nodes = utils.getNodesFromCorosyncConf()
+ if utils.hasCorosyncConf():
+ corosync_nodes = utils.getNodesFromCorosyncConf()
+ else:
+ corosync_nodes = []
try:
pacemaker_nodes = sorted([
node.attrs.name for node
@@ -244,7 +247,7 @@ def cluster_status(argv):
else:
print("",line)
- if not utils.usefile:
+ if not utils.usefile and utils.hasCorosyncConf():
print()
print_pcsd_daemon_status()
@@ -262,25 +265,11 @@ def xml_status():
utils.err("running crm_mon, is pacemaker running?")
print(output, end="")
-def is_cman_running():
- if utils.is_systemctl():
- dummy_output, retval = utils.run(["systemctl", "status", "cman.service"])
- else:
- dummy_output, retval = utils.run(["service", "cman", "status"])
- return retval == 0
-
-def is_corosyc_running():
- if utils.is_systemctl():
- dummy_output, retval = utils.run(["systemctl", "status", "corosync.service"])
- else:
- dummy_output, retval = utils.run(["service", "corosync", "status"])
- return retval == 0
-
-def is_pacemaker_running():
+def is_service_running(service):
if utils.is_systemctl():
- dummy_output, retval = utils.run(["systemctl", "status", "pacemaker.service"])
+ dummy_output, retval = utils.run(["systemctl", "status", service])
else:
- dummy_output, retval = utils.run(["service", "pacemaker", "status"])
+ dummy_output, retval = utils.run(["service", service, "status"])
return retval == 0
def print_pcsd_daemon_status():
diff --git a/pcs/stonith.py b/pcs/stonith.py
index ab9e926..c02f35a 100644
--- a/pcs/stonith.py
+++ b/pcs/stonith.py
@@ -225,7 +225,11 @@ def stonith_level_add(level, node, devices):
for dev in devices.split(","):
if not utils.is_stonith_resource(dev):
utils.err("%s is not a stonith id (use --force to override)" % dev)
- if not utils.is_pacemaker_node(node) and not utils.is_corosync_node(node):
+ corosync_nodes = []
+ if utils.hasCorosyncConf():
+ corosync_nodes = utils.getNodesFromCorosyncConf()
+ pacemaker_nodes = utils.getNodesFromPacemaker()
+ if node not in corosync_nodes and node not in pacemaker_nodes:
utils.err("%s is not currently a node (use --force to override)" % node)
ft = dom.getElementsByTagName("fencing-topology")
@@ -321,6 +325,10 @@ def stonith_level_clear(node = None):
def stonith_level_verify():
dom = utils.get_cib_dom()
+ corosync_nodes = []
+ if utils.hasCorosyncConf():
+ corosync_nodes = utils.getNodesFromCorosyncConf()
+ pacemaker_nodes = utils.getNodesFromPacemaker()
fls = dom.getElementsByTagName("fencing-level")
for fl in fls:
@@ -329,7 +337,7 @@ def stonith_level_verify():
for dev in devices.split(","):
if not utils.is_stonith_resource(dev):
utils.err("%s is not a stonith id" % dev)
- if not utils.is_corosync_node(node) and not utils.is_pacemaker_node(node):
+ if node not in corosync_nodes and node not in pacemaker_nodes:
utils.err("%s is not currently a node" % node)
def stonith_level_show():
diff --git a/pcs/utils.py b/pcs/utils.py
index 2cfb693..3970eff 100644
--- a/pcs/utils.py
+++ b/pcs/utils.py
@@ -301,6 +301,8 @@ def canAddNodeToCluster(node):
return (False, "unable to authenticate to node")
if "node_available" in myout and myout["node_available"] == True:
return (True, "")
+ elif myout.get("pacemaker_remote", False):
+ return (False, "node is running pacemaker_remote")
else:
return (False, "node is already in a cluster")
except ValueError:
@@ -465,6 +467,14 @@ def getNodesFromPacemaker():
except LibraryError as e:
process_library_reports(e.args)
+def hasCorosyncConf(conf=None):
+ if not conf:
+ if is_rhel6():
+ conf = settings.cluster_conf_file
+ else:
+ conf = settings.corosync_conf_file
+ return os.path.isfile(conf)
+
def getCorosyncConf(conf=None):
if not conf:
if is_rhel6():
@@ -1071,18 +1081,6 @@ def does_exist(xpath_query):
return False
return True
-def is_pacemaker_node(node):
- p_nodes = getNodesFromPacemaker()
- if node in p_nodes:
- return True
- return False
-
-def is_corosync_node(node):
- c_nodes = getNodesFromCorosyncConf()
- if node in c_nodes:
- return True
- return False
-
def get_group_children(group_id):
child_resources = []
dom = get_cib_dom()
@@ -1838,7 +1836,7 @@ def getCorosyncNodesID(allow_failure=False):
err_msgs, retval, output, dummy_std_err = call_local_pcsd(
['status', 'nodes', 'corosync-id'], True
)
- if err_msgs:
+ if err_msgs and not allow_failure:
for msg in err_msgs:
err(msg, False)
sys.exit(1)
@@ -1866,6 +1864,7 @@ def getCorosyncNodesID(allow_failure=False):
# Warning, if a node has never started the hostname may be '(null)'
#TODO This doesn't work on CMAN clusters at all and should be removed completely
+# Doesn't work on pacemaker-remote nodes either
def getPacemakerNodesID(allow_failure=False):
if os.getuid() == 0:
(output, retval) = run(['crm_node', '-l'])
@@ -1873,7 +1872,7 @@ def getPacemakerNodesID(allow_failure=False):
err_msgs, retval, output, dummy_std_err = call_local_pcsd(
['status', 'nodes', 'pacemaker-id'], True
)
- if err_msgs:
+ if err_msgs and not allow_failure:
for msg in err_msgs:
err(msg, False)
sys.exit(1)
@@ -1893,9 +1892,11 @@ def getPacemakerNodesID(allow_failure=False):
return pm_nodes
def corosyncPacemakerNodeCheck():
- # does not work on CMAN clusters
- pm_nodes = getPacemakerNodesID()
- cs_nodes = getCorosyncNodesID()
+ # does not work on CMAN clusters and pacemaker-remote nodes
+ # we do not want a failure to exit pcs, as this is merely an
+ # informational function
+ pm_nodes = getPacemakerNodesID(allow_failure=True)
+ cs_nodes = getCorosyncNodesID(allow_failure=True)
for node_id in pm_nodes:
if pm_nodes[node_id] == "(null)":
@@ -1920,10 +1921,9 @@ def getClusterName():
if is_rhel6():
try:
dom = parse(settings.cluster_conf_file)
+ return dom.documentElement.getAttribute("name")
except (IOError,xml.parsers.expat.ExpatError):
- return ""
-
- return dom.documentElement.getAttribute("name")
+ pass
else:
try:
f = open(settings.corosync_conf_file,'r')
@@ -1937,7 +1937,15 @@ def getClusterName():
if cluster_name:
return cluster_name
except (IOError, corosync_conf_parser.CorosyncConfParserException):
- return ""
+ pass
+
+ # there is no corosync.conf or cluster.conf on remote nodes, so we can
+ # try to get the cluster name from pacemaker
+ try:
+ return get_set_properties("cluster-name")["cluster-name"]
+ except:
+ # we need to catch SystemExit (from utils.err), parse errors and so on
+ pass
return ""
@@ -2024,23 +2032,30 @@ def serviceStatus(prefix):
if not is_systemctl():
return
print("Daemon Status:")
- for service in ["corosync", "pacemaker", "pcsd"]:
- print('{0}{1}: {2}/{3}'.format(
- prefix, service,
- run(["systemctl", 'is-active', service])[0].strip(),
- run(["systemctl", 'is-enabled', service])[0].strip()
- ))
- try:
- sbd_running = is_service_running(cmd_runner(), "sbd")
- sbd_enabled = is_service_enabled(cmd_runner(), "sbd")
- if sbd_enabled or sbd_running:
- print("{prefix}sbd: {active}/{enabled}".format(
- prefix=prefix,
- active=("active" if sbd_running else "inactive"),
- enabled=("enabled" if sbd_enabled else "disabled")
- ))
- except LibraryError:
- pass
+ service_def = [
+ # (
+ # service name,
+ # display even if neither enabled nor running
+ # )
+ ("corosync", True),
+ ("pacemaker", True),
+ ("pacemaker_remote", False),
+ ("pcsd", True),
+ ("sbd", False),
+ ]
+ for service, display_always in service_def:
+ try:
+ running = is_service_running(cmd_runner(), service)
+ enabled = is_service_enabled(cmd_runner(), service)
+ if display_always or enabled or running:
+ print("{prefix}{service}: {active}/{enabled}".format(
+ prefix=prefix,
+ service=service,
+ active=("active" if running else "inactive"),
+ enabled=("enabled" if enabled else "disabled")
+ ))
+ except LibraryError:
+ pass
def enableServices():
# do NOT handle SBD in here, it is started by pacemaker not systemd or init
@@ -2677,3 +2692,16 @@ def exit_on_cmdline_input_errror(error, main_name, usage_name):
def get_report_processor():
return LibraryReportProcessorToConsole(debug=("--debug" in pcs_options))
+
+def get_set_properties(prop_name=None, defaults=None):
+ properties = {} if defaults is None else dict(defaults)
+ (output, retVal) = run(["cibadmin","-Q","--scope", "crm_config"])
+ if retVal != 0:
+ err("unable to get crm_config\n"+output)
+ dom = parseString(output)
+ de = dom.documentElement
+ crm_config_properties = de.getElementsByTagName("nvpair")
+ for prop in crm_config_properties:
+ if prop_name is None or (prop_name == prop.getAttribute("name")):
+ properties[prop.getAttribute("name")] = prop.getAttribute("value")
+ return properties
diff --git a/pcsd/cluster_entity.rb b/pcsd/cluster_entity.rb
index f54cd30..fa56fe2 100644
--- a/pcsd/cluster_entity.rb
+++ b/pcsd/cluster_entity.rb
@@ -1011,7 +1011,9 @@ module ClusterEntity
@uptime = 'unknown'
@name = nil
@services = {}
- [:pacemaker, :corosync, :pcsd, :cman, :sbd].each do |service|
+ [
+ :pacemaker, :pacemaker_remote, :corosync, :pcsd, :cman, :sbd
+ ].each do |service|
@services[service] = {
:installed => nil,
:running => nil,
diff --git a/pcsd/pcs.rb b/pcsd/pcs.rb
index 57082be..0956de9 100644
--- a/pcsd/pcs.rb
+++ b/pcsd/pcs.rb
@@ -892,6 +892,10 @@ def pacemaker_running?()
is_service_running?('pacemaker')
end
+def pacemaker_remote_running?()
+ is_service_running?('pacemaker_remote')
+end
+
def get_pacemaker_version()
begin
stdout, stderror, retval = run_cmd(
diff --git a/pcsd/remote.rb b/pcsd/remote.rb
index 75c9465..6a3a692 100644
--- a/pcsd/remote.rb
+++ b/pcsd/remote.rb
@@ -769,9 +769,19 @@ def get_sw_versions(params, request, auth_user)
end
def remote_node_available(params, request, auth_user)
- if (not ISRHEL6 and File.exist?(Cfgsync::CorosyncConf.file_path)) or (ISRHEL6 and File.exist?(Cfgsync::ClusterConf.file_path)) or File.exist?("/var/lib/pacemaker/cib/cib.xml")
+ if (
+ (not ISRHEL6 and File.exist?(Cfgsync::CorosyncConf.file_path)) or
+ (ISRHEL6 and File.exist?(Cfgsync::ClusterConf.file_path)) or
+ File.exist?("/var/lib/pacemaker/cib/cib.xml")
+ )
return JSON.generate({:node_available => false})
end
+ if pacemaker_remote_running?()
+ return JSON.generate({
+ :node_available => false,
+ :pacemaker_remote => true,
+ })
+ end
return JSON.generate({:node_available => true})
end
@@ -1038,6 +1048,8 @@ def node_status(params, request, auth_user)
:cman => node.cman,
:corosync_enabled => node.corosync_enabled,
:pacemaker_enabled => node.pacemaker_enabled,
+ :pacemaker_remote => node.services[:pacemaker_remote][:running],
+ :pacemaker_remote_enabled => node.services[:pacemaker_remote][:enabled],
:pcsd_enabled => node.pcsd_enabled,
:corosync_online => status[:corosync_online],
:corosync_offline => status[:corosync_offline],
--
1.8.3.1