From 5d8bab038a7aa64c38b79e5de9579af4c73e70a2 Mon Sep 17 00:00:00 2001
From: Tomas Jelinek
Date: Thu, 14 Jul 2016 17:04:04 +0200
Subject: [PATCH] fixes for pcs cli running on a remote node

---
 pcs/acl.py             |   2 +-
 pcs/cluster.py         |  13 ++++++-
 pcs/config.py          |  37 +++++++++++++-----
 pcs/constraint.py      |   2 +-
 pcs/prop.py            |  16 +-------
 pcs/quorum.py          |   3 +-
 pcs/status.py          |  29 +++++---------
 pcs/stonith.py         |  12 +++++-
 pcs/utils.py           | 104 +++++++++++++++++++++++++++++++------------------
 pcsd/cluster_entity.rb |   4 +-
 pcsd/pcs.rb            |   4 ++
 pcsd/remote.rb         |  14 ++++++-
 12 files changed, 149 insertions(+), 91 deletions(-)

diff --git a/pcs/acl.py b/pcs/acl.py
index 118ceed..0378c10 100644
--- a/pcs/acl.py
+++ b/pcs/acl.py
@@ -55,7 +55,7 @@ def acl_cmd(argv):
 
 def acl_show(argv):
     dom = utils.get_cib_dom()
-    properties = prop.get_set_properties(defaults=prop.get_default_properties())
+    properties = utils.get_set_properties(defaults=prop.get_default_properties())
     acl_enabled = properties.get("enable-acl", "").lower()
     if is_true(acl_enabled):
         print("ACLs are enabled")
diff --git a/pcs/cluster.py b/pcs/cluster.py
index 4155103..13446d4 100644
--- a/pcs/cluster.py
+++ b/pcs/cluster.py
@@ -1157,7 +1157,18 @@ def stop_cluster_corosync():
             utils.err("unable to stop {0}".format(service))
 
 def kill_cluster(argv):
-    daemons = ["crmd", "pengine", "attrd", "lrmd", "stonithd", "cib", "pacemakerd", "corosync-qdevice", "corosync"]
+    daemons = [
+        "crmd",
+        "pengine",
+        "attrd",
+        "lrmd",
+        "stonithd",
+        "cib",
+        "pacemakerd",
+        "pacemaker_remoted",
+        "corosync-qdevice",
+        "corosync",
+    ]
     dummy_output, dummy_retval = utils.run(["killall", "-9"] + daemons)
 #    if dummy_retval != 0:
 #        print "Error: unable to execute killall -9"
diff --git a/pcs/config.py b/pcs/config.py
index 3d86b39..9119c3c 100644
--- a/pcs/config.py
+++ b/pcs/config.py
@@ -95,14 +95,22 @@ def config_show(argv):
     print()
     config_show_cib()
     if (
-        utils.is_rhel6()
-        or
-        (not utils.usefile and "--corosync_conf" not in utils.pcs_options)
+        utils.hasCorosyncConf()
+        and
+        (
+            utils.is_rhel6()
+            or
+            (not utils.usefile and "--corosync_conf" not in utils.pcs_options)
+        )
     ):
         # with corosync 1 and cman, uid gid is part of cluster.conf file
         # with corosync 2, uid gid is in a separate directory
         cluster.cluster_uidgid([], True)
-    if "--corosync_conf" in utils.pcs_options or not utils.is_rhel6():
+    if (
+        "--corosync_conf" in utils.pcs_options
+        or
+        (not utils.is_rhel6() and utils.hasCorosyncConf())
+    ):
         print()
         print("Quorum:")
         try:
@@ -267,7 +275,16 @@ def config_restore_remote(infile_name, infile_obj):
             err_msgs.append(output)
             continue
         status = json.loads(output)
-        if status["corosync"] or status["pacemaker"] or status["cman"]:
+        if (
+            status["corosync"]
+            or
+            status["pacemaker"]
+            or
+            status["cman"]
+            or
+            # not supported by older pcsd, do not fail if not present
+            status.get("pacemaker_remote", False)
+        ):
             err_msgs.append(
                 "Cluster is currently running on node %s. You need to stop "
                 "the cluster in order to restore the configuration."
@@ -286,7 +303,7 @@
     # If node returns HTTP 404 it does not support config syncing at all.
     for node in node_list:
         retval, output = utils.pauseConfigSyncing(node, 10 * 60)
-        if not (retval == 0 or output.endswith("(HTTP error: 404)")):
+        if not (retval == 0 or "(HTTP error: 404)" in output):
             utils.err(output)
 
     if infile_obj:
@@ -306,11 +323,13 @@
 
 def config_restore_local(infile_name, infile_obj):
     if (
-        status.is_cman_running()
+        status.is_service_running("cman")
+        or
+        status.is_service_running("corosync")
         or
-        status.is_corosyc_running()
+        status.is_service_running("pacemaker")
         or
-        status.is_pacemaker_running()
+        status.is_service_running("pacemaker_remote")
     ):
         utils.err(
             "Cluster is currently running on this node. You need to stop "
diff --git a/pcs/constraint.py b/pcs/constraint.py
index 5d9b0df..e32f1a3 100644
--- a/pcs/constraint.py
+++ b/pcs/constraint.py
@@ -593,7 +593,7 @@ def location_show(argv):
             print(" Node: " + node)
 
             nodehash_label = (
-                (nodehashon, " Allowed to run:")
+                (nodehashon, " Allowed to run:"),
                 (nodehashoff, " Not allowed to run:")
             )
             for nodehash, label in nodehash_label:
diff --git a/pcs/prop.py b/pcs/prop.py
index 3a65990..36eba60 100644
--- a/pcs/prop.py
+++ b/pcs/prop.py
@@ -7,7 +7,6 @@ from __future__ import (
 
 import sys
 import json
-from xml.dom.minidom import parseString
 
 from pcs import usage
 from pcs import utils
@@ -116,7 +115,7 @@ def list_property(argv):
 
     properties = {}
     if "--defaults" not in utils.pcs_options:
-        properties = get_set_properties(
+        properties = utils.get_set_properties(
             None if print_all else argv[0],
             properties
         )
@@ -141,16 +140,3 @@ def get_default_properties():
             parameters[name] = prop["default"]
     return parameters
 
-def get_set_properties(prop_name=None, defaults=None):
-    properties = {} if defaults is None else dict(defaults)
-    (output, retVal) = utils.run(["cibadmin","-Q","--scope", "crm_config"])
-    if retVal != 0:
-        utils.err("unable to get crm_config\n"+output)
-    dom = parseString(output)
-    de = dom.documentElement
-    crm_config_properties = de.getElementsByTagName("nvpair")
-    for prop in crm_config_properties:
-        if prop_name is None or (prop_name == prop.getAttribute("name")):
-            properties[prop.getAttribute("name")] = prop.getAttribute("value")
-    return properties
-
diff --git a/pcs/quorum.py b/pcs/quorum.py
index a849282..1c2d41d 100644
--- a/pcs/quorum.py
+++ b/pcs/quorum.py
@@ -8,7 +8,6 @@ from __future__ import (
 import sys
 
 from pcs import (
-    prop,
     stonith,
     usage,
     utils,
@@ -234,7 +233,7 @@ def quorum_unblock_cmd(argv):
         utils.err("unable to cancel waiting for nodes")
     print("Quorum unblocked")
 
-    startup_fencing = prop.get_set_properties().get("startup-fencing", "")
+    startup_fencing = utils.get_set_properties().get("startup-fencing", "")
     utils.set_cib_property(
         "startup-fencing",
         "false" if startup_fencing.lower() != "false" else "true"
diff --git a/pcs/status.py b/pcs/status.py
index bdfcc85..86216ea 100644
--- a/pcs/status.py
+++ b/pcs/status.py
@@ -103,7 +103,7 @@ def full_status():
     print(output)
 
     if not utils.usefile:
-        if "--full" in utils.pcs_options:
+        if "--full" in utils.pcs_options and utils.hasCorosyncConf():
             print_pcsd_daemon_status()
             print()
         utils.serviceStatus("  ")
@@ -121,7 +121,10 @@ def nodes_status(argv):
         return
 
     if len(argv) == 1 and (argv[0] == "config"):
-        corosync_nodes = utils.getNodesFromCorosyncConf()
+        if utils.hasCorosyncConf():
+            corosync_nodes = utils.getNodesFromCorosyncConf()
+        else:
+            corosync_nodes = []
         try:
             pacemaker_nodes = sorted([
                 node.attrs.name for node
@@ -244,7 +247,7 @@ def cluster_status(argv):
         else:
             print("",line)
 
-    if not utils.usefile:
+    if not utils.usefile and utils.hasCorosyncConf():
         print()
         print_pcsd_daemon_status()
 
@@ -262,25 +265,11 @@ def xml_status():
         utils.err("running crm_mon, is pacemaker running?")
     print(output, end="")
 
-def is_cman_running():
-    if utils.is_systemctl():
-        dummy_output, retval = utils.run(["systemctl", "status", "cman.service"])
-    else:
-        dummy_output, retval = utils.run(["service", "cman", "status"])
-    return retval == 0
-
-def is_corosyc_running():
-    if utils.is_systemctl():
-        dummy_output, retval = utils.run(["systemctl", "status", "corosync.service"])
-    else:
-        dummy_output, retval = utils.run(["service", "corosync", "status"])
-    return retval == 0
-
-def is_pacemaker_running():
+def is_service_running(service):
     if utils.is_systemctl():
-        dummy_output, retval = utils.run(["systemctl", "status", "pacemaker.service"])
+        dummy_output, retval = utils.run(["systemctl", "status", service])
     else:
-        dummy_output, retval = utils.run(["service", "pacemaker", "status"])
+        dummy_output, retval = utils.run(["service", service, "status"])
     return retval == 0
 
 def print_pcsd_daemon_status():
diff --git a/pcs/stonith.py b/pcs/stonith.py
index ab9e926..c02f35a 100644
--- a/pcs/stonith.py
+++ b/pcs/stonith.py
@@ -225,7 +225,11 @@ def stonith_level_add(level, node, devices):
         for dev in devices.split(","):
             if not utils.is_stonith_resource(dev):
                 utils.err("%s is not a stonith id (use --force to override)" % dev)
-        if not utils.is_pacemaker_node(node) and not utils.is_corosync_node(node):
+        corosync_nodes = []
+        if utils.hasCorosyncConf():
+            corosync_nodes = utils.getNodesFromCorosyncConf()
+        pacemaker_nodes = utils.getNodesFromPacemaker()
+        if node not in corosync_nodes and node not in pacemaker_nodes:
             utils.err("%s is not currently a node (use --force to override)" % node)
 
     ft = dom.getElementsByTagName("fencing-topology")
@@ -321,6 +325,10 @@ def stonith_level_clear(node = None):
 
 def stonith_level_verify():
     dom = utils.get_cib_dom()
+    corosync_nodes = []
+    if utils.hasCorosyncConf():
+        corosync_nodes = utils.getNodesFromCorosyncConf()
+    pacemaker_nodes = utils.getNodesFromPacemaker()
 
     fls = dom.getElementsByTagName("fencing-level")
     for fl in fls:
@@ -329,7 +337,7 @@ def stonith_level_verify():
         for dev in devices.split(","):
             if not utils.is_stonith_resource(dev):
                 utils.err("%s is not a stonith id" % dev)
-        if not utils.is_corosync_node(node) and not utils.is_pacemaker_node(node):
+        if node not in corosync_nodes and node not in pacemaker_nodes:
             utils.err("%s is not currently a node" % node)
 
 def stonith_level_show():
diff --git a/pcs/utils.py b/pcs/utils.py
index 2cfb693..3970eff 100644
--- a/pcs/utils.py
+++ b/pcs/utils.py
@@ -301,6 +301,8 @@ def canAddNodeToCluster(node):
                 return (False, "unable to authenticate to node")
             if "node_available" in myout and myout["node_available"] == True:
                 return (True, "")
+            elif myout.get("pacemaker_remote", False):
+                return (False, "node is running pacemaker_remote")
             else:
                 return (False, "node is already in a cluster")
         except ValueError:
@@ -465,6 +467,14 @@ def getNodesFromPacemaker():
     except LibraryError as e:
         process_library_reports(e.args)
 
+def hasCorosyncConf(conf=None):
+    if not conf:
+        if is_rhel6():
+            conf = settings.cluster_conf_file
+        else:
+            conf = settings.corosync_conf_file
+    return os.path.isfile(conf)
+
 def getCorosyncConf(conf=None):
     if not conf:
         if is_rhel6():
@@ -1071,18 +1081,6 @@ def does_exist(xpath_query):
         return False
     return True
 
-def is_pacemaker_node(node):
-    p_nodes = getNodesFromPacemaker()
-    if node in p_nodes:
-        return True
-    return False
-
-def is_corosync_node(node):
-    c_nodes = getNodesFromCorosyncConf()
-    if node in c_nodes:
-        return True
-    return False
-
 def get_group_children(group_id):
     child_resources = []
     dom = get_cib_dom()
@@ -1838,7 +1836,7 @@ def getCorosyncNodesID(allow_failure=False):
         err_msgs, retval, output, dummy_std_err = call_local_pcsd(
             ['status', 'nodes', 'corosync-id'], True
         )
-        if err_msgs:
+        if err_msgs and not allow_failure:
            for msg in err_msgs:
                 err(msg, False)
             sys.exit(1)
@@ -1866,6 +1864,7 @@
 
 # Warning, if a node has never started the hostname may be '(null)'
 #TODO This doesn't work on CMAN clusters at all and should be removed completely
+# Doesn't work on pacemaker-remote nodes either
 def getPacemakerNodesID(allow_failure=False):
     if os.getuid() == 0:
         (output, retval) = run(['crm_node', '-l'])
@@ -1873,7 +1872,7 @@
         err_msgs, retval, output, dummy_std_err = call_local_pcsd(
             ['status', 'nodes', 'pacemaker-id'], True
         )
-        if err_msgs:
+        if err_msgs and not allow_failure:
             for msg in err_msgs:
                 err(msg, False)
             sys.exit(1)
@@ -1893,9 +1892,11 @@
     return pm_nodes
 
 def corosyncPacemakerNodeCheck():
-    # does not work on CMAN clusters
-    pm_nodes = getPacemakerNodesID()
-    cs_nodes = getCorosyncNodesID()
+    # does not work on CMAN clusters and pacemaker-remote nodes
+    # we do not want a failure to exit pcs as this is only a minor information
+    # function
+    pm_nodes = getPacemakerNodesID(allow_failure=True)
+    cs_nodes = getCorosyncNodesID(allow_failure=True)
 
     for node_id in pm_nodes:
         if pm_nodes[node_id] == "(null)":
@@ -1920,10 +1921,9 @@ def getClusterName():
     if is_rhel6():
         try:
             dom = parse(settings.cluster_conf_file)
+            return dom.documentElement.getAttribute("name")
         except (IOError,xml.parsers.expat.ExpatError):
-            return ""
-
-        return dom.documentElement.getAttribute("name")
+            pass
     else:
         try:
             f = open(settings.corosync_conf_file,'r')
@@ -1937,7 +1937,15 @@
             if cluster_name:
                 return cluster_name
         except (IOError, corosync_conf_parser.CorosyncConfParserException):
-            return ""
+            pass
+
+    # there is no corosync.conf or cluster.conf on remote nodes, we can try to
+    # get cluster name from pacemaker
+    try:
+        return get_set_properties("cluster-name")["cluster-name"]
+    except:
+        # we need to catch SystemExit (from utils.err), parse errors and so on
+        pass
 
     return ""
 
@@ -2024,23 +2032,30 @@
     if not is_systemctl():
         return
     print("Daemon Status:")
-    for service in ["corosync", "pacemaker", "pcsd"]:
-        print('{0}{1}: {2}/{3}'.format(
-            prefix, service,
-            run(["systemctl", 'is-active', service])[0].strip(),
-            run(["systemctl", 'is-enabled', service])[0].strip()
-        ))
-    try:
-        sbd_running = is_service_running(cmd_runner(), "sbd")
-        sbd_enabled = is_service_enabled(cmd_runner(), "sbd")
-        if sbd_enabled or sbd_running:
-            print("{prefix}sbd: {active}/{enabled}".format(
-                prefix=prefix,
-                active=("active" if sbd_running else "inactive"),
-                enabled=("enabled" if sbd_enabled else "disabled")
-            ))
-    except LibraryError:
-        pass
+    service_def = [
+        # (
+        #     service name,
+        #     display even if not enabled nor running
+        # )
+        ("corosync", True),
+        ("pacemaker", True),
+        ("pacemaker_remote", False),
+        ("pcsd", True),
+        ("sbd", False),
+    ]
+    for service, display_always in service_def:
+        try:
+            running = is_service_running(cmd_runner(), service)
+            enabled = is_service_enabled(cmd_runner(), service)
+            if display_always or enabled or running:
+                print("{prefix}{service}: {active}/{enabled}".format(
+                    prefix=prefix,
+                    service=service,
+                    active=("active" if running else "inactive"),
+                    enabled=("enabled" if enabled else "disabled")
+                ))
+        except LibraryError:
+            pass
 
 def enableServices():
     # do NOT handle SBD in here, it is started by pacemaker not systemd or init
@@ -2677,3 +2692,16 @@ def exit_on_cmdline_input_errror(error, main_name, usage_name):
 
 def get_report_processor():
     return LibraryReportProcessorToConsole(debug=("--debug" in pcs_options))
+
+def get_set_properties(prop_name=None, defaults=None):
+    properties = {} if defaults is None else dict(defaults)
+    (output, retVal) = run(["cibadmin","-Q","--scope", "crm_config"])
+    if retVal != 0:
+        err("unable to get crm_config\n"+output)
+    dom = parseString(output)
+    de = dom.documentElement
+    crm_config_properties = de.getElementsByTagName("nvpair")
+    for prop in crm_config_properties:
+        if prop_name is None or (prop_name == prop.getAttribute("name")):
+            properties[prop.getAttribute("name")] = prop.getAttribute("value")
+    return properties
diff --git a/pcsd/cluster_entity.rb b/pcsd/cluster_entity.rb
index f54cd30..fa56fe2 100644
--- a/pcsd/cluster_entity.rb
+++ b/pcsd/cluster_entity.rb
@@ -1011,7 +1011,9 @@ module ClusterEntity
       @uptime = 'unknown'
       @name = nil
       @services = {}
-      [:pacemaker, :corosync, :pcsd, :cman, :sbd].each do |service|
+      [
+        :pacemaker, :pacemaker_remote, :corosync, :pcsd, :cman, :sbd
+      ].each do |service|
         @services[service] = {
           :installed => nil,
           :running => nil,
diff --git a/pcsd/pcs.rb b/pcsd/pcs.rb
index 57082be..0956de9 100644
--- a/pcsd/pcs.rb
+++ b/pcsd/pcs.rb
@@ -892,6 +892,10 @@ def pacemaker_running?()
   is_service_running?('pacemaker')
 end
 
+def pacemaker_remote_running?()
+  is_service_running?('pacemaker_remote')
+end
+
 def get_pacemaker_version()
   begin
     stdout, stderror, retval = run_cmd(
diff --git a/pcsd/remote.rb b/pcsd/remote.rb
index 75c9465..6a3a692 100644
--- a/pcsd/remote.rb
+++ b/pcsd/remote.rb
@@ -769,9 +769,19 @@ def get_sw_versions(params, request, auth_user)
 end
 
 def remote_node_available(params, request, auth_user)
-  if (not ISRHEL6 and File.exist?(Cfgsync::CorosyncConf.file_path)) or (ISRHEL6 and File.exist?(Cfgsync::ClusterConf.file_path)) or File.exist?("/var/lib/pacemaker/cib/cib.xml")
+  if (
+    (not ISRHEL6 and File.exist?(Cfgsync::CorosyncConf.file_path)) or
+    (ISRHEL6 and File.exist?(Cfgsync::ClusterConf.file_path)) or
+    File.exist?("/var/lib/pacemaker/cib/cib.xml")
+  )
     return JSON.generate({:node_available => false})
   end
+  if pacemaker_remote_running?()
+    return JSON.generate({
+      :node_available => false,
+      :pacemaker_remote => true,
+    })
+  end
   return JSON.generate({:node_available => true})
 end
 
@@ -1038,6 +1048,8 @@ def node_status(params, request, auth_user)
     :cman => node.cman,
     :corosync_enabled => node.corosync_enabled,
     :pacemaker_enabled => node.pacemaker_enabled,
+    :pacemaker_remote => node.services[:pacemaker_remote][:running],
+    :pacemaker_remote_enabled => node.services[:pacemaker_remote][:enabled],
     :pcsd_enabled => node.pcsd_enabled,
     :corosync_online => status[:corosync_online],
     :corosync_offline => status[:corosync_offline],
-- 
1.8.3.1