Blame SOURCES/bz1180506-stop-cluster-nodes-in-parallel.patch

337c54
From 18dc684896dd4c0e15434b06b53f2afa901adb8a Mon Sep 17 00:00:00 2001
337c54
From: Tomas Jelinek <tojeline@redhat.com>
337c54
Date: Mon, 12 Jan 2015 15:30:33 +0100
337c54
Subject: [PATCH] stop cluster nodes in parallel
337c54
337c54
---
337c54
 pcs/cluster.py | 78 +++++++++++++++++++++++++++++++++++++++++++++++++---------
337c54
 pcs/pcs.py     | 19 +++++++++++++-
337c54
 pcs/utils.py   | 40 ++++++++++++++++++++++++------
337c54
 pcsd/pcs.rb    |  4 +--
337c54
 pcsd/pcsd.rb   |  2 +-
337c54
 pcsd/remote.rb | 19 +++++++++++---
337c54
 6 files changed, 137 insertions(+), 25 deletions(-)
337c54
337c54
diff --git a/pcs/cluster.py b/pcs/cluster.py
337c54
index 9730e55..5e94389 100644
337c54
--- a/pcs/cluster.py
337c54
+++ b/pcs/cluster.py
337c54
@@ -18,6 +18,7 @@ import datetime
337c54
 import commands
337c54
 import json
337c54
 import xml.dom.minidom
337c54
+import threading
337c54
 
337c54
 pcs_dir = os.path.dirname(os.path.realpath(__file__))
337c54
 COROSYNC_CONFIG_TEMPLATE = pcs_dir + "/corosync.conf.template"
337c54
@@ -125,9 +126,7 @@ def sync_start(partial_argv, nodes):
337c54
     for node in nodes:
337c54
         utils.setCorosyncConfig(node,config)
337c54
     print "Starting cluster on nodes: " + ", ".join(nodes) + "..."
337c54
-
337c54
-    for node in nodes:
337c54
-        utils.startCluster(node)
337c54
+    start_cluster_nodes(nodes)
337c54
 
337c54
 def sync(partial_argv,nodes):
337c54
     argv = partial_argv[:]
337c54
@@ -536,16 +535,29 @@ def start_cluster_all():
337c54
     start_cluster_nodes(utils.getNodesFromCorosyncConf())
337c54
 
337c54
 def start_cluster_nodes(nodes):
337c54
-    error_list = utils.map_for_error_list(utils.startCluster, nodes)
337c54
-    if len(error_list) > 0:
337c54
+    threads = dict()
337c54
+    for node in nodes:
337c54
+        threads[node] = NodeStartThread(node)
337c54
+    error_list = utils.run_node_threads(threads)
337c54
+    if error_list:
337c54
         utils.err("unable to start all nodes\n" + "\n".join(error_list))
337c54
 
337c54
 def stop_cluster_all():
337c54
     stop_cluster_nodes(utils.getNodesFromCorosyncConf())
337c54
 
337c54
 def stop_cluster_nodes(nodes):
337c54
-    error_list = utils.map_for_error_list(utils.stopCluster, nodes)
337c54
-    if len(error_list) > 0:
337c54
+    threads = dict()
337c54
+    for node in nodes:
337c54
+        threads[node] = NodeStopPacemakerThread(node)
337c54
+    error_list = utils.run_node_threads(threads)
337c54
+    if error_list:
337c54
+        utils.err("unable to stop all nodes\n" + "\n".join(error_list))
337c54
+
337c54
+    threads = dict()
337c54
+    for node in nodes:
337c54
+        threads[node] = NodeStopCorosyncThread(node)
337c54
+    error_list = utils.run_node_threads(threads)
337c54
+    if error_list:
337c54
         utils.err("unable to stop all nodes\n" + "\n".join(error_list))
337c54
 
337c54
 def node_standby(argv,standby=True):
337c54
@@ -610,27 +622,44 @@ def disable_cluster_nodes(nodes):
337c54
 
337c54
 def destroy_cluster(argv):
337c54
     if len(argv) > 0:
337c54
-        error_list = utils.map_for_error_list(utils.destroyCluster, argv)
337c54
-        if len(error_list) > 0:
337c54
+        threads = dict()
337c54
+        for node in argv:
337c54
+            threads[node] = NodeDestroyThread(node)
337c54
+        error_list = utils.run_node_threads(threads)
337c54
+        if error_list:
337c54
             utils.err("unable to destroy cluster\n" + "\n".join(error_list))
337c54
-        return
337c54
 
337c54
 def stop_cluster(argv):
337c54
     if len(argv) > 0:
337c54
         stop_cluster_nodes(argv)
337c54
         return
337c54
 
337c54
-    print "Stopping Cluster..."
337c54
+    stop_all = (
337c54
+        "--pacemaker" not in utils.pcs_options
337c54
+        and
337c54
+        "--corosync" not in utils.pcs_options
337c54
+    )
337c54
+    if stop_all or "--pacemaker" in utils.pcs_options:
337c54
+        stop_cluster_pacemaker()
337c54
+    if stop_all or "--corosync" in utils.pcs_options:
337c54
+        stop_cluster_corosync()
337c54
+
337c54
+def stop_cluster_pacemaker():
337c54
+    print "Stopping Cluster (pacemaker)...",
337c54
     output, retval = utils.run(["service", "pacemaker","stop"])
337c54
     if retval != 0:
337c54
         print output,
337c54
         utils.err("unable to stop pacemaker")
337c54
+
337c54
+def stop_cluster_corosync():
337c54
     if utils.is_rhel6():
337c54
+        print "Stopping Cluster (cman)...",
337c54
         output, retval = utils.run(["service", "cman","stop"])
337c54
         if retval != 0:
337c54
             print output,
337c54
             utils.err("unable to stop cman")
337c54
     else:
337c54
+        print "Stopping Cluster (corosync)...",
337c54
         output, retval = utils.run(["service", "corosync","stop"])
337c54
         if retval != 0:
337c54
             print output,
337c54
@@ -1200,3 +1229,30 @@ def cluster_quorum_unblock(argv):
337c54
     utils.set_cib_property("startup-fencing", startup_fencing)
337c54
     print "Waiting for nodes cancelled"
337c54
 
337c54
+class NodeActionThread(threading.Thread):
337c54
+    def __init__(self, node):
337c54
+        super(NodeActionThread, self).__init__()
337c54
+        self.node = node
337c54
+        self.retval = 0
337c54
+        self.output = ""
337c54
+
337c54
+class NodeStartThread(NodeActionThread):
337c54
+    def run(self):
337c54
+        self.retval, self.output = utils.startCluster(self.node, quiet=True)
337c54
+
337c54
+class NodeStopPacemakerThread(NodeActionThread):
337c54
+    def run(self):
337c54
+        self.retval, self.output = utils.stopCluster(
337c54
+            self.node, quiet=True, pacemaker=True, corosync=False
337c54
+        )
337c54
+
337c54
+class NodeStopCorosyncThread(NodeActionThread):
337c54
+    def run(self):
337c54
+        self.retval, self.output = utils.stopCluster(
337c54
+            self.node, quiet=True, pacemaker=False, corosync=True
337c54
+        )
337c54
+
337c54
+class NodeDestroyThread(NodeActionThread):
337c54
+    def run(self):
337c54
+        self.retval, self.output = utils.destroyCluster(self.node, quiet=True)
337c54
+
337c54
diff --git a/pcs/pcs.py b/pcs/pcs.py
337c54
index b2c3f4b..a0c0df5 100755
337c54
--- a/pcs/pcs.py
337c54
+++ b/pcs/pcs.py
337c54
@@ -54,7 +54,24 @@ def main(argv):
337c54
                 pcs_short_options_with_args.append(prev_char)
337c54
             prev_char = c
337c54
 
337c54
-        pcs_long_options = ["local","start","all","clone","master","force","corosync_conf=", "defaults","debug","version","help","fullhelp","off","from=","to=", "name=", "wait", "group=","groups","full","enable","node=","nodesc","transport=", "addr0=","addr1=","bcast0=","bcast1=","mcast0=","mcast1=","mcastport0=","mcastport1=","ttl0=","ttl1=","rrpmode=", "broadcast0", "broadcast1","wait_for_all=","auto_tie_breaker=","last_man_standing=", "last_man_standing_window=","no-default-ops","ipv6","token=", "token_coefficient=", "consensus=", "miss_count_const=", "fail_recv_const=","join=", "disabled", "after=", "before=", "autocorrect", "interactive", "autodelete"]
337c54
+        pcs_long_options = [
337c54
+            "debug", "version", "help", "fullhelp",
337c54
+            "force", "autocorrect", "interactive", "autodelete",
337c54
+            "all", "full", "groups", "local", "wait", "config",
337c54
+            "start", "enable", "disabled", "off",
337c54
+            "pacemaker", "corosync",
337c54
+            "no-default-ops", "defaults", "nodesc",
337c54
+            "clone", "master", "name=", "group=", "node=",
337c54
+            "from=", "to=", "after=", "before=",
337c54
+            "transport=", "rrpmode=", "ipv6",
337c54
+            "addr0=", "bcast0=", "mcast0=", "mcastport0=", "ttl0=", "broadcast0",
337c54
+            "addr1=", "bcast1=", "mcast1=", "mcastport1=", "ttl1=", "broadcast1",
337c54
+            "wait_for_all=", "auto_tie_breaker=", "last_man_standing=",
337c54
+            "last_man_standing_window=",
337c54
+            "token=", "token_coefficient=", "consensus=", "join=",
337c54
+            "miss_count_const=", "fail_recv_const=",
337c54
+            "corosync_conf=", "cluster_conf=",
337c54
+        ]
337c54
         # pull out negative number arguments and add them back after getopt
337c54
         prev_arg = ""
337c54
         for arg in argv:
337c54
diff --git a/pcs/utils.py b/pcs/utils.py
337c54
index 724519a..1f41ae0 100644
337c54
--- a/pcs/utils.py
337c54
+++ b/pcs/utils.py
337c54
@@ -208,11 +208,19 @@ def setCorosyncConfig(node,config):
337c54
         if status != 0:
337c54
             err("Unable to set corosync config")
337c54
 
337c54
-def startCluster(node):
337c54
-    return sendHTTPRequest(node, 'remote/cluster_start', None, False, True)
337c54
-
337c54
-def stopCluster(node):
337c54
-    return sendHTTPRequest(node, 'remote/cluster_stop', None, False, True)
337c54
+def startCluster(node, quiet=False):
337c54
+    return sendHTTPRequest(node, 'remote/cluster_start', None, False, not quiet)
337c54
+
337c54
+def stopCluster(node, quiet=False, pacemaker=True, corosync=True):
337c54
+    if (pacemaker and corosync) or (not pacemaker and not corosync):
337c54
+        data = None
337c54
+    elif pacemaker:
337c54
+        data = {"component": "pacemaker"}
337c54
+    elif corosync:
337c54
+        data = {"component": "corosync"}
337c54
+    if data:
337c54
+        data = urllib.urlencode(data)
337c54
+    return sendHTTPRequest(node, 'remote/cluster_stop', data, False, not quiet)
337c54
 
337c54
 def enableCluster(node):
337c54
     return sendHTTPRequest(node, 'remote/cluster_enable', None, False, True)
337c54
@@ -220,8 +228,8 @@ def enableCluster(node):
337c54
 def disableCluster(node):
337c54
     return sendHTTPRequest(node, 'remote/cluster_disable', None, False, True)
337c54
 
337c54
-def destroyCluster(node):
337c54
-    return sendHTTPRequest(node, 'remote/cluster_destroy')
337c54
+def destroyCluster(node, quiet=False):
337c54
+    return sendHTTPRequest(node, 'remote/cluster_destroy', None, not quiet, not quiet)
337c54
 
337c54
 def restoreConfig(node, tarball_data):
337c54
     data = urllib.urlencode({"tarball": tarball_data})
337c54
@@ -730,6 +738,24 @@ def map_for_error_list(callab, iterab):
337c54
             error_list.append(err)
337c54
     return error_list
337c54
 
337c54
+def run_node_threads(node_threads):
337c54
+    error_list = []
337c54
+    for node, thread in node_threads.items():
337c54
+        thread.daemon = True
337c54
+        thread.start()
337c54
+    while node_threads:
337c54
+        for node in node_threads.keys():
337c54
+            thread = node_threads[node]
337c54
+            thread.join(1)
337c54
+            if thread.is_alive():
337c54
+                continue
337c54
+            output = node + ": " + thread.output.strip()
337c54
+            print output
337c54
+            if thread.retval != 0:
337c54
+                error_list.append(output)
337c54
+            del node_threads[node]
337c54
+    return error_list
337c54
+
337c54
 # Check is something exists in the CIB, if it does return it, if not, return
337c54
 #  an empty string
337c54
 def does_exist(xpath_query):
337c54
diff --git a/pcsd/pcs.rb b/pcsd/pcs.rb
337c54
index a1acfdc..3fad833 100644
337c54
--- a/pcsd/pcs.rb
337c54
+++ b/pcsd/pcs.rb
337c54
@@ -283,7 +283,7 @@ def send_cluster_request_with_token(cluster_name, request, post=false, data={},
337c54
   return code,out
337c54
 end
337c54
 
337c54
-def send_request_with_token(node,request, post=false, data={}, remote=true, raw_data = nil)
337c54
+def send_request_with_token(node, request, post=false, data={}, remote=true, raw_data=nil, timeout=30)
337c54
   start = Time.now
337c54
   begin
337c54
     retval, token = get_node_token(node)
337c54
@@ -312,7 +312,7 @@ def send_request_with_token(node,request, post=false, data={}, remote=true, raw_
337c54
     myhttp.use_ssl = true
337c54
     myhttp.verify_mode = OpenSSL::SSL::VERIFY_NONE
337c54
     res = myhttp.start do |http|
337c54
-      http.read_timeout = 30 
337c54
+      http.read_timeout = timeout
337c54
       http.request(req)
337c54
     end
337c54
     return res.code.to_i, res.body
337c54
diff --git a/pcsd/pcsd.rb b/pcsd/pcsd.rb
337c54
index c653ae2..94fdae2 100644
337c54
--- a/pcsd/pcsd.rb
337c54
+++ b/pcsd/pcsd.rb
337c54
@@ -428,7 +428,7 @@ if not DISABLE_GUI
337c54
     }
337c54
 
337c54
     $logger.info("Sending setup cluster request for: " + @cluster_name + " to: " + @nodes[0])
337c54
-    code,out = send_request_with_token(@nodes[0], "setup_cluster", true, {:clustername => @cluster_name, :nodes => @nodes_rrp.join(';'), :options => options.to_json})
337c54
+    code,out = send_request_with_token(@nodes[0], "setup_cluster", true, {:clustername => @cluster_name, :nodes => @nodes_rrp.join(';'), :options => options.to_json}, true, nil, 60)
337c54
 
337c54
     if code == 200
337c54
       pcs_config.clusters << Cluster.new(@cluster_name, @nodes)
337c54
diff --git a/pcsd/remote.rb b/pcsd/remote.rb
337c54
index 9709941..2245d47 100644
337c54
--- a/pcsd/remote.rb
337c54
+++ b/pcsd/remote.rb
337c54
@@ -151,10 +151,23 @@ end
337c54
 
337c54
 def cluster_stop(params)
337c54
   if params[:name]
337c54
-    code, response = send_request_with_token(params[:name], 'cluster_stop', true)
337c54
+    params_without_name = params.reject {|key, value|
337c54
+      key == "name" or key == :name
337c54
+    }
337c54
+    code, response = send_request_with_token(
337c54
+      params[:name], 'cluster_stop', true, params_without_name
337c54
+    )
337c54
   else
337c54
-    $logger.info "Starting Daemons"
337c54
-    output =  `#{PCS} cluster stop`
337c54
+    options = ""
337c54
+    if params.has_key?("component")
337c54
+      if params["component"].downcase == "pacemaker"
337c54
+        options = "--pacemaker"
337c54
+      elsif params["component"].downcase == "corosync"
337c54
+        options = "--corosync"
337c54
+      end
337c54
+    end
337c54
+    $logger.info "Stopping Daemons #{options}"
337c54
+    output =  `#{PCS} cluster stop #{options}`
337c54
     $logger.debug output
337c54
     return output
337c54
   end
337c54
-- 
337c54
1.9.1
337c54