Blob Blame History Raw
From 2a080e5986331989a3164a35129e576641b2cca5 Mon Sep 17 00:00:00 2001
From: Tomas Jelinek <tojeline@redhat.com>
Date: Tue, 19 Jul 2016 16:42:44 +0200
Subject: [PATCH 1/2] allow to remove a dead node from a cluster

---
 pcs/cluster.py | 41 +++++++++++++++++++++++++++--------------
 1 file changed, 27 insertions(+), 14 deletions(-)

diff --git a/pcs/cluster.py b/pcs/cluster.py
index baa0f44..7a8615d 100644
--- a/pcs/cluster.py
+++ b/pcs/cluster.py
@@ -1076,7 +1076,7 @@ def disable_cluster_nodes(nodes):
     if len(error_list) > 0:
         utils.err("unable to disable all nodes\n" + "\n".join(error_list))
 
-def destroy_cluster(argv):
+def destroy_cluster(argv, keep_going=False):
     if len(argv) > 0:
         # stop pacemaker and resources while cluster is still quorate
         nodes = argv
@@ -1085,7 +1085,14 @@ def destroy_cluster(argv):
         # destroy will stop any remaining cluster daemons
         error_list = parallel_for_nodes(utils.destroyCluster, nodes, quiet=True)
         if error_list:
-            utils.err("unable to destroy cluster\n" + "\n".join(error_list))
+            if keep_going:
+                print(
+                    "Warning: unable to destroy cluster\n"
+                    +
+                    "\n".join(error_list)
+                )
+            else:
+                utils.err("unable to destroy cluster\n" + "\n".join(error_list))
 
 def stop_cluster(argv):
     if len(argv) > 0:
@@ -1347,19 +1354,25 @@ def cluster_node(argv):
 
     node = argv[1]
     node0, node1 = utils.parse_multiring_node(node)
-
     if not node0:
         utils.err("missing ring 0 address of the node")
-    status,output = utils.checkAuthorization(node0)
-    if status == 2:
-        utils.err("pcsd is not running on %s" % node0)
-    elif status == 3:
-        utils.err(
-            "%s is not yet authenticated (try pcs cluster auth %s)"
-            % (node0, node0)
-        )
-    elif status != 0:
-        utils.err(output)
+
+    # skip the authorization check when removing a node with --force
+    if add_node or "--force" not in utils.pcs_options:
+        status, output = utils.checkAuthorization(node0)
+        if status != 0:
+            if status == 2:
+                msg = "pcsd is not running on {0}".format(node0)
+            elif status == 3:
+                msg = (
+                    "{node} is not yet authenticated "
+                    + " (try pcs cluster auth {node})"
+                ).format(node=node0)
+            else:
+                msg = output
+            if not add_node:
+                msg += ", use --force to override"
+            utils.err(msg)
 
     if add_node == True:
         wait = False
@@ -1540,7 +1553,7 @@ def cluster_node(argv):
 
         nodesRemoved = False
         c_nodes = utils.getNodesFromCorosyncConf()
-        destroy_cluster([node0])
+        destroy_cluster([node0], keep_going=("--force" in utils.pcs_options))
         for my_node in c_nodes:
             if my_node == node0:
                 continue
-- 
1.8.3.1


From c48716233ace08c16e7e4b66075aebeca9366321 Mon Sep 17 00:00:00 2001
From: Tomas Jelinek <tojeline@redhat.com>
Date: Wed, 20 Jul 2016 10:01:13 +0200
Subject: [PATCH 2/2] gui: allow to remove a dead node from a cluster

---
 pcsd/remote.rb | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/pcsd/remote.rb b/pcsd/remote.rb
index 25fb74d..05a6d03 100644
--- a/pcsd/remote.rb
+++ b/pcsd/remote.rb
@@ -837,8 +837,15 @@ def remote_remove_nodes(params, request, auth_user)
   stdout, stderr, retval = run_cmd(
     auth_user, PCS, "cluster", "stop", *stop_params
   )
-  if retval != 0
-    return [400, stderr.join]
+  if retval != 0 and not params['force']
+    # Unless forced, stop here. With force we keep going even if some nodes
+    # cannot be stopped (they may be dead). Mark the error as forceable if pcs
+    # has not done so already (e.g. when unable to connect to some nodes).
+    message = stderr.join
+    if not message.include?(', use --force to override')
+      message += ', use --force to override'
+    end
+    return [400, message]
   end
 
   node_list.each {|node|
-- 
1.8.3.1