From 2a080e5986331989a3164a35129e576641b2cca5 Mon Sep 17 00:00:00 2001
From: Tomas Jelinek <tojeline@redhat.com>
Date: Tue, 19 Jul 2016 16:42:44 +0200
Subject: [PATCH 1/2] allow to remove a dead node from a cluster
---
pcs/cluster.py | 41 +++++++++++++++++++++++++++--------------
1 file changed, 27 insertions(+), 14 deletions(-)
diff --git a/pcs/cluster.py b/pcs/cluster.py
index baa0f44..7a8615d 100644
--- a/pcs/cluster.py
+++ b/pcs/cluster.py
@@ -1076,7 +1076,7 @@ def disable_cluster_nodes(nodes):
if len(error_list) > 0:
utils.err("unable to disable all nodes\n" + "\n".join(error_list))
-def destroy_cluster(argv):
+def destroy_cluster(argv, keep_going=False):
if len(argv) > 0:
# stop pacemaker and resources while cluster is still quorate
nodes = argv
@@ -1085,7 +1085,14 @@ def destroy_cluster(argv):
# destroy will stop any remaining cluster daemons
error_list = parallel_for_nodes(utils.destroyCluster, nodes, quiet=True)
if error_list:
- utils.err("unable to destroy cluster\n" + "\n".join(error_list))
+ if keep_going:
+ print(
+ "Warning: unable to destroy cluster\n"
+ +
+ "\n".join(error_list)
+ )
+ else:
+ utils.err("unable to destroy cluster\n" + "\n".join(error_list))
def stop_cluster(argv):
if len(argv) > 0:
@@ -1347,19 +1354,25 @@ def cluster_node(argv):
node = argv[1]
node0, node1 = utils.parse_multiring_node(node)
-
if not node0:
utils.err("missing ring 0 address of the node")
- status,output = utils.checkAuthorization(node0)
- if status == 2:
- utils.err("pcsd is not running on %s" % node0)
- elif status == 3:
- utils.err(
- "%s is not yet authenticated (try pcs cluster auth %s)"
- % (node0, node0)
- )
- elif status != 0:
- utils.err(output)
+
+ # allow to continue if removing a node with --force
+ if add_node or "--force" not in utils.pcs_options:
+ status, output = utils.checkAuthorization(node0)
+ if status != 0:
+ if status == 2:
+ msg = "pcsd is not running on {0}".format(node0)
+ elif status == 3:
+ msg = (
+ "{node} is not yet authenticated "
+ "(try pcs cluster auth {node})"
+ ).format(node=node0)
+ else:
+ msg = output
+ if not add_node:
+ msg += ", use --force to override"
+ utils.err(msg)
if add_node == True:
wait = False
@@ -1540,7 +1553,7 @@ def cluster_node(argv):
nodesRemoved = False
c_nodes = utils.getNodesFromCorosyncConf()
- destroy_cluster([node0])
+ destroy_cluster([node0], keep_going=("--force" in utils.pcs_options))
for my_node in c_nodes:
if my_node == node0:
continue
--
1.8.3.1
From c48716233ace08c16e7e4b66075aebeca9366321 Mon Sep 17 00:00:00 2001
From: Tomas Jelinek <tojeline@redhat.com>
Date: Wed, 20 Jul 2016 10:01:13 +0200
Subject: [PATCH 2/2] gui: allow to remove a dead node from a cluster
---
pcsd/remote.rb | 11 +++++++++--
1 file changed, 9 insertions(+), 2 deletions(-)
diff --git a/pcsd/remote.rb b/pcsd/remote.rb
index 25fb74d..05a6d03 100644
--- a/pcsd/remote.rb
+++ b/pcsd/remote.rb
@@ -837,8 +837,15 @@ def remote_remove_nodes(params, request, auth_user)
stdout, stderr, retval = run_cmd(
auth_user, PCS, "cluster", "stop", *stop_params
)
- if retval != 0
- return [400, stderr.join]
+ if retval != 0 and not params['force']
+ # If forced, keep going even if unable to stop all nodes (they may be dead).
+ # Add info this error is forceable if pcs did not do it (e.g. when unable
+ # to connect to some nodes).
+ message = stderr.join
+ if not message.include?(', use --force to override')
+ message += ', use --force to override'
+ end
+ return [400, message]
end
node_list.each {|node|
--
1.8.3.1