|
 |
356a11 |
From c97ac6cda7f148dfdf09b8b4f4ce9762d9c59bcd Mon Sep 17 00:00:00 2001
|
|
 |
356a11 |
From: Andrew Beekhof <andrew@beekhof.net>
|
|
 |
356a11 |
Date: Tue, 4 Jul 2017 14:16:52 +1000
|
|
 |
356a11 |
Subject: [PATCH] Fix: PE: Ensure unrecoverable remote nodes are fenced even if
|
|
 |
356a11 |
no resources can run on them
|
|
 |
356a11 |
|
|
 |
356a11 |
---
|
|
 |
356a11 |
pengine/allocate.c | 106 ++++++++++++++++++++++++++++++++---------------------
|
|
 |
356a11 |
1 file changed, 65 insertions(+), 41 deletions(-)
|
|
 |
356a11 |
|
|
 |
356a11 |
diff --git a/pengine/allocate.c b/pengine/allocate.c
|
|
 |
356a11 |
index 3a95be6..3a883ad 100644
|
|
 |
356a11 |
--- a/pengine/allocate.c
|
|
 |
356a11 |
+++ b/pengine/allocate.c
|
|
 |
356a11 |
@@ -39,6 +39,16 @@ void migrate_reload_madness(pe_working_set_t * data_set);
|
|
 |
356a11 |
extern void ReloadRsc(resource_t * rsc, node_t *node, pe_working_set_t * data_set);
|
|
 |
356a11 |
extern gboolean DeleteRsc(resource_t * rsc, node_t * node, gboolean optional, pe_working_set_t * data_set);
|
|
 |
356a11 |
static void apply_remote_node_ordering(pe_working_set_t *data_set);
|
|
 |
356a11 |
+static enum remote_connection_state get_remote_node_state(pe_node_t *node);
|
|
 |
356a11 |
+enum remote_connection_state
|
|
 |
356a11 |
+{
|
|
 |
356a11 |
+ remote_state_unknown = 0,
|
|
 |
356a11 |
+ remote_state_alive = 1,
|
|
 |
356a11 |
+ remote_state_resting = 2,
|
|
 |
356a11 |
+ remote_state_failed = 3,
|
|
 |
356a11 |
+ remote_state_stopped = 4
|
|
 |
356a11 |
+};
|
|
 |
356a11 |
+
|
|
 |
356a11 |
|
|
 |
356a11 |
resource_alloc_functions_t resource_class_alloc_functions[] = {
|
|
 |
356a11 |
{
|
|
 |
356a11 |
@@ -886,21 +896,25 @@ probe_resources(pe_working_set_t * data_set)
|
|
 |
356a11 |
{
|
|
 |
356a11 |
action_t *probe_node_complete = NULL;
|
|
 |
356a11 |
|
|
 |
356a11 |
- GListPtr gIter = NULL;
|
|
 |
356a11 |
- GListPtr gIter2 = NULL;
|
|
 |
356a11 |
-
|
|
 |
356a11 |
- for (gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) {
|
|
 |
356a11 |
+ for (GListPtr gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) {
|
|
 |
356a11 |
node_t *node = (node_t *) gIter->data;
|
|
 |
356a11 |
const char *probed = g_hash_table_lookup(node->details->attrs, CRM_OP_PROBED);
|
|
 |
356a11 |
|
|
 |
356a11 |
- if (node->details->online == FALSE) {
|
|
 |
356a11 |
+ if (is_container_remote_node(node)) {
|
|
 |
356a11 |
+ /* TODO enable guest node probes once ordered probing is implemented */
|
|
 |
356a11 |
continue;
|
|
 |
356a11 |
|
|
 |
356a11 |
- } else if (node->details->unclean) {
|
|
 |
356a11 |
+ } else if (node->details->online == FALSE && node->details->remote_rsc) {
|
|
 |
356a11 |
+ enum remote_connection_state state = get_remote_node_state(node);
|
|
 |
356a11 |
+ if(state == remote_state_failed) {
|
|
 |
356a11 |
+ pe_fence_node(data_set, node, "the connection is unrecoverable");
|
|
 |
356a11 |
+ }
|
|
 |
356a11 |
continue;
|
|
 |
356a11 |
|
|
 |
356a11 |
- } else if (is_container_remote_node(node)) {
|
|
 |
356a11 |
- /* TODO enable guest node probes once ordered probing is implemented */
|
|
 |
356a11 |
+ } else if(node->details->online == FALSE) {
|
|
 |
356a11 |
+ continue;
|
|
 |
356a11 |
+
|
|
 |
356a11 |
+ } else if (node->details->unclean) {
|
|
 |
356a11 |
continue;
|
|
 |
356a11 |
|
|
 |
356a11 |
} else if (node->details->rsc_discovery_enabled == FALSE) {
|
|
 |
356a11 |
@@ -916,7 +930,7 @@ probe_resources(pe_working_set_t * data_set)
|
|
 |
356a11 |
continue;
|
|
 |
356a11 |
}
|
|
 |
356a11 |
|
|
 |
356a11 |
- for (gIter2 = data_set->resources; gIter2 != NULL; gIter2 = gIter2->next) {
|
|
 |
356a11 |
+ for (GListPtr gIter2 = data_set->resources; gIter2 != NULL; gIter2 = gIter2->next) {
|
|
 |
356a11 |
resource_t *rsc = (resource_t *) gIter2->data;
|
|
 |
356a11 |
|
|
 |
356a11 |
rsc->cmds->create_probe(rsc, node, probe_node_complete, FALSE, data_set);
|
|
 |
356a11 |
@@ -1749,15 +1763,6 @@ rsc_order_first(resource_t * lh_rsc, order_constraint_t * order, pe_working_set_
|
|
 |
356a11 |
extern gboolean update_action(action_t * action);
|
|
 |
356a11 |
extern void update_colo_start_chain(action_t * action);
|
|
 |
356a11 |
|
|
 |
356a11 |
-enum remote_connection_state
|
|
 |
356a11 |
-{
|
|
 |
356a11 |
- remote_state_unknown = 0,
|
|
 |
356a11 |
- remote_state_alive = 1,
|
|
 |
356a11 |
- remote_state_resting = 2,
|
|
 |
356a11 |
- remote_state_failed = 3,
|
|
 |
356a11 |
- remote_state_stopped = 4
|
|
 |
356a11 |
-};
|
|
 |
356a11 |
-
|
|
 |
356a11 |
static int
|
|
 |
356a11 |
is_recurring_action(action_t *action)
|
|
 |
356a11 |
{
|
|
 |
356a11 |
@@ -1874,29 +1879,24 @@ apply_container_ordering(action_t *action, pe_working_set_t *data_set)
|
|
 |
356a11 |
}
|
|
 |
356a11 |
}
|
|
 |
356a11 |
|
|
 |
356a11 |
-static void
|
|
 |
356a11 |
-apply_remote_ordering(action_t *action, pe_working_set_t *data_set)
|
|
 |
356a11 |
+static enum remote_connection_state
|
|
 |
356a11 |
+get_remote_node_state(pe_node_t *node)
|
|
 |
356a11 |
{
|
|
 |
356a11 |
resource_t *remote_rsc = NULL;
|
|
 |
356a11 |
node_t *cluster_node = NULL;
|
|
 |
356a11 |
- enum action_tasks task = text2task(action->task);
|
|
 |
356a11 |
- enum remote_connection_state state = remote_state_unknown;
|
|
 |
356a11 |
- enum pe_ordering order_opts = pe_order_none;
|
|
 |
356a11 |
|
|
 |
356a11 |
- if (action->rsc == NULL) {
|
|
 |
356a11 |
- return;
|
|
 |
356a11 |
+ if(node == NULL) {
|
|
 |
356a11 |
+ return remote_state_unknown;
|
|
 |
356a11 |
}
|
|
 |
356a11 |
|
|
 |
356a11 |
- CRM_ASSERT(action->node);
|
|
 |
356a11 |
- CRM_ASSERT(is_remote_node(action->node));
|
|
 |
356a11 |
-
|
|
 |
356a11 |
- remote_rsc = action->node->details->remote_rsc;
|
|
 |
356a11 |
+ remote_rsc = node->details->remote_rsc;
|
|
 |
356a11 |
CRM_ASSERT(remote_rsc);
|
|
 |
356a11 |
|
|
 |
356a11 |
if(remote_rsc->running_on) {
|
|
 |
356a11 |
cluster_node = remote_rsc->running_on->data;
|
|
 |
356a11 |
}
|
|
 |
356a11 |
|
|
 |
356a11 |
+
|
|
 |
356a11 |
/* If the cluster node the remote connection resource resides on
|
|
 |
356a11 |
* is unclean or went offline, we can't process any operations
|
|
 |
356a11 |
* on that remote node until after it starts elsewhere.
|
|
 |
356a11 |
@@ -1911,21 +1911,21 @@ apply_remote_ordering(action_t *action, pe_working_set_t *data_set)
|
|
 |
356a11 |
* must assume the worst and fence it.
|
|
 |
356a11 |
*/
|
|
 |
356a11 |
if (is_set(remote_rsc->flags, pe_rsc_failed)) {
|
|
 |
356a11 |
- state = remote_state_failed;
|
|
 |
356a11 |
+ return remote_state_failed;
|
|
 |
356a11 |
} else if(cluster_node && cluster_node->details->unclean) {
|
|
 |
356a11 |
- state = remote_state_failed;
|
|
 |
356a11 |
+ return remote_state_failed;
|
|
 |
356a11 |
} else {
|
|
 |
356a11 |
- state = remote_state_stopped;
|
|
 |
356a11 |
+ return remote_state_stopped;
|
|
 |
356a11 |
}
|
|
 |
356a11 |
|
|
 |
356a11 |
} else if (cluster_node == NULL) {
|
|
 |
356a11 |
/* Connection is recoverable but not currently running anywhere, see if we can recover it first */
|
|
 |
356a11 |
- state = remote_state_unknown;
|
|
 |
356a11 |
+ return remote_state_unknown;
|
|
 |
356a11 |
|
|
 |
356a11 |
} else if(cluster_node->details->unclean == TRUE
|
|
 |
356a11 |
|| cluster_node->details->online == FALSE) {
|
|
 |
356a11 |
/* Connection is running on a dead node, see if we can recover it first */
|
|
 |
356a11 |
- state = remote_state_resting;
|
|
 |
356a11 |
+ return remote_state_resting;
|
|
 |
356a11 |
|
|
 |
356a11 |
} else if (g_list_length(remote_rsc->running_on) > 1
|
|
 |
356a11 |
&& remote_rsc->partial_migration_source
|
|
 |
356a11 |
@@ -1934,10 +1934,34 @@ apply_remote_ordering(action_t *action, pe_working_set_t *data_set)
|
|
 |
356a11 |
* wait until after the resource migrates before performing
|
|
 |
356a11 |
* any actions.
|
|
 |
356a11 |
*/
|
|
 |
356a11 |
- state = remote_state_resting;
|
|
 |
356a11 |
+ return remote_state_resting;
|
|
 |
356a11 |
|
|
 |
356a11 |
- } else {
|
|
 |
356a11 |
- state = remote_state_alive;
|
|
 |
356a11 |
+ }
|
|
 |
356a11 |
+ return remote_state_alive;
|
|
 |
356a11 |
+}
|
|
 |
356a11 |
+
|
|
 |
356a11 |
+static void
|
|
 |
356a11 |
+apply_remote_ordering(action_t *action, pe_working_set_t *data_set)
|
|
 |
356a11 |
+{
|
|
 |
356a11 |
+ resource_t *remote_rsc = NULL;
|
|
 |
356a11 |
+ node_t *cluster_node = NULL;
|
|
 |
356a11 |
+ enum action_tasks task = text2task(action->task);
|
|
 |
356a11 |
+ enum remote_connection_state state = get_remote_node_state(action->node);
|
|
 |
356a11 |
+
|
|
 |
356a11 |
+ enum pe_ordering order_opts = pe_order_none;
|
|
 |
356a11 |
+
|
|
 |
356a11 |
+ if (action->rsc == NULL) {
|
|
 |
356a11 |
+ return;
|
|
 |
356a11 |
+ }
|
|
 |
356a11 |
+
|
|
 |
356a11 |
+ CRM_ASSERT(action->node);
|
|
 |
356a11 |
+ CRM_ASSERT(is_remote_node(action->node));
|
|
 |
356a11 |
+
|
|
 |
356a11 |
+ remote_rsc = action->node->details->remote_rsc;
|
|
 |
356a11 |
+ CRM_ASSERT(remote_rsc);
|
|
 |
356a11 |
+
|
|
 |
356a11 |
+ if(remote_rsc->running_on) {
|
|
 |
356a11 |
+ cluster_node = remote_rsc->running_on->data;
|
|
 |
356a11 |
}
|
|
 |
356a11 |
|
|
 |
356a11 |
crm_trace("Order %s action %s relative to %s%s (state %d)",
|
|
 |
356a11 |
@@ -2049,13 +2073,11 @@ apply_remote_ordering(action_t *action, pe_working_set_t *data_set)
|
|
 |
356a11 |
static void
|
|
 |
356a11 |
apply_remote_node_ordering(pe_working_set_t *data_set)
|
|
 |
356a11 |
{
|
|
 |
356a11 |
- GListPtr gIter = data_set->actions;
|
|
 |
356a11 |
-
|
|
 |
356a11 |
if (is_set(data_set->flags, pe_flag_have_remote_nodes) == FALSE) {
|
|
 |
356a11 |
return;
|
|
 |
356a11 |
}
|
|
 |
356a11 |
|
|
 |
356a11 |
- for (; gIter != NULL; gIter = gIter->next) {
|
|
 |
356a11 |
+ for (GListPtr gIter = data_set->actions; gIter != NULL; gIter = gIter->next) {
|
|
 |
356a11 |
action_t *action = (action_t *) gIter->data;
|
|
 |
356a11 |
|
|
 |
356a11 |
if (action->rsc == NULL) {
|
|
 |
356a11 |
@@ -2092,12 +2114,14 @@ apply_remote_node_ordering(pe_working_set_t *data_set)
|
|
 |
356a11 |
is_remote_node(action->node) == FALSE ||
|
|
 |
356a11 |
action->node->details->remote_rsc == NULL ||
|
|
 |
356a11 |
is_set(action->flags, pe_action_pseudo)) {
|
|
 |
356a11 |
- crm_trace("Nothing required for %s", action->uuid);
|
|
 |
356a11 |
+ crm_trace("Nothing required for %s on %s", action->uuid, action->node?action->node->details->uname:"NA");
|
|
 |
356a11 |
|
|
 |
356a11 |
} else if(action->node->details->remote_rsc->container) {
|
|
 |
356a11 |
+ crm_trace("Container ordering for %s", action->uuid);
|
|
 |
356a11 |
apply_container_ordering(action, data_set);
|
|
 |
356a11 |
|
|
 |
356a11 |
} else {
|
|
 |
356a11 |
+ crm_trace("Remote ordering for %s", action->uuid);
|
|
 |
356a11 |
apply_remote_ordering(action, data_set);
|
|
 |
356a11 |
}
|
|
 |
356a11 |
}
|
|
 |
356a11 |
--
|
|
 |
356a11 |
1.8.3.1
|
|
 |
356a11 |
|