From 083c3a49ad41bd17387c8ae661c23b44d4b845c6 Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Tue, 30 May 2017 14:43:25 -0500
Subject: [PATCH] Log: pengine,libpe_status: revisit fencing messages

---
 lib/pengine/unpack.c | 72 ++++++++++++++++++++++++++++++++--------------------
 pengine/allocate.c   | 65 ++++++++++++++++++++++++++---------------------
 2 files changed, 81 insertions(+), 56 deletions(-)

diff --git a/lib/pengine/unpack.c b/lib/pengine/unpack.c
index 377100c..21eca90 100644
--- a/lib/pengine/unpack.c
+++ b/lib/pengine/unpack.c
@@ -63,6 +63,13 @@ is_dangling_container_remote_node(node_t *node)
 }
 
 
+/*!
+ * \brief Schedule a fence action for a node
+ *
+ * \param[in,out] data_set  Current working set of cluster
+ * \param[in,out] node      Node to fence
+ * \param[in]     reason    Text description of why fencing is needed
+ */
 void
 pe_fence_node(pe_working_set_t * data_set, node_t * node, const char *reason)
 {
@@ -74,11 +81,13 @@ pe_fence_node(pe_working_set_t * data_set, node_t * node, const char *reason)
 
         if (is_set(rsc->flags, pe_rsc_failed) == FALSE) {
             if (!is_set(rsc->flags, pe_rsc_managed)) {
-                crm_notice("Not fencing node %s due to '%s': container %s is"
-                           " unmanaged"
-                           "%s", node->details->uname, reason, rsc->id);
+                crm_notice("Not fencing guest node %s "
+                           "(otherwise would because %s): "
+                           "its guest resource %s is unmanaged",
+                           node->details->uname, reason, rsc->id);
             } else {
-                crm_warn("Remote node %s will be fenced due to '%s' by recovering %s",
+                crm_warn("Guest node %s will be fenced "
+                         "(by recovering its guest resource %s): %s",
                          node->details->uname, rsc->id, reason);
 
                 /* We don't mark the node as unclean because that would prevent the
@@ -91,8 +100,9 @@ pe_fence_node(pe_working_set_t * data_set, node_t * node, const char *reason)
         }
 
     } else if (is_dangling_container_remote_node(node)) {
-        crm_info("Cleaning up dangling connection resource for guest node %s due to '%s'"
-                 " (fencing is already done, guest resource no longer exists)",
+        crm_info("Cleaning up dangling connection for guest node %s: "
+                 "fencing was already done because %s, "
+                 "and guest resource no longer exists",
                  node->details->uname, reason);
         set_bit(node->details->remote_rsc->flags, pe_rsc_failed);
 
@@ -100,31 +110,29 @@ pe_fence_node(pe_working_set_t * data_set, node_t * node, const char *reason)
         resource_t *rsc = node->details->remote_rsc;
 
         if (rsc && (!is_set(rsc->flags, pe_rsc_managed))) {
-            crm_notice("Not fencing node %s due to '%s': connection is unmanaged",
+            crm_notice("Not fencing remote node %s "
+                       "(otherwise would because %s): connection is unmanaged",
                        node->details->uname, reason);
         } else if(node->details->remote_requires_reset == FALSE) {
             node->details->remote_requires_reset = TRUE;
-            if (pe_can_fence(data_set, node)) {
-                crm_warn("Remote node %s will be fenced due to %s", node->details->uname, reason);
-            } else {
-                crm_warn("Remote node %s is unclean due to %s", node->details->uname, reason);
-            }
+            crm_warn("Remote node %s %s: %s",
+                     node->details->uname,
+                     pe_can_fence(data_set, node)? "will be fenced" : "is unclean",
+                     reason);
         }
         node->details->unclean = TRUE;
 
     } else if (node->details->unclean) {
-        if (pe_can_fence(data_set, node)) {
-            crm_trace("Node %s would also be fenced due to '%s'", node->details->uname, reason);
-        } else {
-            crm_trace("Node %s is also unclean due to '%s'", node->details->uname, reason);
-        }
-
-    } else if (pe_can_fence(data_set, node)) {
-        crm_warn("Node %s will be fenced due to %s", node->details->uname, reason);
-        node->details->unclean = TRUE;
+        crm_trace("Cluster node %s %s because %s",
+                  node->details->uname,
+                  pe_can_fence(data_set, node)? "would also be fenced" : "also is unclean",
+                  reason);
 
     } else {
-        crm_warn("Node %s is unclean due to %s", node->details->uname, reason);
+        crm_warn("Cluster node %s %s: %s",
+                 node->details->uname,
+                 pe_can_fence(data_set, node)? "will be fenced" : "is unclean",
+                 reason);
         node->details->unclean = TRUE;
     }
 }
@@ -1878,6 +1886,8 @@ process_rsc_state(resource_t * rsc, node_t * node,
                   xmlNode * migrate_op, pe_working_set_t * data_set)
 {
     node_t *tmpnode = NULL;
+    char *reason = NULL;
+
     CRM_ASSERT(rsc);
     pe_rsc_trace(rsc, "Resource %s is %s on %s: on_fail=%s",
                  rsc->id, role2text(rsc->role), node->details->uname, fail2text(on_fail));
@@ -1907,7 +1917,6 @@ process_rsc_state(resource_t * rsc, node_t * node,
         && node->details->maintenance == FALSE
         && is_set(rsc->flags, pe_rsc_managed)) {
 
-        char *reason = NULL;
         gboolean should_fence = FALSE;
 
         /* If this is a guest node, fence it (regardless of whether fencing is
@@ -1922,14 +1931,19 @@ process_rsc_state(resource_t * rsc, node_t * node,
             should_fence = TRUE;
 
         } else if (is_set(data_set->flags, pe_flag_stonith_enabled)) {
-            if (is_baremetal_remote_node(node) && node->details->remote_rsc && is_not_set(node->details->remote_rsc->flags, pe_rsc_failed)) {
+            if (is_baremetal_remote_node(node) && node->details->remote_rsc
+                && is_not_set(node->details->remote_rsc->flags, pe_rsc_failed)) {
+
                 /* setting unseen = true means that fencing of the remote node will
                  * only occur if the connection resource is not going to start somewhere.
                  * This allows connection resources on a failed cluster-node to move to
                  * another node without requiring the baremetal remote nodes to be fenced
                  * as well. */
                 node->details->unseen = TRUE;
-                reason = crm_strdup_printf("%s is active there. Fencing will be revoked if remote-node connection can be re-established on another cluster-node.", rsc->id);
+                reason = crm_strdup_printf("%s is active there (fencing will be"
+                                           " revoked if remote connection can "
+                                           "be re-established elsewhere)",
+                                           rsc->id);
             }
             should_fence = TRUE;
         }
@@ -1959,7 +1973,9 @@ process_rsc_state(resource_t * rsc, node_t * node,
             /* treat it as if it is still running
              * but also mark the node as unclean
              */
-            pe_fence_node(data_set, node, "resource failure(s)");
+            reason = crm_strdup_printf("%s failed there", rsc->id);
+            pe_fence_node(data_set, node, reason);
+            free(reason);
             break;
 
         case action_fail_standby:
@@ -2002,6 +2018,7 @@ process_rsc_state(resource_t * rsc, node_t * node,
                 stop_action(rsc, node, FALSE);
             }
             break;
+
         case action_fail_reset_remote:
             set_bit(rsc->flags, pe_rsc_failed);
             if (is_set(data_set->flags, pe_flag_stonith_enabled)) {
@@ -2015,7 +2032,8 @@ process_rsc_state(resource_t * rsc, node_t * node,
 
                     /* connection resource to baremetal resource failed in a way that
                      * should result in fencing the remote-node. */
-                    pe_fence_node(data_set, tmpnode, "of connection failure(s)");
+                    pe_fence_node(data_set, tmpnode,
+                                  "remote connection is unrecoverable");
                 }
             }
 
diff --git a/pengine/allocate.c b/pengine/allocate.c
index 0020af6..f2987cc 100644
--- a/pengine/allocate.c
+++ b/pengine/allocate.c
@@ -467,7 +467,7 @@ check_actions_for(xmlNode * rsc_entry, resource_t * rsc, node_t * node, pe_worki
             set_bit(action_clear->flags, pe_action_runnable);
 
             crm_notice("Clearing failure of %s on %s "
-                       "action definition changed " CRM_XS " %s",
+                       "because action definition changed " CRM_XS " %s",
                        rsc->id, node->details->uname, action_clear->uuid);
         }
     }
@@ -1789,7 +1789,6 @@ apply_container_ordering(action_t *action, pe_working_set_t *data_set)
 
     CRM_ASSERT(action->node);
     CRM_ASSERT(is_remote_node(action->node));
-    CRM_ASSERT(action->node->details->remote_rsc);
 
     remote_rsc = action->node->details->remote_rsc;
     CRM_ASSERT(remote_rsc);
@@ -1801,7 +1800,13 @@ apply_container_ordering(action_t *action, pe_working_set_t *data_set)
         pe_fence_node(data_set, action->node, "container failed");
     }
 
-    crm_trace("%s %s %s %s %d", action->uuid, action->task, remote_rsc->id, container->id, is_set(container->flags, pe_rsc_failed));
+    crm_trace("Order %s action %s relative to %s%s for %s%s",
+              action->task, action->uuid,
+              is_set(remote_rsc->flags, pe_rsc_failed)? "failed " : "",
+              remote_rsc->id,
+              is_set(container->flags, pe_rsc_failed)? "failed " : "",
+              container->id);
+
     switch (task) {
         case start_rsc:
         case action_promote:
@@ -1874,6 +1879,7 @@ apply_remote_ordering(action_t *action, pe_working_set_t *data_set)
     node_t *cluster_node = NULL;
     enum action_tasks task = text2task(action->task);
     enum remote_connection_state state = remote_state_unknown;
+    enum pe_ordering order_opts = pe_order_none;
 
     if (action->rsc == NULL) {
         return;
@@ -1881,7 +1887,6 @@ apply_remote_ordering(action_t *action, pe_working_set_t *data_set)
 
     CRM_ASSERT(action->node);
     CRM_ASSERT(is_remote_node(action->node));
-    CRM_ASSERT(action->node->details->remote_rsc);
 
     remote_rsc = action->node->details->remote_rsc;
     CRM_ASSERT(remote_rsc);
@@ -1895,7 +1900,7 @@ apply_remote_ordering(action_t *action, pe_working_set_t *data_set)
      * on that remote node until after it starts elsewhere.
      */
     if(remote_rsc->next_role == RSC_ROLE_STOPPED || remote_rsc->allocated_to == NULL) {
-        /* There is no-where left to run the connection resource
+        /* There is nowhere left to run the connection resource,
         * and the resource is in a failed state (either directly
         * or because it is located on a failed node).
        *
@@ -1903,8 +1908,7 @@ apply_remote_ordering(action_t *action, pe_working_set_t *data_set)
         * or if there are resources in an unknown state (probe), we
         * must assume the worst and fence it.
         */
-
-        if(is_set(action->node->details->remote_rsc->flags, pe_rsc_failed)) {
+        if (is_set(remote_rsc->flags, pe_rsc_failed)) {
            state = remote_state_failed;
        } else if(cluster_node && cluster_node->details->unclean) {
            state = remote_state_failed;
@@ -1934,22 +1938,31 @@ apply_remote_ordering(action_t *action, pe_working_set_t *data_set)
         state = remote_state_alive;
     }
 
-    crm_trace("%s %s %s %d %d", action->uuid, action->task, action->node->details->uname, state, is_set(remote_rsc->flags, pe_rsc_failed));
+    crm_trace("Order %s action %s relative to %s%s (state %d)",
+              action->task, action->uuid,
+              is_set(remote_rsc->flags, pe_rsc_failed)? "failed " : "",
+              remote_rsc->id, state);
     switch (task) {
         case start_rsc:
         case action_promote:
-            if(state == remote_state_failed) {
-                /* Wait for the connection resource to be up and force recovery */
-                custom_action_order(remote_rsc, generate_op_key(remote_rsc->id, RSC_START, 0), NULL,
-                                    action->rsc, NULL, action,
-                                    pe_order_preserve | pe_order_implies_then | pe_order_runnable_left, data_set);
-            } else {
-                /* Ensure the connection resource is up and assume everything is as we left it */
-                custom_action_order(remote_rsc, generate_op_key(remote_rsc->id, RSC_START, 0), NULL,
-                                    action->rsc, NULL, action,
-                                    pe_order_preserve | pe_order_runnable_left, data_set);
+            /* This as an internally generated constraint exempt from
+             * user constraint prohibitions, and this action isn't runnable
+             * if the connection start isn't runnable.
+             */
+            order_opts = pe_order_preserve | pe_order_runnable_left;
+
+            if (state == remote_state_failed) {
+                /* Force recovery, by making this action required */
+                order_opts |= pe_order_implies_then;
             }
+
+            /* Ensure connection is up before running this action */
+            custom_action_order(remote_rsc,
+                                generate_op_key(remote_rsc->id, RSC_START, 0),
+                                NULL, action->rsc, NULL, action, order_opts,
+                                data_set);
             break;
+
         case stop_rsc:
             /* Handle special case with remote node where stop actions need to be
              * ordered after the connection resource starts somewhere else.
@@ -1975,22 +1988,19 @@ apply_remote_ordering(action_t *action, pe_working_set_t *data_set)
                                     pe_order_preserve | pe_order_implies_first, data_set);
             }
             break;
-        case action_demote:
 
-            /* If the connection is being torn down, we don't want
-             * to build a constraint between a resource's demotion and
-             * the connection resource starting... because the connection
-             * resource can not start. The connection might already be up,
-             * but the "start" action would not be allowed, which in turn would
-             * block the demotion of any resources living in the node.
+        case action_demote:
+            /* Only order this demote relative to the connection start if the
+             * connection isn't being torn down. Otherwise, the demote would be
+             * blocked because the connection start would not be allowed.
              */
-
             if(state == remote_state_resting || state == remote_state_unknown) {
                 custom_action_order(remote_rsc, generate_op_key(remote_rsc->id, RSC_START, 0), NULL,
                                     action->rsc, NULL, action,
                                     pe_order_preserve, data_set);
             } /* Otherwise we can rely on the stop ordering */
             break;
+
         default:
             /* Wait for the connection resource to be up */
             if (is_recurring_action(action)) {
@@ -2261,15 +2271,12 @@ stage7(pe_working_set_t * data_set)
     order_probes(data_set);
 
     crm_trace("Updating %d actions", g_list_length(data_set->actions));
-
     for (gIter = data_set->actions; gIter != NULL; gIter = gIter->next) {
         action_t *action = (action_t *) gIter->data;
 
         update_action(action);
     }
 
-    crm_trace("Processing reloads");
-
     LogNodeActions(data_set, FALSE);
     for (gIter = data_set->resources; gIter != NULL; gIter = gIter->next) {
         resource_t *rsc = (resource_t *) gIter->data;
-- 
1.8.3.1
