Tree - rpms/pacemaker - CentOS Git server

rpms / pacemaker

Blame SOURCES/012-stonith-ordering.patch

Blob History Raw

		413fc7	`From eb2854add713f22b083a54aa7caf04be5067b469 Mon Sep 17 00:00:00 2001`
		413fc7	`From: Ken Gaillot <kgaillot@redhat.com>`
		413fc7	`Date: Tue, 13 Nov 2018 18:05:04 -0600`
		413fc7	`Subject: [PATCH] Low: scheduler: order start after particular stonith op`
		413fc7
		413fc7	`Previously, if a resource's status was unknown on a node about to be fenced,`
		413fc7	`we ordered the resource start after all_stopped. This approximated stonith_done`
		413fc7	`before that was available. However, it makes more sense to order the start`
		413fc7	`after the particular stonith op for the node in question.`
		413fc7
		413fc7	`This improves recovery when multiple nodes are being fenced: resources can now`
		413fc7	`be recovered from one node when it is successfully fenced, even if the fencing`
		413fc7	`of another node fails.`
		413fc7	`---`
		413fc7	`pengine/native.c | 63 +++++++++++++++++++++++++++++++-------------------------`
		413fc7	`1 file changed, 35 insertions(+), 28 deletions(-)`
		413fc7
		413fc7	`diff --git a/pengine/native.c b/pengine/native.c`
		413fc7	`index c6c1d55..9ee5990 100644`
		413fc7	`--- a/pengine/native.c`
		413fc7	`+++ b/pengine/native.c`
		413fc7	`@@ -2948,13 +2948,19 @@ native_create_probe(resource_t * rsc, node_t * node, action_t * complete,`
		413fc7	`return TRUE;`
		413fc7	`}`
		413fc7
		413fc7	`+/*!`
		413fc7	`+ * \internal`
		413fc7	`+ * \brief Order a resource's start and promote actions relative to fencing`
		413fc7	`+ *`
		413fc7	`+ * \param[in] rsc Resource to be ordered`
		413fc7	`+ * \param[in] stonith_op Fence action`
		413fc7	`+ * \param[in] data_set Cluster information`
		413fc7	`+ */`
		413fc7	`static void`
		413fc7	`native_start_constraints(resource_t * rsc, action_t * stonith_op, pe_working_set_t * data_set)`
		413fc7	`{`
		413fc7	`node_t *target;`
		413fc7	`GListPtr gIter = NULL;`
		413fc7	`- action_t *all_stopped = get_pseudo_op(ALL_STOPPED, data_set);`
		413fc7	`- action_t *stonith_done = get_pseudo_op(STONITH_DONE, data_set);`
		413fc7
		413fc7	`CRM_CHECK(stonith_op && stonith_op->node, return);`
		413fc7	`target = stonith_op->node;`
		413fc7	`@@ -2962,34 +2968,35 @@ native_start_constraints(resource_t * rsc, action_t * stonith_op, pe_working_set`
		413fc7	`for (gIter = rsc->actions; gIter != NULL; gIter = gIter->next) {`
		413fc7	`action_t *action = (action_t *) gIter->data;`
		413fc7
		413fc7	`- if(action->needs == rsc_req_nothing) {`
		413fc7	`- /* Anything other than start or promote requires nothing */`
		413fc7	`-`
		413fc7	`- } else if (action->needs == rsc_req_stonith) {`
		413fc7	`- order_actions(stonith_done, action, pe_order_optional);`
		413fc7	`+ switch (action->needs) {`
		413fc7	`+ case rsc_req_nothing:`
		413fc7	`+ // Anything other than start or promote requires nothing`
		413fc7	`+ break;`
		413fc7
		413fc7	`- } else if (safe_str_eq(action->task, RSC_START)`
		413fc7	`- && NULL != pe_hash_table_lookup(rsc->allowed_nodes, target->details->id)`
		413fc7	`- && NULL == pe_hash_table_lookup(rsc->known_on, target->details->id)) {`
		413fc7	`- /* if known == NULL, then we don't know if`
		413fc7	`- * the resource is active on the node`
		413fc7	`- * we're about to shoot`
		413fc7	`- *`
		413fc7	`- * in this case, regardless of action->needs,`
		413fc7	`- * the only safe option is to wait until`
		413fc7	`- * the node is shot before doing anything`
		413fc7	`- * to with the resource`
		413fc7	`- *`
		413fc7	`- * it's analogous to waiting for all the probes`
		413fc7	`- * for rscX to complete before starting rscX`
		413fc7	`- *`
		413fc7	`- * the most likely explanation is that the`
		413fc7	`- * DC died and took its status with it`
		413fc7	`- */`
		413fc7	`+ case rsc_req_stonith:`
		413fc7	`+ order_actions(stonith_op, action, pe_order_optional);`
		413fc7	`+ break;`
		413fc7
		413fc7	`- pe_rsc_debug(rsc, "Ordering %s after %s recovery", action->uuid,`
		413fc7	`- target->details->uname);`
		413fc7	`- order_actions(all_stopped, action, pe_order_optional | pe_order_runnable_left);`
		413fc7	`+ case rsc_req_quorum:`
		413fc7	`+ if (safe_str_eq(action->task, RSC_START)`
		413fc7	`+ && pe_hash_table_lookup(rsc->allowed_nodes, target->details->id)`
		413fc7	`+ && NULL == pe_hash_table_lookup(rsc->known_on, target->details->id)) {`
		413fc7	`+`
		413fc7	`+ /* If we don't know the status of the resource on the node`
		413fc7	`+ * we're about to shoot, we have to assume it may be active`
		413fc7	`+ * there. Order the resource start after the fencing. This`
		413fc7	`+ * is analogous to waiting for all the probes for a resource`
		413fc7	`+ * to complete before starting it.`
		413fc7	`+ *`
		413fc7	`+ * The most likely explanation is that the DC died and took`
		413fc7	`+ * its status with it.`
		413fc7	`+ */`
		413fc7	`+ pe_rsc_debug(rsc, "Ordering %s after %s recovery", action->uuid,`
		413fc7	`+ target->details->uname);`
		413fc7	`+ order_actions(stonith_op, action,`
		413fc7	`+ pe_order_optional | pe_order_runnable_left);`
		413fc7	`+ }`
		413fc7	`+ break;`
		413fc7	`}`
		413fc7	`}`
		413fc7	`}`
		413fc7	`--`
		413fc7	`1.8.3.1`
		413fc7

rpms / pacemaker

Source Code

Blame SOURCES/012-stonith-ordering.patch