From e4dae772074c964e10da59e2678f329c9c8a3bf1 Mon Sep 17 00:00:00 2001
From: Ken Gaillot
Date: Tue, 13 Nov 2018 17:51:30 -0600
Subject: [PATCH] Fix: scheduler: order resource moves after connection starts

This addresses a regression in behavior since 1.1.18 (via 3a34fed). By allowing
stops to proceed before probes finished, that change allowed the stop of a
resource moving to a coming-up remote node to happen before the remote node
connection's start. If the remote connection start fails, the resource will
have to be started again where it was, leading to unnecessary downtime.

Now, order the resource's stop after the remote connection's start.

RHBZ#1648507
---
 pengine/allocate.c | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

diff --git a/pengine/allocate.c b/pengine/allocate.c
index 0ee8bb0..126ba90 100644
--- a/pengine/allocate.c
+++ b/pengine/allocate.c
@@ -2224,6 +2224,25 @@ apply_remote_node_ordering(pe_working_set_t *data_set)
             continue;
         }
 
+        /* Another special case: if a resource is moving to a Pacemaker Remote
+         * node, order the stop on the original node after any start of the
+         * remote connection. This ensures that if the connection fails to
+         * start, we leave the resource running on the original node.
+         */
+        if (safe_str_eq(action->task, RSC_START)) {
+            for (GList *item = action->rsc->actions; item != NULL;
+                 item = item->next) {
+                pe_action_t *rsc_action = item->data;
+
+                if ((rsc_action->node->details != action->node->details)
+                    && safe_str_eq(rsc_action->task, RSC_STOP)) {
+                    custom_action_order(remote, start_key(remote), NULL,
+                                        action->rsc, NULL, rsc_action,
+                                        pe_order_optional, data_set);
+                }
+            }
+        }
+
         /* The action occurs across a remote connection, so create
          * ordering constraints that guarantee the action occurs while the node
          * is active (after start, before stop ... things like that).
-- 
1.8.3.1