Blob Blame History Raw
From e4dae772074c964e10da59e2678f329c9c8a3bf1 Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Tue, 13 Nov 2018 17:51:30 -0600
Subject: [PATCH] Fix: scheduler: order resource moves after connection starts

This addresses a behavioral regression present since 1.1.18 (introduced by
3a34fed). By allowing stops to proceed before probes had finished, that change
also allowed a resource moving to a remote node that is still coming up to be
stopped before the remote node's connection had started. If the connection
start then fails, the resource has to be started again where it was running,
leading to unnecessary downtime.

Now, order the resource's stop after the remote connection's start.

RHBZ#1648507
---
 pengine/allocate.c | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

diff --git a/pengine/allocate.c b/pengine/allocate.c
index 0ee8bb0..126ba90 100644
--- a/pengine/allocate.c
+++ b/pengine/allocate.c
@@ -2224,6 +2224,25 @@ apply_remote_node_ordering(pe_working_set_t *data_set)
             continue;
         }
 
+        /* Another special case: if a resource is moving to a Pacemaker Remote
+         * node, order the stop on the original node after any start of the
+         * remote connection. This ensures that if the connection fails to
+         * start, we leave the resource running on the original node.
+         */
+        if (safe_str_eq(action->task, RSC_START)) {
+            for (GList *item = action->rsc->actions; item != NULL;
+                 item = item->next) {
+                pe_action_t *rsc_action = item->data;
+
+                if ((rsc_action->node->details != action->node->details)
+                    && safe_str_eq(rsc_action->task, RSC_STOP)) {
+                    custom_action_order(remote, start_key(remote), NULL,
+                                        action->rsc, NULL, rsc_action,
+                                        pe_order_optional, data_set);
+                }
+            }
+        }
+
         /* The action occurs across a remote connection, so create
          * ordering constraints that guarantee the action occurs while the node
          * is active (after start, before stop ... things like that).
-- 
1.8.3.1