Blame SOURCES/015-remote-ordering.patch

413fc7
From e4dae772074c964e10da59e2678f329c9c8a3bf1 Mon Sep 17 00:00:00 2001
413fc7
From: Ken Gaillot <kgaillot@redhat.com>
413fc7
Date: Tue, 13 Nov 2018 17:51:30 -0600
413fc7
Subject: [PATCH] Fix: scheduler: order resource moves after connection starts
413fc7
413fc7
This addresses a regression in behavior since 1.1.18 (via 3a34fed). By allowing
413fc7
stops to proceed before probes finished, that change allowed the stop of a
413fc7
resource moving to a coming-up remote node to happen before the remote node
413fc7
connection's start. If the remote connection start fails, the resource will
413fc7
have to be started again where it was, leading to unnecessary downtime.
413fc7
413fc7
Now, order the resource's stop after the remote connection's start.
413fc7
413fc7
RHBZ#1648507
413fc7
---
413fc7
 pengine/allocate.c | 19 +++++++++++++++++++
413fc7
 1 file changed, 19 insertions(+)
413fc7
413fc7
diff --git a/pengine/allocate.c b/pengine/allocate.c
413fc7
index 0ee8bb0..126ba90 100644
413fc7
--- a/pengine/allocate.c
413fc7
+++ b/pengine/allocate.c
413fc7
@@ -2224,6 +2224,25 @@ apply_remote_node_ordering(pe_working_set_t *data_set)
413fc7
             continue;
413fc7
         }
413fc7
 
413fc7
+        /* Another special case: if a resource is moving to a Pacemaker Remote
413fc7
+         * node, order the stop on the original node after any start of the
413fc7
+         * remote connection. This ensures that if the connection fails to
413fc7
+         * start, we leave the resource running on the original node.
413fc7
+         */
413fc7
+        if (safe_str_eq(action->task, RSC_START)) {
413fc7
+            for (GList *item = action->rsc->actions; item != NULL;
413fc7
+                 item = item->next) {
413fc7
+                pe_action_t *rsc_action = item->data;
413fc7
+
413fc7
+                if ((rsc_action->node->details != action->node->details)
413fc7
+                    && safe_str_eq(rsc_action->task, RSC_STOP)) {
413fc7
+                    custom_action_order(remote, start_key(remote), NULL,
413fc7
+                                        action->rsc, NULL, rsc_action,
413fc7
+                                        pe_order_optional, data_set);
413fc7
+                }
413fc7
+            }
413fc7
+        }
413fc7
+
413fc7
         /* The action occurs across a remote connection, so create
413fc7
          * ordering constraints that guarantee the action occurs while the node
413fc7
          * is active (after start, before stop ... things like that).
413fc7
-- 
413fc7
1.8.3.1
413fc7