From 16f57bb79de4f88c2def174e3bb7d8ef312674cd Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Fri, 6 Dec 2019 12:17:03 -0600
Subject: [PATCH 08/18] Low: scheduler: respect shutdown locks when placing
 inactive resources

When shutdown-lock is enabled, and we're either scheduling a resource stop
on a node that's cleanly shutting down or scheduling any action for a
previously locked resource, add "shutdown-lock=<shutdown-timestamp>" to the
graph action. The controller will be able to use this to know when to preserve
the lock (by adding the lock time to the resource state entry).

When the scheduler unpacks a resource state entry with a lock, it will remember
the lock node and lock time, which will trigger existing code for applying
shutdown locks.
---
 lib/pacemaker/pcmk_sched_allocate.c | 17 ++++++++++++-
 lib/pacemaker/pcmk_sched_graph.c    | 30 ++++++++++++++++++++++-
 lib/pengine/unpack.c                | 49 +++++++++++++++++++++++++++++++++----
 3 files changed, 89 insertions(+), 7 deletions(-)

diff --git a/lib/pacemaker/pcmk_sched_allocate.c b/lib/pacemaker/pcmk_sched_allocate.c
index 0314f1b..884e1bd 100644
--- a/lib/pacemaker/pcmk_sched_allocate.c
+++ b/lib/pacemaker/pcmk_sched_allocate.c
@@ -1015,8 +1015,23 @@ apply_shutdown_lock(pe_resource_t *rsc, pe_working_set_t *data_set)
         return;
     }
 
+    if (rsc->lock_node != NULL) {
+        // The lock was obtained from resource history
+
+        if (rsc->running_on != NULL) {
+            /* The resource was started elsewhere even though it is now
+             * considered locked. This shouldn't be possible, but as a
+             * failsafe, we don't want to disturb the resource now.
+             */
+            pe_rsc_info(rsc,
+                        "Cancelling shutdown lock because %s is already active",
+                        rsc->id);
+            rsc->lock_node = NULL;
+            rsc->lock_time = 0;
+        }
+
     // Only a resource active on exactly one node can be locked
-    if (pcmk__list_of_1(rsc->running_on)) {
+    } else if (pcmk__list_of_1(rsc->running_on)) {
         pe_node_t *node = rsc->running_on->data;
 
         if (node->details->shutdown) {
diff --git a/lib/pacemaker/pcmk_sched_graph.c b/lib/pacemaker/pcmk_sched_graph.c
index a6967fe..2861f3d 100644
--- a/lib/pacemaker/pcmk_sched_graph.c
+++ b/lib/pacemaker/pcmk_sched_graph.c
@@ -1,5 +1,5 @@
 /*
- * Copyright 2004-2019 the Pacemaker project contributors
+ * Copyright 2004-2020 the Pacemaker project contributors
  *
  * The version control history for this file may have further details.
  *
@@ -988,6 +988,26 @@ add_downed_nodes(xmlNode *xml, const action_t *action,
     }
 }
 
+static bool
+should_lock_action(pe_action_t *action)
+{
+    // Only actions taking place on resource's lock node are locked
+    if ((action->rsc->lock_node == NULL) || (action->node == NULL)
+        || (action->node->details != action->rsc->lock_node->details)) {
+        return false;
+    }
+
+    /* During shutdown, only stops are locked (otherwise, another action such as
+     * a demote would cause the controller to clear the lock)
+     */
+    if (action->node->details->shutdown && action->task
+        && strcmp(action->task, RSC_STOP)) {
+        return false;
+    }
+
+    return true;
+}
+
 static xmlNode *
 action2xml(action_t * action, gboolean as_input, pe_working_set_t *data_set)
 {
@@ -1097,6 +1117,14 @@ action2xml(action_t * action, gboolean as_input, pe_working_set_t *data_set)
             XML_ATTR_TYPE
         };
 
+        /* If a resource is locked to a node via shutdown-lock, mark its actions
+         * so the controller can preserve the lock when the action completes.
+         */
+        if (should_lock_action(action)) {
+            crm_xml_add_ll(action_xml, XML_CONFIG_ATTR_SHUTDOWN_LOCK,
+                           (long long) action->rsc->lock_time);
+        }
+
         // List affected resource
 
         rsc_xml = create_xml_node(action_xml,
diff --git a/lib/pengine/unpack.c b/lib/pengine/unpack.c
index 8c0d72a..5139e60 100644
--- a/lib/pengine/unpack.c
+++ b/lib/pengine/unpack.c
@@ -12,6 +12,7 @@
 #include <stdio.h>
 #include <string.h>
 #include <glib.h>
+#include <time.h>
 
 #include <crm/crm.h>
 #include <crm/services.h>
@@ -1059,7 +1060,8 @@ unpack_node_loop(xmlNode * status, bool fence, pe_working_set_t * data_set)
                 crm_trace("Checking node %s/%s/%s status %d/%d/%d", id, rsc->id, rsc->container->id, fence, rsc->role, RSC_ROLE_STARTED);
 
             } else if (!pe__is_guest_node(this_node)
-                       && rsc->role == RSC_ROLE_STARTED) {
+                       && ((rsc->role == RSC_ROLE_STARTED)
+                           || is_set(data_set->flags, pe_flag_shutdown_lock))) {
                 check = TRUE;
                 crm_trace("Checking node %s/%s status %d/%d/%d", id, rsc->id, fence, rsc->role, RSC_ROLE_STARTED);
             }
@@ -1075,6 +1077,9 @@ unpack_node_loop(xmlNode * status, bool fence, pe_working_set_t * data_set)
 
         } else if (fence) {
             process = TRUE;
+
+        } else if (is_set(data_set->flags, pe_flag_shutdown_lock)) {
+            process = TRUE;
         }
 
         if(process) {
@@ -2198,6 +2203,28 @@ calculate_active_ops(GListPtr sorted_op_list, int *start_index, int *stop_index)
     }
 }
 
+// If resource history entry has shutdown lock, remember lock node and time
+static void
+unpack_shutdown_lock(xmlNode *rsc_entry, pe_resource_t *rsc, pe_node_t *node,
+                     pe_working_set_t *data_set)
+{
+    time_t lock_time = 0;   // When lock started (i.e. node shutdown time)
+
+    if ((crm_element_value_epoch(rsc_entry, XML_CONFIG_ATTR_SHUTDOWN_LOCK,
+                                 &lock_time) == pcmk_ok) && (lock_time != 0)) {
+
+        if ((data_set->shutdown_lock > 0)
+            && (get_effective_time(data_set)
+                > (lock_time + data_set->shutdown_lock))) {
+            pe_rsc_info(rsc, "Shutdown lock for %s on %s expired",
+                        rsc->id, node->details->uname);
+        } else {
+            rsc->lock_node = node;
+            rsc->lock_time = lock_time;
+        }
+    }
+}
+
 static resource_t *
 unpack_lrm_rsc_state(node_t * node, xmlNode * rsc_entry, pe_working_set_t * data_set)
 {
@@ -2234,18 +2261,30 @@ unpack_lrm_rsc_state(node_t * node, xmlNode * rsc_entry, pe_working_set_t * data
         }
     }
 
-    if (op_list == NULL) {
-        /* if there are no operations, there is nothing to do */
-        return NULL;
+    if (is_not_set(data_set->flags, pe_flag_shutdown_lock)) {
+        if (op_list == NULL) {
+            // If there are no operations, there is nothing to do
+            return NULL;
+        }
     }
 
     /* find the resource */
     rsc = unpack_find_resource(data_set, node, rsc_id, rsc_entry);
     if (rsc == NULL) {
-        rsc = process_orphan_resource(rsc_entry, node, data_set);
+        if (op_list == NULL) {
+            // If there are no operations, there is nothing to do
+            return NULL;
+        } else {
+            rsc = process_orphan_resource(rsc_entry, node, data_set);
+        }
     }
     CRM_ASSERT(rsc != NULL);
 
+    // Check whether the resource is "shutdown-locked" to this node
+    if (is_set(data_set->flags, pe_flag_shutdown_lock)) {
+        unpack_shutdown_lock(rsc_entry, rsc, node, data_set);
+    }
+
     /* process operations */
     saved_role = rsc->role;
     on_fail = action_fail_ignore;
-- 
1.8.3.1