Blame SOURCES/shutdown-lock-03.patch

b04960
From 749f6b256cb2864ce3e862442adc6d219eefeca3 Mon Sep 17 00:00:00 2001
b04960
From: Ken Gaillot <kgaillot@redhat.com>
b04960
Date: Fri, 6 Dec 2019 12:17:03 -0600
b04960
Subject: [PATCH 03/10] Low: scheduler: respect shutdown locks when placing
b04960
 inactive resources
b04960
b04960
When shutdown-lock is enabled, and we're either scheduling a resource stop
b04960
on a node that's cleanly shutting down or scheduling any action for a
b04960
previously locked resource, add "shutdown-lock=<shutdown-timestamp>" to the
b04960
graph action. The controller will be able to use this to know when to preserve
b04960
the lock (by adding the lock time to the resource state entry).
b04960
b04960
When the scheduler unpacks a resource state entry with a lock, it will remember
b04960
the lock node and lock time, which will trigger existing code for applying
b04960
shutdown locks.
b04960
---
b04960
 lib/pengine/unpack.c | 49 ++++++++++++++++++++++++++++++++++++++++++++-----
b04960
 pengine/allocate.c   | 17 ++++++++++++++++-
b04960
 pengine/graph.c      | 32 +++++++++++++++++++++++++++++++-
b04960
 3 files changed, 91 insertions(+), 7 deletions(-)
b04960
b04960
diff --git a/lib/pengine/unpack.c b/lib/pengine/unpack.c
b04960
index 7b0d837..bb5efa4 100644
b04960
--- a/lib/pengine/unpack.c
b04960
+++ b/lib/pengine/unpack.c
b04960
@@ -18,6 +18,7 @@
b04960
 #include <crm_internal.h>
b04960
 
b04960
 #include <glib.h>
b04960
+#include <time.h>
b04960
 
b04960
 #include <crm/crm.h>
b04960
 #include <crm/services.h>
b04960
@@ -1151,7 +1152,8 @@ unpack_node_loop(xmlNode * status, bool fence, pe_working_set_t * data_set)
b04960
                 crm_trace("Checking node %s/%s/%s status %d/%d/%d", id, rsc->id, rsc->container->id, fence, rsc->role, RSC_ROLE_STARTED);
b04960
 
b04960
             } else if (is_container_remote_node(this_node) == FALSE
b04960
-                       && rsc->role == RSC_ROLE_STARTED) {
b04960
+                       && ((rsc->role == RSC_ROLE_STARTED)
b04960
+                           || is_set(data_set->flags, pe_flag_shutdown_lock))) {
b04960
                 check = TRUE;
b04960
                 crm_trace("Checking node %s/%s status %d/%d/%d", id, rsc->id, fence, rsc->role, RSC_ROLE_STARTED);
b04960
             }
b04960
@@ -1167,6 +1169,9 @@ unpack_node_loop(xmlNode * status, bool fence, pe_working_set_t * data_set)
b04960
 
b04960
         } else if (fence) {
b04960
             process = TRUE;
b04960
+
b04960
+        } else if (is_set(data_set->flags, pe_flag_shutdown_lock)) {
b04960
+            process = TRUE;
b04960
         }
b04960
 
b04960
         if(process) {
b04960
@@ -2286,6 +2291,28 @@ calculate_active_ops(GListPtr sorted_op_list, int *start_index, int *stop_index)
b04960
     }
b04960
 }
b04960
 
b04960
+// If resource history entry has an unexpired shutdown lock, remember lock node and time on rsc; an expired lock is only logged
b04960
+static void
b04960
+unpack_shutdown_lock(xmlNode *rsc_entry, pe_resource_t *rsc, pe_node_t *node,
b04960
+                     pe_working_set_t *data_set)
b04960
+{
b04960
+    time_t lock_time = 0;   // When lock started (i.e. node shutdown time); stays 0 if the attribute yields nothing
b04960
+
b04960
+    if ((crm_element_value_epoch(rsc_entry, XML_CONFIG_ATTR_SHUTDOWN_LOCK,
b04960
+                                 &lock_time) == pcmk_ok) && (lock_time != 0)) { // Proceed only if the read succeeded with a nonzero time
b04960
+
b04960
+        if ((data_set->shutdown_lock > 0) // A limit <= 0 skips the expiry check, so the lock is always kept
b04960
+            && (get_effective_time(data_set)
b04960
+                > (lock_time + data_set->shutdown_lock))) { // Expired: effective time is past lock start plus the limit
b04960
+            pe_rsc_info(rsc, "Shutdown lock for %s on %s expired",
b04960
+                        rsc->id, node->details->uname);
b04960
+        } else { // Lock still in force: record it on the resource
b04960
+            rsc->lock_node = node;
b04960
+            rsc->lock_time = lock_time;
b04960
+        }
b04960
+    }
b04960
+}
b04960
+
b04960
 static resource_t *
b04960
 unpack_lrm_rsc_state(node_t * node, xmlNode * rsc_entry, pe_working_set_t * data_set)
b04960
 {
b04960
@@ -2322,18 +2349,30 @@ unpack_lrm_rsc_state(node_t * node, xmlNode * rsc_entry, pe_working_set_t * data
b04960
         }
b04960
     }
b04960
 
b04960
-    if (op_list == NULL) {
b04960
-        /* if there are no operations, there is nothing to do */
b04960
-        return NULL;
b04960
+    if (is_not_set(data_set->flags, pe_flag_shutdown_lock)) {
b04960
+        if (op_list == NULL) {
b04960
+            // If there are no operations, there is nothing to do
b04960
+            return NULL;
b04960
+        }
b04960
     }
b04960
 
b04960
     /* find the resource */
b04960
     rsc = unpack_find_resource(data_set, node, rsc_id, rsc_entry);
b04960
     if (rsc == NULL) {
b04960
-        rsc = process_orphan_resource(rsc_entry, node, data_set);
b04960
+        if (op_list == NULL) {
b04960
+            // If there are no operations, there is nothing to do
b04960
+            return NULL;
b04960
+        } else {
b04960
+            rsc = process_orphan_resource(rsc_entry, node, data_set);
b04960
+        }
b04960
     }
b04960
     CRM_ASSERT(rsc != NULL);
b04960
 
b04960
+    // Check whether the resource is "shutdown-locked" to this node
b04960
+    if (is_set(data_set->flags, pe_flag_shutdown_lock)) {
b04960
+        unpack_shutdown_lock(rsc_entry, rsc, node, data_set);
b04960
+    }
b04960
+
b04960
     /* process operations */
b04960
     saved_role = rsc->role;
b04960
     on_fail = action_fail_ignore;
b04960
diff --git a/pengine/allocate.c b/pengine/allocate.c
b04960
index 09f9e51..7366716 100644
b04960
--- a/pengine/allocate.c
b04960
+++ b/pengine/allocate.c
b04960
@@ -1047,8 +1047,23 @@ apply_shutdown_lock(pe_resource_t *rsc, pe_working_set_t *data_set)
b04960
         return;
b04960
     }
b04960
 
b04960
+    if (rsc->lock_node != NULL) {
b04960
+        // The lock was obtained from resource history
b04960
+
b04960
+        if (rsc->running_on != NULL) {
b04960
+            /* The resource was started elsewhere even though it is now
b04960
+             * considered locked. This shouldn't be possible, but as a
b04960
+             * failsafe, we don't want to disturb the resource now.
b04960
+             */
b04960
+            pe_rsc_info(rsc,
b04960
+                        "Cancelling shutdown lock because %s is already active",
b04960
+                        rsc->id);
b04960
+            rsc->lock_node = NULL;
b04960
+            rsc->lock_time = 0;
b04960
+        }
b04960
+
b04960
     // Only a resource active on exactly one node can be locked
b04960
-    if (pcmk__list_of_1(rsc->running_on)) {
b04960
+    } else if (pcmk__list_of_1(rsc->running_on)) {
b04960
         pe_node_t *node = rsc->running_on->data;
b04960
 
b04960
         if (node->details->shutdown) {
b04960
diff --git a/pengine/graph.c b/pengine/graph.c
b04960
index cba30d0..33168ca 100644
b04960
--- a/pengine/graph.c
b04960
+++ b/pengine/graph.c
b04960
@@ -1,5 +1,7 @@
b04960
 /*
b04960
- * Copyright (C) 2004 Andrew Beekhof <andrew@beekhof.net>
b04960
+ * Copyright 2004-2020 the Pacemaker project contributors
b04960
+ *
b04960
+ * The version control history for this file may have further details.
b04960
  *
b04960
  * This program is free software; you can redistribute it and/or
b04960
  * modify it under the terms of the GNU General Public
b04960
@@ -998,6 +1000,26 @@ add_downed_nodes(xmlNode *xml, const action_t *action,
b04960
     }
b04960
 }
b04960
 
b04960
+static bool // Whether an action's graph XML should carry its resource's shutdown-lock time
b04960
+should_lock_action(pe_action_t *action)
b04960
+{
b04960
+    // Only actions taking place on resource's lock node are locked (action->rsc is dereferenced unchecked, so it must be non-NULL)
b04960
+    if ((action->rsc->lock_node == NULL) || (action->node == NULL)
b04960
+        || (action->node->details != action->rsc->lock_node->details)) { // Nodes are matched by their details pointer
b04960
+        return false;
b04960
+    }
b04960
+
b04960
+    /* During shutdown, only stops are locked (otherwise, another action such as
b04960
+     * a demote would cause the controller to clear the lock). A NULL task is not filtered out by this check.
b04960
+     */
b04960
+    if (action->node->details->shutdown && action->task
b04960
+        && strcmp(action->task, RSC_STOP)) { // Nonzero strcmp: the task is something other than a stop
b04960
+        return false;
b04960
+    }
b04960
+
b04960
+    return true;
b04960
+}
b04960
+
b04960
 static xmlNode *
b04960
 action2xml(action_t * action, gboolean as_input, pe_working_set_t *data_set)
b04960
 {
b04960
@@ -1104,6 +1126,14 @@ action2xml(action_t * action, gboolean as_input, pe_working_set_t *data_set)
b04960
             XML_ATTR_TYPE
b04960
         };
b04960
 
b04960
+        /* If a resource is locked to a node via shutdown-lock, mark its actions
b04960
+         * so the controller can preserve the lock when the action completes.
b04960
+         */
b04960
+        if (should_lock_action(action)) {
b04960
+            crm_xml_add_ll(action_xml, XML_CONFIG_ATTR_SHUTDOWN_LOCK,
b04960
+                           (long long) action->rsc->lock_time);
b04960
+        }
b04960
+
b04960
         // List affected resource
b04960
 
b04960
         rsc_xml = create_xml_node(action_xml,
b04960
-- 
b04960
1.8.3.1
b04960