SOURCES/shutdown-lock-02.patch

From 4e85d3012d61dcf534a51d4f82b91fce9aef8d0b Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Fri, 6 Dec 2019 11:57:59 -0600
Subject: [PATCH 02/10] Low: scheduler: respect shutdown locks when placing
 active resources

Use new pe_resource_t members to indicate that a resource is locked to a
particular node.

For active resources (i.e. in the transition where the node is scheduled for
shutdown), these are set by checking each lockable resource for whether
it is running on a single clean node that is shutting down.

When applying constraints, place -INFINITY location constraints for locked
resources on all nodes other than the lock node.

(Inactive resources -- i.e. in later transitions after the node is shut down --
are not yet locked.)
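
To make the banning step concrete, the sketch below shows the intended effect
in plain C. It is illustration only, not the Pacemaker scheduler API: the node
table, score field, and NEG_INFINITY stand-in are invented for this example.
After the loop, every node except the lock node carries a -INFINITY
preference, so the resource may only remain on (or return to) its lock node.

    /* Illustration only -- not Pacemaker code.  Mirrors the effect of the
     * resource_location(..., -INFINITY, ...) loop added by this patch. */
    #include <limits.h>
    #include <stdio.h>
    #include <string.h>

    #define NEG_INFINITY INT_MIN        /* stand-in for Pacemaker's -INFINITY */

    struct node_score {
        const char *uname;              /* node name */
        int score;                      /* placement preference for the resource */
    };

    int main(void)
    {
        struct node_score nodes[] = {
            { "node1", 0 }, { "node2", 0 }, { "node3", 0 },
        };
        const char *lock_node = "node2";    /* shutting down with the resource active */

        /* Ban the resource from every node other than the lock node */
        for (size_t i = 0; i < sizeof(nodes) / sizeof(nodes[0]); i++) {
            if (strcmp(nodes[i].uname, lock_node) != 0) {
                nodes[i].score = NEG_INFINITY;
            }
        }

        for (size_t i = 0; i < sizeof(nodes) / sizeof(nodes[0]); i++) {
            printf("%-6s %s\n", nodes[i].uname,
                   (nodes[i].score == NEG_INFINITY) ? "-INFINITY (banned)" : "allowed");
        }
        return 0;
    }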
---
 include/crm/pengine/status.h |  2 ++
 pengine/allocate.c           | 86 ++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 88 insertions(+)

diff --git a/include/crm/pengine/status.h b/include/crm/pengine/status.h
index c6d4bdb..1e8d5bb 100644
--- a/include/crm/pengine/status.h
+++ b/include/crm/pengine/status.h
@@ -347,6 +347,8 @@ struct resource_s {
     pe_working_set_t *cluster;
 
     pe_node_t *pending_node;    // Node on which pending_task is happening
+    pe_node_t *lock_node;       // Resource is shutdown-locked to this node
+    time_t lock_time;           // When shutdown lock started
 
 #if ENABLE_VERSIONED_ATTRS
     xmlNode *versioned_parameters;
diff --git a/pengine/allocate.c b/pengine/allocate.c
index 30d29e1..09f9e51 100644
--- a/pengine/allocate.c
+++ b/pengine/allocate.c
@@ -1009,6 +1009,86 @@ rsc_discover_filter(resource_t *rsc, node_t *node)
     }
 }
 
+static time_t
+shutdown_time(pe_node_t *node, pe_working_set_t *data_set)
+{
+    const char *shutdown = pe_node_attribute_raw(node, XML_CIB_ATTR_SHUTDOWN);
+    time_t result = 0;
+
+    if (shutdown) {
+        errno = 0;
+        result = (time_t) crm_int_helper(shutdown, NULL);
+        if (errno != 0) {
+            result = 0;
+        }
+    }
+    return result? result : get_effective_time(data_set);
+}
+
+static void
+apply_shutdown_lock(pe_resource_t *rsc, pe_working_set_t *data_set)
+{
+    const char *class;
+
+    // Only primitives and (uncloned) groups may be locked
+    if (rsc->variant == pe_group) {
+        for (GList *item = rsc->children; item != NULL;
+             item = item->next) {
+            apply_shutdown_lock((pe_resource_t *) item->data, data_set);
+        }
+    } else if (rsc->variant != pe_native) {
+        return;
+    }
+
+    // Fence devices and remote connections can't be locked
+    class = crm_element_value(rsc->xml, XML_AGENT_ATTR_CLASS);
+    if ((class == NULL) || !strcmp(class, PCMK_RESOURCE_CLASS_STONITH)
+        || is_rsc_baremetal_remote_node(rsc, data_set)) {
+        return;
+    }
+
+    // Only a resource active on exactly one node can be locked
+    if (pcmk__list_of_1(rsc->running_on)) {
+        pe_node_t *node = rsc->running_on->data;
+
+        if (node->details->shutdown) {
+            if (node->details->unclean) {
+                pe_rsc_debug(rsc, "Not locking %s to unclean %s for shutdown",
+                             rsc->id, node->details->uname);
+            } else {
+                rsc->lock_node = node;
+                rsc->lock_time = shutdown_time(node, data_set);
+            }
+        }
+    }
+
+    if (rsc->lock_node == NULL) {
+        // No lock needed
+        return;
+    }
+
+    if (data_set->shutdown_lock > 0) {
+        time_t lock_expiration = rsc->lock_time + data_set->shutdown_lock;
+
+        pe_rsc_info(rsc, "Locking %s to %s due to shutdown (expires @%lld)",
+                    rsc->id, rsc->lock_node->details->uname,
+                    (long long) lock_expiration);
+    } else {
+        pe_rsc_info(rsc, "Locking %s to %s due to shutdown",
+                    rsc->id, rsc->lock_node->details->uname);
+    }
+
+    // If resource is locked to one node, ban it from all other nodes
+    for (GList *item = data_set->nodes; item != NULL; item = item->next) {
+        pe_node_t *node = item->data;
+
+        if (strcmp(node->details->uname, rsc->lock_node->details->uname)) {
+            resource_location(rsc, node, -INFINITY,
+                              XML_CONFIG_ATTR_SHUTDOWN_LOCK, data_set);
+        }
+    }
+}
+
 /*
  * \internal
  * \brief Stage 2 of cluster status: apply node-specific criteria
@@ -1020,6 +1100,12 @@ stage2(pe_working_set_t * data_set)
 {
     GListPtr gIter = NULL;
 
+    if (is_set(data_set->flags, pe_flag_shutdown_lock)) {
+        for (gIter = data_set->resources; gIter != NULL; gIter = gIter->next) {
+            apply_shutdown_lock((pe_resource_t *) gIter->data, data_set);
+        }
+    }
+
     if (is_not_set(data_set->flags, pe_flag_no_compat)) {
         // @COMPAT API backward compatibility
         for (gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) {
-- 
1.8.3.1
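
The "expires @%lld" timestamp logged by apply_shutdown_lock() above is simply
the lock start time (the node's shutdown attribute parsed as epoch seconds,
falling back to the effective time when the attribute is absent or unparsable)
plus the configured data_set->shutdown_lock duration; it is only computed when
that duration is greater than zero. A minimal stand-alone sketch of the
arithmetic, with strtoll() standing in for crm_int_helper() and made-up
example values:

    /* Sketch only -- not Pacemaker code.  shutdown_attr mimics the node's
     * XML_CIB_ATTR_SHUTDOWN attribute (epoch seconds when shutdown was requested). */
    #include <stdio.h>
    #include <stdlib.h>
    #include <time.h>

    int main(void)
    {
        const char *shutdown_attr = "1575650279";   /* example attribute value */
        time_t shutdown_lock = 600;                 /* example lock duration, seconds */

        time_t lock_time = (time_t) strtoll(shutdown_attr, NULL, 10);
        if (lock_time == 0) {
            lock_time = time(NULL);     /* like get_effective_time(): fall back to "now" */
        }

        time_t lock_expiration = lock_time + shutdown_lock;
        printf("lock taken @%lld, expires @%lld\n",
               (long long) lock_time, (long long) lock_expiration);
        return 0;
    }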