Blame SOURCES/010-shutdown-lock.patch

ed4e54
From 50b0944c8add3f16b8190e75a6d06c3473c12a8f Mon Sep 17 00:00:00 2001
ed4e54
From: Ken Gaillot <kgaillot@redhat.com>
ed4e54
Date: Thu, 21 Nov 2019 14:48:02 -0600
ed4e54
Subject: [PATCH 06/18] Feature: scheduler: add shutdown lock cluster options
ed4e54
ed4e54
This commit adds shutdown-lock and shutdown-lock-limit options (just the
ed4e54
options, not the feature itself).
ed4e54
ed4e54
shutdown-lock defaults to false, which preserves current behavior. The intended
ed4e54
purpose of setting it to true is to *prevent* recovery of a node's resources
ed4e54
elsewhere when the node is cleanly shut down, until the node rejoins. If
ed4e54
shutdown-lock-limit is set to a nonzero time duration, the cluster will
ed4e54
be allowed to recover the resources if the node has not rejoined within this
ed4e54
time.
ed4e54
ed4e54
The use case is when rebooting a node (such as for software updates) is done by
ed4e54
cluster-unaware system administrators during scheduled maintenance windows,
ed4e54
resources prefer specific nodes, and resource recovery time is high.
ed4e54
---
ed4e54
 include/crm/msg_xml.h          |  4 +++-
ed4e54
 include/crm/pengine/pe_types.h |  2 ++
ed4e54
 lib/pengine/common.c           | 24 +++++++++++++++++++++++-
ed4e54
 lib/pengine/unpack.c           | 10 ++++++++++
ed4e54
 4 files changed, 38 insertions(+), 2 deletions(-)
ed4e54
ed4e54
diff --git a/include/crm/msg_xml.h b/include/crm/msg_xml.h
ed4e54
index d56e40c..d0cdf6c 100644
ed4e54
--- a/include/crm/msg_xml.h
ed4e54
+++ b/include/crm/msg_xml.h
ed4e54
@@ -1,5 +1,5 @@
ed4e54
 /*
ed4e54
- * Copyright 2004-2019 the Pacemaker project contributors
ed4e54
+ * Copyright 2004-2020 the Pacemaker project contributors
ed4e54
  *
ed4e54
  * The version control history for this file may have further details.
ed4e54
  *
ed4e54
@@ -346,6 +346,8 @@ extern "C" {
ed4e54
 #  define XML_CONFIG_ATTR_FORCE_QUIT	"shutdown-escalation"
ed4e54
 #  define XML_CONFIG_ATTR_RECHECK	"cluster-recheck-interval"
ed4e54
 #  define XML_CONFIG_ATTR_FENCE_REACTION	"fence-reaction"
ed4e54
+#  define XML_CONFIG_ATTR_SHUTDOWN_LOCK         "shutdown-lock"
ed4e54
+#  define XML_CONFIG_ATTR_SHUTDOWN_LOCK_LIMIT   "shutdown-lock-limit"
ed4e54
 
ed4e54
 #  define XML_ALERT_ATTR_PATH		"path"
ed4e54
 #  define XML_ALERT_ATTR_TIMEOUT	"timeout"
ed4e54
diff --git a/include/crm/pengine/pe_types.h b/include/crm/pengine/pe_types.h
ed4e54
index 23e1c46..8a735a3 100644
ed4e54
--- a/include/crm/pengine/pe_types.h
ed4e54
+++ b/include/crm/pengine/pe_types.h
ed4e54
@@ -102,6 +102,7 @@ enum pe_find {
ed4e54
 #  define pe_flag_start_failure_fatal   0x00001000ULL
ed4e54
 #  define pe_flag_remove_after_stop     0x00002000ULL
ed4e54
 #  define pe_flag_startup_fencing       0x00004000ULL
ed4e54
+#  define pe_flag_shutdown_lock         0x00008000ULL
ed4e54
 
ed4e54
 #  define pe_flag_startup_probes        0x00010000ULL
ed4e54
 #  define pe_flag_have_status           0x00020000ULL
ed4e54
@@ -167,6 +168,7 @@ struct pe_working_set_s {
ed4e54
     GList *stop_needed; // Containers that need stop actions
ed4e54
     time_t recheck_by;  // Hint to controller to re-run scheduler by this time
ed4e54
     int ninstances;     // Total number of resource instances
ed4e54
+    guint shutdown_lock;// How long (seconds) to lock resources to shutdown node
ed4e54
 };
ed4e54
 
ed4e54
 enum pe_check_parameters {
ed4e54
diff --git a/lib/pengine/common.c b/lib/pengine/common.c
ed4e54
index da39c99..e72a033 100644
ed4e54
--- a/lib/pengine/common.c
ed4e54
+++ b/lib/pengine/common.c
ed4e54
@@ -1,5 +1,7 @@
ed4e54
 /*
ed4e54
- * Copyright 2004-2018 Andrew Beekhof <andrew@beekhof.net>
ed4e54
+ * Copyright 2004-2020 the Pacemaker project contributors
ed4e54
+ *
ed4e54
+ * The version control history for this file may have further details.
ed4e54
  *
ed4e54
  * This source code is licensed under the GNU Lesser General Public License
ed4e54
  * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
ed4e54
@@ -85,6 +87,26 @@ static pe_cluster_option pe_opts[] = {
ed4e54
 	  "When set to TRUE, the cluster will immediately ban a resource from a node if it fails to start there. When FALSE, the cluster will instead check the resource's fail count against its migration-threshold." },
ed4e54
 	{ "enable-startup-probes", NULL, "boolean", NULL, "true", &check_boolean,
ed4e54
 	  "Should the cluster check for active resources during startup", NULL },
ed4e54
+    {
ed4e54
+        XML_CONFIG_ATTR_SHUTDOWN_LOCK,
ed4e54
+        NULL, "boolean", NULL, "false", &check_boolean,
ed4e54
+        "Whether to lock resources to a cleanly shut down node",
ed4e54
+        "When true, resources active on a node when it is cleanly shut down "
ed4e54
+            "are kept \"locked\" to that node (not allowed to run elsewhere) "
ed4e54
+            "until they start again on that node after it rejoins (or for at "
ed4e54
+            "most shutdown-lock-limit, if set). Stonith resources and "
ed4e54
+            "Pacemaker Remote connections are never locked. Clone and bundle "
ed4e54
+            "instances and the master role of promotable clones are currently "
ed4e54
+            "never locked, though support could be added in a future release."
ed4e54
+    },
ed4e54
+    {
ed4e54
+        XML_CONFIG_ATTR_SHUTDOWN_LOCK_LIMIT,
ed4e54
+        NULL, "time", NULL, "0", &check_timer,
ed4e54
+        "Do not lock resources to a cleanly shut down node longer than this",
ed4e54
+        "If shutdown-lock is true and this is set to a nonzero time duration, "
ed4e54
+            "shutdown locks will expire after this much time has passed since "
ed4e54
+            "the shutdown was initiated, even if the node has not rejoined."
ed4e54
+    },
ed4e54
 
ed4e54
 	/* Stonith Options */
ed4e54
 	{ "stonith-enabled", NULL, "boolean", NULL, "true", &check_boolean,
ed4e54
diff --git a/lib/pengine/unpack.c b/lib/pengine/unpack.c
ed4e54
index c9fc672..8c0d72a 100644
ed4e54
--- a/lib/pengine/unpack.c
ed4e54
+++ b/lib/pengine/unpack.c
ed4e54
@@ -319,6 +319,16 @@ unpack_config(xmlNode * config, pe_working_set_t * data_set)
ed4e54
     data_set->placement_strategy = pe_pref(data_set->config_hash, "placement-strategy");
ed4e54
     crm_trace("Placement strategy: %s", data_set->placement_strategy);
ed4e54
 
ed4e54
+    set_config_flag(data_set, "shutdown-lock", pe_flag_shutdown_lock);
ed4e54
+    crm_trace("Resources will%s be locked to cleanly shut down nodes",
ed4e54
+              (is_set(data_set->flags, pe_flag_shutdown_lock)? "" : " not"));
ed4e54
+    if (is_set(data_set->flags, pe_flag_shutdown_lock)) {
ed4e54
+        value = pe_pref(data_set->config_hash,
ed4e54
+                        XML_CONFIG_ATTR_SHUTDOWN_LOCK_LIMIT);
ed4e54
+        data_set->shutdown_lock = crm_parse_interval_spec(value) / 1000;
ed4e54
+        crm_trace("Shutdown locks expire after %us", data_set->shutdown_lock);
ed4e54
+    }
ed4e54
+
ed4e54
     return TRUE;
ed4e54
 }
ed4e54
 
ed4e54
-- 
ed4e54
1.8.3.1
ed4e54