Blame SOURCES/shutdown-lock-01.patch

af918f
From c3e2a63e08046b25f1773504d8c1ab431d3abf78 Mon Sep 17 00:00:00 2001
af918f
From: Ken Gaillot <kgaillot@redhat.com>
af918f
Date: Thu, 21 Nov 2019 14:48:02 -0600
af918f
Subject: [PATCH 01/10] Feature: scheduler: add shutdown lock cluster options
af918f
af918f
This commit adds shutdown-lock and shutdown-lock-limit options (just the
af918f
options, not the feature itself).
af918f
af918f
shutdown-lock defaults to false, which preserves current behavior. The intended
af918f
purpose of setting it to true is to *prevent* recovery of a node's resources
af918f
elsewhere when the node is cleanly shut down, until the node rejoins. If
af918f
shutdown-lock-limit is set to a nonzero time duration, the cluster will
af918f
be allowed to recover the resources if the node has not rejoined within this
af918f
time.
af918f
af918f
The use case is a cluster where nodes are rebooted (such as for software
af918f
updates) by cluster-unaware system administrators during scheduled maintenance
af918f
windows, resources prefer specific nodes, and resource recovery time is high.
af918f
---
af918f
 include/crm/msg_xml.h        |  6 +++++-
af918f
 include/crm/pengine/status.h |  2 ++
af918f
 lib/pengine/common.c         | 26 ++++++++++++++++++++++++--
af918f
 lib/pengine/unpack.c         | 10 ++++++++++
af918f
 4 files changed, 41 insertions(+), 3 deletions(-)
af918f
af918f
diff --git a/include/crm/msg_xml.h b/include/crm/msg_xml.h
af918f
index de99959..50fdf45 100644
af918f
--- a/include/crm/msg_xml.h
af918f
+++ b/include/crm/msg_xml.h
af918f
@@ -1,5 +1,7 @@
af918f
 /*
af918f
- * Copyright (C) 2004 Andrew Beekhof <andrew@beekhof.net>
af918f
+ * Copyright 2004-2020 the Pacemaker project contributors
af918f
+ *
af918f
+ * The version control history for this file may have further details.
af918f
  *
af918f
  * This program is free software; you can redistribute it and/or
af918f
  * modify it under the terms of the GNU Lesser General Public
af918f
@@ -378,6 +380,8 @@
af918f
 #  define XML_CONFIG_ATTR_RECHECK	"cluster-recheck-interval"
af918f
 #  define XML_CONFIG_ATTR_FENCE_REACTION	"fence-reaction"
f0d7ba
 #  define XML_CONFIG_ATTR_PRIORITY_FENCING_DELAY "priority-fencing-delay"
af918f
+#  define XML_CONFIG_ATTR_SHUTDOWN_LOCK         "shutdown-lock"
af918f
+#  define XML_CONFIG_ATTR_SHUTDOWN_LOCK_LIMIT   "shutdown-lock-limit"
af918f
 
af918f
 #  define XML_ALERT_ATTR_PATH		"path"
af918f
 #  define XML_ALERT_ATTR_TIMEOUT	"timeout"
af918f
diff --git a/include/crm/pengine/status.h b/include/crm/pengine/status.h
af918f
index 415f60e..c6d4bdb 100644
af918f
--- a/include/crm/pengine/status.h
af918f
+++ b/include/crm/pengine/status.h
af918f
@@ -83,6 +83,7 @@ enum pe_find {
af918f
 #  define pe_flag_start_failure_fatal   0x00001000ULL
af918f
 #  define pe_flag_remove_after_stop     0x00002000ULL
af918f
 #  define pe_flag_startup_fencing       0x00004000ULL
af918f
+#  define pe_flag_shutdown_lock         0x00008000ULL
af918f
 
af918f
 #  define pe_flag_startup_probes        0x00010000ULL
af918f
 #  define pe_flag_have_status           0x00020000ULL
f0d7ba
@@ -147,6 +148,7 @@ typedef struct pe_working_set_s {
af918f
 
af918f
     GList *param_check; // History entries that need to be checked
af918f
     GList *stop_needed; // Containers that need stop actions
af918f
+    guint shutdown_lock;// How long (seconds) to lock resources to shutdown node
f0d7ba
     int ninstances;     // Total number of resource instances
f0d7ba
     int priority_fencing_delay; // Priority fencing delay
af918f
 } pe_working_set_t;
af918f
diff --git a/lib/pengine/common.c b/lib/pengine/common.c
af918f
index e82434a..fc976d3 100644
af918f
--- a/lib/pengine/common.c
af918f
+++ b/lib/pengine/common.c
af918f
@@ -1,5 +1,7 @@
af918f
-/* 
af918f
- * Copyright (C) 2004 Andrew Beekhof <andrew@beekhof.net>
af918f
+/*
af918f
+ * Copyright 2004-2020 the Pacemaker project contributors
af918f
+ *
af918f
+ * The version control history for this file may have further details.
af918f
  * 
af918f
  * This library is free software; you can redistribute it and/or
af918f
  * modify it under the terms of the GNU Lesser General Public
af918f
@@ -101,6 +103,26 @@ pe_cluster_option pe_opts[] = {
af918f
 	  "When set to TRUE, the cluster will immediately ban a resource from a node if it fails to start there. When FALSE, the cluster will instead check the resource's fail count against its migration-threshold." },
af918f
 	{ "enable-startup-probes", NULL, "boolean", NULL, "true", &check_boolean,
af918f
 	  "Should the cluster check for active resources during startup", NULL },
af918f
+    {
af918f
+        XML_CONFIG_ATTR_SHUTDOWN_LOCK,
af918f
+        NULL, "boolean", NULL, "false", &check_boolean,
af918f
+        "Whether to lock resources to a cleanly shut down node",
af918f
+        "When true, resources active on a node when it is cleanly shut down "
af918f
+            "are kept \"locked\" to that node (not allowed to run elsewhere) "
af918f
+            "until they start again on that node after it rejoins (or for at "
af918f
+            "most shutdown-lock-limit, if set). Stonith resources and "
af918f
+            "Pacemaker Remote connections are never locked. Clone and bundle "
af918f
+            "instances and the master role of promotable clones are currently "
af918f
+            "never locked, though support could be added in a future release."
af918f
+    },
af918f
+    {
af918f
+        XML_CONFIG_ATTR_SHUTDOWN_LOCK_LIMIT,
af918f
+        NULL, "time", NULL, "0", &check_timer,
af918f
+        "Do not lock resources to a cleanly shut down node longer than this",
af918f
+        "If shutdown-lock is true and this is set to a nonzero time duration, "
af918f
+            "shutdown locks will expire after this much time has passed since "
af918f
+            "the shutdown was initiated, even if the node has not rejoined."
af918f
+    },
af918f
 
af918f
 	/* Stonith Options */
af918f
 	{ "stonith-enabled", "stonith_enabled", "boolean", NULL, "true", &check_boolean,
af918f
diff --git a/lib/pengine/unpack.c b/lib/pengine/unpack.c
af918f
index 24e56f5..7b0d837 100644
af918f
--- a/lib/pengine/unpack.c
af918f
+++ b/lib/pengine/unpack.c
af918f
@@ -340,6 +340,16 @@ unpack_config(xmlNode * config, pe_working_set_t * data_set)
af918f
     data_set->placement_strategy = pe_pref(data_set->config_hash, "placement-strategy");
af918f
     crm_trace("Placement strategy: %s", data_set->placement_strategy);
af918f
 
af918f
+    set_config_flag(data_set, "shutdown-lock", pe_flag_shutdown_lock);
af918f
+    crm_trace("Resources will%s be locked to cleanly shut down nodes",
af918f
+              (is_set(data_set->flags, pe_flag_shutdown_lock)? "" : " not"));
af918f
+    if (is_set(data_set->flags, pe_flag_shutdown_lock)) {
af918f
+        value = pe_pref(data_set->config_hash,
af918f
+                        XML_CONFIG_ATTR_SHUTDOWN_LOCK_LIMIT);
af918f
+        data_set->shutdown_lock = crm_get_interval(value) / 1000;
af918f
+        crm_trace("Shutdown locks expire after %us", data_set->shutdown_lock);
af918f
+    }
af918f
+
af918f
     return TRUE;
af918f
 }
af918f
 
af918f
-- 
af918f
1.8.3.1
af918f