Blame SOURCES/shutdown-lock-01.patch

b04960
From c3e2a63e08046b25f1773504d8c1ab431d3abf78 Mon Sep 17 00:00:00 2001
b04960
From: Ken Gaillot <kgaillot@redhat.com>
b04960
Date: Thu, 21 Nov 2019 14:48:02 -0600
b04960
Subject: [PATCH 01/10] Feature: scheduler: add shutdown lock cluster options
b04960
b04960
This commit adds shutdown-lock and shutdown-lock-limit options (just the
b04960
options, not the feature itself).
b04960
b04960
shutdown-lock defaults to false, which preserves current behavior. The intended
b04960
purpose of setting it to true is to *prevent* recovery of a node's resources
b04960
elsewhere when the node is cleanly shut down, until the node rejoins. If
b04960
shutdown-lock-limit is set to a nonzero time duration, the cluster will
b04960
be allowed to recover the resources if the node has not rejoined within this
b04960
time.
b04960
b04960
The use case is when rebooting a node (such as for software updates) is done by
b04960
cluster-unaware system administrators during scheduled maintenance windows,
b04960
resources prefer specific nodes, and resource recovery time is high.
b04960
---
b04960
 include/crm/msg_xml.h        |  6 +++++-
b04960
 include/crm/pengine/status.h |  2 ++
b04960
 lib/pengine/common.c         | 26 ++++++++++++++++++++++++--
b04960
 lib/pengine/unpack.c         | 10 ++++++++++
b04960
 4 files changed, 41 insertions(+), 3 deletions(-)
b04960
b04960
diff --git a/include/crm/msg_xml.h b/include/crm/msg_xml.h
b04960
index de99959..50fdf45 100644
b04960
--- a/include/crm/msg_xml.h
b04960
+++ b/include/crm/msg_xml.h
b04960
@@ -1,5 +1,7 @@
b04960
 /*
b04960
- * Copyright (C) 2004 Andrew Beekhof <andrew@beekhof.net>
b04960
+ * Copyright 2004-2020 the Pacemaker project contributors
b04960
+ *
b04960
+ * The version control history for this file may have further details.
b04960
  *
b04960
  * This program is free software; you can redistribute it and/or
b04960
  * modify it under the terms of the GNU Lesser General Public
b04960
@@ -378,6 +380,8 @@
b04960
 #  define XML_CONFIG_ATTR_FORCE_QUIT	"shutdown-escalation"
b04960
 #  define XML_CONFIG_ATTR_RECHECK	"cluster-recheck-interval"
b04960
 #  define XML_CONFIG_ATTR_FENCE_REACTION	"fence-reaction"
b04960
+#  define XML_CONFIG_ATTR_SHUTDOWN_LOCK         "shutdown-lock"
b04960
+#  define XML_CONFIG_ATTR_SHUTDOWN_LOCK_LIMIT   "shutdown-lock-limit"
b04960
 
b04960
 #  define XML_ALERT_ATTR_PATH		"path"
b04960
 #  define XML_ALERT_ATTR_TIMEOUT	"timeout"
b04960
diff --git a/include/crm/pengine/status.h b/include/crm/pengine/status.h
b04960
index 415f60e..c6d4bdb 100644
b04960
--- a/include/crm/pengine/status.h
b04960
+++ b/include/crm/pengine/status.h
b04960
@@ -83,6 +83,7 @@ enum pe_find {
b04960
 #  define pe_flag_start_failure_fatal   0x00001000ULL
b04960
 #  define pe_flag_remove_after_stop     0x00002000ULL
b04960
 #  define pe_flag_startup_fencing       0x00004000ULL
b04960
+#  define pe_flag_shutdown_lock         0x00008000ULL
b04960
 
b04960
 #  define pe_flag_startup_probes        0x00010000ULL
b04960
 #  define pe_flag_have_status           0x00020000ULL
b04960
@@ -148,6 +149,7 @@ typedef struct pe_working_set_s {
b04960
     GList *param_check; // History entries that need to be checked
b04960
     GList *stop_needed; // Containers that need stop actions
b04960
     int ninstances;     // Total number of resource instances
b04960
+    guint shutdown_lock;// How long (seconds) to lock resources to shutdown node
b04960
 } pe_working_set_t;
b04960
 
b04960
 enum pe_check_parameters {
b04960
diff --git a/lib/pengine/common.c b/lib/pengine/common.c
b04960
index e82434a..fc976d3 100644
b04960
--- a/lib/pengine/common.c
b04960
+++ b/lib/pengine/common.c
b04960
@@ -1,5 +1,7 @@
b04960
-/* 
b04960
- * Copyright (C) 2004 Andrew Beekhof <andrew@beekhof.net>
b04960
+/*
b04960
+ * Copyright 2004-2020 the Pacemaker project contributors
b04960
+ *
b04960
+ * The version control history for this file may have further details.
b04960
  * 
b04960
  * This library is free software; you can redistribute it and/or
b04960
  * modify it under the terms of the GNU Lesser General Public
b04960
@@ -101,6 +103,26 @@ pe_cluster_option pe_opts[] = {
b04960
 	  "When set to TRUE, the cluster will immediately ban a resource from a node if it fails to start there. When FALSE, the cluster will instead check the resource's fail count against its migration-threshold." },
b04960
 	{ "enable-startup-probes", NULL, "boolean", NULL, "true", &check_boolean,
b04960
 	  "Should the cluster check for active resources during startup", NULL },
b04960
+    {
b04960
+        XML_CONFIG_ATTR_SHUTDOWN_LOCK,
b04960
+        NULL, "boolean", NULL, "false", &check_boolean,
b04960
+        "Whether to lock resources to a cleanly shut down node",
b04960
+        "When true, resources active on a node when it is cleanly shut down "
b04960
+            "are kept \"locked\" to that node (not allowed to run elsewhere) "
b04960
+            "until they start again on that node after it rejoins (or for at "
b04960
+            "most shutdown-lock-limit, if set). Stonith resources and "
b04960
+            "Pacemaker Remote connections are never locked. Clone and bundle "
b04960
+            "instances and the master role of promotable clones are currently "
b04960
+            "never locked, though support could be added in a future release."
b04960
+    },
b04960
+    {
b04960
+        XML_CONFIG_ATTR_SHUTDOWN_LOCK_LIMIT,
b04960
+        NULL, "time", NULL, "0", &check_timer,
b04960
+        "Do not lock resources to a cleanly shut down node longer than this",
b04960
+        "If shutdown-lock is true and this is set to a nonzero time duration, "
b04960
+            "shutdown locks will expire after this much time has passed since "
b04960
+            "the shutdown was initiated, even if the node has not rejoined."
b04960
+    },
b04960
 
b04960
 	/* Stonith Options */
b04960
 	{ "stonith-enabled", "stonith_enabled", "boolean", NULL, "true", &check_boolean,
b04960
diff --git a/lib/pengine/unpack.c b/lib/pengine/unpack.c
b04960
index 24e56f5..7b0d837 100644
b04960
--- a/lib/pengine/unpack.c
b04960
+++ b/lib/pengine/unpack.c
b04960
@@ -340,6 +340,16 @@ unpack_config(xmlNode * config, pe_working_set_t * data_set)
b04960
     data_set->placement_strategy = pe_pref(data_set->config_hash, "placement-strategy");
b04960
     crm_trace("Placement strategy: %s", data_set->placement_strategy);
b04960
 
b04960
+    set_config_flag(data_set, "shutdown-lock", pe_flag_shutdown_lock);
b04960
+    crm_trace("Resources will%s be locked to cleanly shut down nodes",
b04960
+              (is_set(data_set->flags, pe_flag_shutdown_lock)? "" : " not"));
b04960
+    if (is_set(data_set->flags, pe_flag_shutdown_lock)) {
b04960
+        value = pe_pref(data_set->config_hash,
b04960
+                        XML_CONFIG_ATTR_SHUTDOWN_LOCK_LIMIT);
b04960
+        data_set->shutdown_lock = crm_get_interval(value) / 1000;
b04960
+        crm_trace("Shutdown locks expire after %us", data_set->shutdown_lock);
b04960
+    }
b04960
+
b04960
     return TRUE;
b04960
 }
b04960
 
b04960
-- 
b04960
1.8.3.1
b04960