af918f
From 538aeef2523017a9b5c1ba950c984ed16efdc9a0 Mon Sep 17 00:00:00 2001
af918f
From: Ken Gaillot <kgaillot@redhat.com>
af918f
Date: Fri, 22 Nov 2019 17:03:20 -0600
af918f
Subject: [PATCH 04/10] Low: controller: mark shutdown-locked resources in
af918f
 resource history
af918f
af918f
When a graph action indicates that the resource should be shutdown-locked
af918f
to its node, store the shutdown lock time in active_op_t so we can remember
af918f
that when the result comes back. When the result does come back, add
af918f
"shutdown-lock" to its lrm_resource entry in the CIB status section -- as
af918f
the timestamp if it's a successful stop or a probe finding the resource
af918f
inactive, or as 0 to clear the lock for any other operation.
af918f
---
af918f
 crmd/control.c      |  9 ++++++-
af918f
 crmd/crmd_lrm.h     |  1 +
af918f
 crmd/crmd_utils.h   |  1 +
af918f
 crmd/lrm.c          | 46 ++++++++++++++++++++++++++++++++----
af918f
 crmd/te_callbacks.c | 68 +++++++++++++++++++++++++++++++++++++----------------
af918f
 5 files changed, 99 insertions(+), 26 deletions(-)
af918f
af918f
diff --git a/crmd/control.c b/crmd/control.c
af918f
index cd4223f..47dabf1 100644
af918f
--- a/crmd/control.c
af918f
+++ b/crmd/control.c
af918f
@@ -1,5 +1,5 @@
af918f
 /*
af918f
- * Copyright 2004-2019 the Pacemaker project contributors
af918f
+ * Copyright 2004-2020 the Pacemaker project contributors
af918f
  *
af918f
  * The version control history for this file may have further details.
af918f
  *
af918f
@@ -51,6 +51,7 @@ gboolean fsa_has_quorum = FALSE;
af918f
 crm_trigger_t *fsa_source = NULL;
af918f
 crm_trigger_t *config_read = NULL;
af918f
 bool no_quorum_suicide_escalation = FALSE;
af918f
+bool controld_shutdown_lock_enabled = false;
af918f
 
af918f
 /*	 A_HA_CONNECT	*/
af918f
 void
af918f
@@ -971,7 +972,10 @@ pe_cluster_option crmd_opts[] = {
af918f
         { "stonith-max-attempts",NULL,"integer",NULL,"10",&check_positive_number,
af918f
           "How many times stonith can fail before it will no longer be attempted on a target"
af918f
         },   
af918f
+
af918f
+    // Already documented in libpe_status (other values must be kept identical)
af918f
 	{ "no-quorum-policy", "no_quorum_policy", "enum", "stop, freeze, ignore, suicide", "stop", &check_quorum, NULL, NULL },
af918f
+    { XML_CONFIG_ATTR_SHUTDOWN_LOCK, NULL, "boolean", NULL, "false", &check_boolean, NULL, NULL },
af918f
 
af918f
 #if SUPPORT_PLUGIN
af918f
 	{ XML_ATTR_EXPECTED_VOTES, NULL, "integer", NULL, "2", &check_number, "The number of nodes expected to be in the cluster", "Used to calculate quorum in openais based clusters." },
af918f
@@ -1094,6 +1098,9 @@ config_query_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void
af918f
     value = crmd_pref(config_hash, "crmd-finalization-timeout");
af918f
     finalization_timer->period_ms = crm_get_msec(value);
af918f
 
af918f
+    value = crmd_pref(config_hash, XML_CONFIG_ATTR_SHUTDOWN_LOCK);
af918f
+    controld_shutdown_lock_enabled = crm_is_true(value);
af918f
+
af918f
 #if SUPPORT_COROSYNC
af918f
     if (is_classic_ais_cluster()) {
af918f
         value = crmd_pref(config_hash, XML_ATTR_EXPECTED_VOTES);
af918f
diff --git a/crmd/crmd_lrm.h b/crmd/crmd_lrm.h
af918f
index 7d35264..ecc2511 100644
af918f
--- a/crmd/crmd_lrm.h
af918f
+++ b/crmd/crmd_lrm.h
af918f
@@ -46,6 +46,7 @@ typedef struct active_op_s {
af918f
     int interval;
af918f
     uint32_t flags; // bitmask of active_op_e
af918f
     unsigned int start_time;
af918f
+    time_t lock_time;
af918f
     char *rsc_id;
af918f
     char *op_type;
af918f
     char *op_key;
af918f
diff --git a/crmd/crmd_utils.h b/crmd/crmd_utils.h
af918f
index eeaa8b7..9ecce88 100644
af918f
--- a/crmd/crmd_utils.h
af918f
+++ b/crmd/crmd_utils.h
af918f
@@ -63,6 +63,7 @@ fsa_cib_anon_update(const char *section, xmlNode *data) {
af918f
 }
af918f
 
af918f
 extern gboolean fsa_has_quorum;
af918f
+extern bool controld_shutdown_lock_enabled;
af918f
 extern int last_peer_update;
af918f
 extern int last_resource_update;
af918f
 
af918f
diff --git a/crmd/lrm.c b/crmd/lrm.c
af918f
index 27fdd8b..9156ab8 100644
af918f
--- a/crmd/lrm.c
af918f
+++ b/crmd/lrm.c
af918f
@@ -47,7 +47,8 @@ static void do_lrm_rsc_op(lrm_state_t *lrm_state, lrmd_rsc_info_t *rsc,
af918f
 
af918f
 static gboolean lrm_state_verify_stopped(lrm_state_t * lrm_state, enum crmd_fsa_state cur_state,
af918f
                                          int log_level);
af918f
-static int do_update_resource(const char *node_name, lrmd_rsc_info_t * rsc, lrmd_event_data_t * op);
af918f
+static int do_update_resource(const char *node_name, lrmd_rsc_info_t *rsc,
af918f
+                              lrmd_event_data_t *op, time_t lock_time);
af918f
 
af918f
 static void
af918f
 lrm_connection_destroy(void)
af918f
@@ -2168,7 +2169,7 @@ record_pending_op(const char *node_name, lrmd_rsc_info_t *rsc, lrmd_event_data_t
af918f
     crm_debug("Recording pending op %s_%s_%d on %s in the CIB",
af918f
               op->rsc_id, op->op_type, op->interval, node_name);
af918f
 
af918f
-    do_update_resource(node_name, rsc, op);
af918f
+    do_update_resource(node_name, rsc, op, 0);
af918f
 }
af918f
 
af918f
 static void
af918f
@@ -2309,7 +2310,11 @@ do_lrm_rsc_op(lrm_state_t *lrm_state, lrmd_rsc_info_t *rsc,
af918f
         pending->op_key = strdup(op_id);
af918f
         pending->rsc_id = strdup(rsc->id);
af918f
         pending->start_time = time(NULL);
af918f
-        pending->user_data = strdup(op->user_data);
af918f
+        pending->user_data = op->user_data? strdup(op->user_data) : NULL;
af918f
+        if (crm_element_value_epoch(msg, XML_CONFIG_ATTR_SHUTDOWN_LOCK,
af918f
+                                    &(pending->lock_time)) != pcmk_ok) {
af918f
+            pending->lock_time = 0;
af918f
+        }
af918f
         g_hash_table_replace(lrm_state->pending_ops, call_id_s, pending);
af918f
 
af918f
         if (op->interval > 0 && op->start_delay > START_DELAY_THRESHOLD) {
af918f
@@ -2356,8 +2361,28 @@ cib_rsc_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *use
af918f
     }
af918f
 }
af918f
 
af918f
+/* Only successful stops, and probes that found the resource inactive, get locks
af918f
+ * recorded in the history. This ensures the resource stays locked to the node
af918f
+ * until it is active there again after the node comes back up.
af918f
+ */
af918f
+static bool
af918f
+should_preserve_lock(lrmd_event_data_t *op)
af918f
+{
af918f
+    if (!controld_shutdown_lock_enabled) {
af918f
+        return false;
af918f
+    }
af918f
+    if (!strcmp(op->op_type, RSC_STOP) && (op->rc == PCMK_OCF_OK)) {
af918f
+        return true;
af918f
+    }
af918f
+    if (!strcmp(op->op_type, RSC_STATUS) && (op->rc == PCMK_OCF_NOT_RUNNING)) {
af918f
+        return true;
af918f
+    }
af918f
+    return false;
af918f
+}
af918f
+
af918f
 static int
af918f
-do_update_resource(const char *node_name, lrmd_rsc_info_t * rsc, lrmd_event_data_t * op)
af918f
+do_update_resource(const char *node_name, lrmd_rsc_info_t *rsc,
af918f
+                   lrmd_event_data_t *op, time_t lock_time)
af918f
 {
af918f
 /*
af918f
   <status>
af918f
@@ -2412,6 +2437,16 @@ do_update_resource(const char *node_name, lrmd_rsc_info_t * rsc, lrmd_event_data
af918f
         crm_xml_add(iter, XML_ATTR_TYPE, rsc->type);
af918f
         crm_xml_add(iter, XML_AGENT_ATTR_CLASS, rsc->class);
af918f
         crm_xml_add(iter, XML_AGENT_ATTR_PROVIDER, rsc->provider);
af918f
+        if (lock_time != 0) {
af918f
+            /* Actions on a locked resource should either preserve the lock by
af918f
+             * recording it with the action result, or clear it.
af918f
+             */
af918f
+            if (!should_preserve_lock(op)) {
af918f
+                lock_time = 0;
af918f
+            }
af918f
+            crm_xml_add_ll(iter, XML_CONFIG_ATTR_SHUTDOWN_LOCK,
af918f
+                           (long long) lock_time);
af918f
+        }
af918f
 
af918f
         if (op->params) {
af918f
             container = g_hash_table_lookup(op->params, CRM_META"_"XML_RSC_ATTR_CONTAINER);
af918f
@@ -2600,7 +2635,8 @@ process_lrm_event(lrm_state_t *lrm_state, lrmd_event_data_t *op,
af918f
         if (controld_action_is_recordable(op->op_type)) {
af918f
             if (node_name && rsc) {
af918f
                 // We should record the result, and happily, we can
af918f
-                update_id = do_update_resource(node_name, rsc, op);
af918f
+                update_id = do_update_resource(node_name, rsc, op,
af918f
+                                               pending? pending->lock_time : 0);
af918f
                 need_direct_ack = FALSE;
af918f
 
af918f
             } else if (op->rsc_deleted) {
af918f
diff --git a/crmd/te_callbacks.c b/crmd/te_callbacks.c
af918f
index 9faf932..46a4393 100644
af918f
--- a/crmd/te_callbacks.c
af918f
+++ b/crmd/te_callbacks.c
af918f
@@ -1,5 +1,7 @@
af918f
 /*
af918f
- * Copyright (C) 2004 Andrew Beekhof <andrew@beekhof.net>
af918f
+ * Copyright 2004-2020 the Pacemaker project contributors
af918f
+ *
af918f
+ * The version control history for this file may have further details.
af918f
  *
af918f
  * This program is free software; you can redistribute it and/or
af918f
  * modify it under the terms of the GNU General Public
af918f
@@ -52,6 +54,18 @@ update_stonith_max_attempts(const char* value)
af918f
        stonith_max_attempts = crm_int_helper(value, NULL);
af918f
     }
af918f
 }
af918f
+
af918f
+// An explicit shutdown-lock of 0 means the lock has been cleared
af918f
+static bool
af918f
+shutdown_lock_cleared(xmlNode *lrm_resource)
af918f
+{
af918f
+    time_t shutdown_lock = 0;
af918f
+
af918f
+    return (crm_element_value_epoch(lrm_resource, XML_CONFIG_ATTR_SHUTDOWN_LOCK,
af918f
+                                    &shutdown_lock) == pcmk_ok)
af918f
+           && (shutdown_lock == 0);
af918f
+}
af918f
+
af918f
 static void
af918f
 te_update_diff_v1(const char *event, xmlNode *diff)
af918f
 {
af918f
@@ -130,33 +144,42 @@ te_update_diff_v1(const char *event, xmlNode *diff)
af918f
     }
af918f
     freeXpathObject(xpathObj);
af918f
 
af918f
+    // Check for lrm_resource entries
af918f
+    xpathObj = xpath_search(diff,
af918f
+                            "//" F_CIB_UPDATE_RESULT
af918f
+                            "//" XML_TAG_DIFF_ADDED
af918f
+                            "//" XML_LRM_TAG_RESOURCE);
af918f
+    max = numXpathResults(xpathObj);
af918f
+
af918f
     /*
af918f
-     * Updates by, or in response to, TE actions will never contain updates
af918f
-     * for more than one resource at a time, so such updates indicate an
af918f
-     * LRM refresh.
af918f
-     *
af918f
-     * In that case, start a new transition rather than check each result
af918f
-     * individually, which can result in _huge_ speedups in large clusters.
af918f
+     * Updates by, or in response to, graph actions will never affect more than
af918f
+     * one resource at a time, so such updates indicate an LRM refresh. In that
af918f
+     * case, start a new transition rather than check each result individually,
af918f
+     * which can result in _huge_ speedups in large clusters.
af918f
      *
af918f
      * Unfortunately, we can only do so when there are no pending actions.
af918f
      * Otherwise, we could mistakenly throw away those results here, and
af918f
      * the cluster will stall waiting for them and time out the operation.
af918f
      */
af918f
-    if (transition_graph->pending == 0) {
af918f
-        xpathObj = xpath_search(diff,
af918f
-                                "//" F_CIB_UPDATE_RESULT
af918f
-                                "//" XML_TAG_DIFF_ADDED
af918f
-                                "//" XML_LRM_TAG_RESOURCE);
af918f
-        max = numXpathResults(xpathObj);
af918f
-        if (max > 1) {
af918f
-            crm_debug("Ignoring resource operation updates due to LRM refresh of %d resources",
af918f
-                      max);
af918f
-            crm_log_xml_trace(diff, "lrm-refresh");
af918f
-            abort_transition(INFINITY, tg_restart, "LRM Refresh", NULL);
af918f
-            goto bail;
af918f
+    if ((transition_graph->pending == 0) && (max > 1)) {
af918f
+        crm_debug("Ignoring resource operation updates due to history refresh of %d resources",
af918f
+                  max);
af918f
+        crm_log_xml_trace(diff, "lrm-refresh");
af918f
+        abort_transition(INFINITY, tg_restart, "History refresh", NULL);
af918f
+        goto bail;
af918f
+    }
af918f
+
af918f
+    if (max == 1) {
af918f
+        xmlNode *lrm_resource = getXpathResult(xpathObj, 0);
af918f
+
af918f
+        if (shutdown_lock_cleared(lrm_resource)) {
af918f
+            // @TODO would be more efficient to abort once after transition done
af918f
+            abort_transition(INFINITY, tg_restart, "Shutdown lock cleared",
af918f
+                             lrm_resource);
af918f
+            // Still process results, so we stop timers and update failcounts
af918f
         }
af918f
-        freeXpathObject(xpathObj);
af918f
     }
af918f
+    freeXpathObject(xpathObj);
af918f
 
af918f
     /* Process operation updates */
af918f
     xpathObj =
af918f
@@ -229,6 +252,11 @@ process_lrm_resource_diff(xmlNode *lrm_resource, const char *node)
af918f
          rsc_op = __xml_next(rsc_op)) {
af918f
         process_graph_event(rsc_op, node);
af918f
     }
af918f
+    if (shutdown_lock_cleared(lrm_resource)) {
af918f
+        // @TODO would be more efficient to abort once after transition done
af918f
+        abort_transition(INFINITY, tg_restart, "Shutdown lock cleared",
af918f
+                         lrm_resource);
af918f
+    }
af918f
 }
af918f
 
af918f
 static void
af918f
-- 
af918f
1.8.3.1
af918f