Blame SOURCES/017-shutdown-lock.patch

ed4e54
From 45a6f0b051743c266c13f3ffd365baf3a9d730f6 Mon Sep 17 00:00:00 2001
ed4e54
From: Ken Gaillot <kgaillot@redhat.com>
ed4e54
Date: Tue, 14 Jan 2020 12:53:39 -0600
ed4e54
Subject: [PATCH 13/18] Low: controller: allow CRM_OP_LRM_DELETE to clear CIB
ed4e54
 only
ed4e54
ed4e54
Normally, CRM_OP_LRM_DELETE is relayed to the affected node's controller, which
ed4e54
clears the resource from the executor and CIB as well as its own bookkeeping.
ed4e54
ed4e54
Now, we want to be able to use it to clear shutdown locks for nodes that are
ed4e54
down. Let it take a new "mode" attribute, and if it is "cib", clear the
ed4e54
resource from the CIB locally without relaying the operation or doing anything
ed4e54
else.
ed4e54
---
ed4e54
 daemons/controld/controld_execd.c      |  4 +-
ed4e54
 daemons/controld/controld_messages.c   | 97 ++++++++++++++++++++++++++++++++--
ed4e54
 daemons/controld/controld_te_actions.c |  7 +++
ed4e54
 include/crm_internal.h                 |  2 +
ed4e54
 4 files changed, 106 insertions(+), 4 deletions(-)
ed4e54
ed4e54
diff --git a/daemons/controld/controld_execd.c b/daemons/controld/controld_execd.c
ed4e54
index c0436a2..8d25fb8 100644
ed4e54
--- a/daemons/controld/controld_execd.c
ed4e54
+++ b/daemons/controld/controld_execd.c
ed4e54
@@ -1769,7 +1769,9 @@ do_lrm_invoke(long long action,
ed4e54
     crm_trace("Executor %s command from %s", crm_op, from_sys);
ed4e54
 
ed4e54
     if (safe_str_eq(crm_op, CRM_OP_LRM_DELETE)) {
ed4e54
-        crm_rsc_delete = TRUE; // Only crm_resource uses this op
ed4e54
+        if (safe_str_neq(from_sys, CRM_SYSTEM_TENGINE)) {
ed4e54
+            crm_rsc_delete = TRUE; // from crm_resource
ed4e54
+        }
ed4e54
         operation = CRMD_ACTION_DELETE;
ed4e54
 
ed4e54
     } else if (safe_str_eq(crm_op, CRM_OP_LRM_FAIL)) {
ed4e54
diff --git a/daemons/controld/controld_messages.c b/daemons/controld/controld_messages.c
ed4e54
index 466c64c..689e4a0 100644
ed4e54
--- a/daemons/controld/controld_messages.c
ed4e54
+++ b/daemons/controld/controld_messages.c
ed4e54
@@ -410,6 +410,14 @@ relay_message(xmlNode * msg, gboolean originated_locally)
ed4e54
 
ed4e54
     } else if (safe_str_eq(fsa_our_uname, host_to)) {
ed4e54
         is_local = 1;
ed4e54
+    } else if (is_for_crm && safe_str_eq(task, CRM_OP_LRM_DELETE)) {
ed4e54
+        xmlNode *msg_data = get_message_xml(msg, F_CRM_DATA);
ed4e54
+        const char *mode = crm_element_value(msg_data, PCMK__XA_MODE);
ed4e54
+
ed4e54
+        if (safe_str_eq(mode, XML_TAG_CIB)) {
ed4e54
+            // Local delete of an offline node's resource history
ed4e54
+            is_local = 1;
ed4e54
+        }
ed4e54
     }
ed4e54
 
ed4e54
     if (is_for_dc || is_for_dcib || is_for_te) {
ed4e54
@@ -654,6 +662,86 @@ handle_failcount_op(xmlNode * stored_msg)
ed4e54
     return I_NULL;
ed4e54
 }
ed4e54
 
ed4e54
+static enum crmd_fsa_input
ed4e54
+handle_lrm_delete(xmlNode *stored_msg)
ed4e54
+{
ed4e54
+    const char *mode = NULL;
ed4e54
+    xmlNode *msg_data = get_message_xml(stored_msg, F_CRM_DATA);
ed4e54
+
ed4e54
+    CRM_CHECK(msg_data != NULL, return I_NULL);
ed4e54
+
ed4e54
+    /* CRM_OP_LRM_DELETE has two distinct modes. The default behavior is to
ed4e54
+     * relay the operation to the affected node, which will unregister the
ed4e54
+     * resource from the local executor, clear the resource's history from the
ed4e54
+     * CIB, and do some bookkeeping in the controller.
ed4e54
+     *
ed4e54
+     * However, if the affected node is offline, the client will specify
ed4e54
+     * mode="cib" which means the controller receiving the operation should
ed4e54
+     * clear the resource's history from the CIB and nothing else. This is used
ed4e54
+     * to clear shutdown locks.
ed4e54
+     */
ed4e54
+    mode = crm_element_value(msg_data, PCMK__XA_MODE);
ed4e54
+    if ((mode == NULL) || strcmp(mode, XML_TAG_CIB)) {
ed4e54
+        // Relay to affected node
ed4e54
+        crm_xml_add(stored_msg, F_CRM_SYS_TO, CRM_SYSTEM_LRMD);
ed4e54
+        return I_ROUTER;
ed4e54
+
ed4e54
+    } else {
ed4e54
+        // Delete CIB history locally (compare with do_lrm_delete())
ed4e54
+        const char *from_sys = NULL;
ed4e54
+        const char *user_name = NULL;
ed4e54
+        const char *rsc_id = NULL;
ed4e54
+        const char *node = NULL;
ed4e54
+        xmlNode *rsc_xml = NULL;
ed4e54
+        int rc = pcmk_rc_ok;
ed4e54
+
ed4e54
+        rsc_xml = first_named_child(msg_data, XML_CIB_TAG_RESOURCE);
ed4e54
+        CRM_CHECK(rsc_xml != NULL, return I_NULL);
ed4e54
+
ed4e54
+        rsc_id = ID(rsc_xml);
ed4e54
+        from_sys = crm_element_value(stored_msg, F_CRM_SYS_FROM);
ed4e54
+        node = crm_element_value(msg_data, XML_LRM_ATTR_TARGET);
ed4e54
+#if ENABLE_ACL
ed4e54
+        user_name = crm_acl_get_set_user(stored_msg, F_CRM_USER, NULL);
ed4e54
+#endif
ed4e54
+        crm_debug("Handling " CRM_OP_LRM_DELETE " for %s on %s locally%s%s "
ed4e54
+                  "(clearing CIB resource history only)", rsc_id, node,
ed4e54
+                  (user_name? " for user " : ""), (user_name? user_name : ""));
ed4e54
+#if ENABLE_ACL
ed4e54
+        rc = controld_delete_resource_history(rsc_id, node, user_name,
ed4e54
+                                              cib_dryrun|cib_sync_call);
ed4e54
+#endif
ed4e54
+        if (rc == pcmk_rc_ok) {
ed4e54
+            rc = controld_delete_resource_history(rsc_id, node, user_name,
ed4e54
+                                                  crmd_cib_smart_opt());
ed4e54
+        }
ed4e54
+
ed4e54
+        // Notify client if not from graph (compare with notify_deleted())
ed4e54
+        if (from_sys && strcmp(from_sys, CRM_SYSTEM_TENGINE)) {
ed4e54
+            lrmd_event_data_t *op = NULL;
ed4e54
+            const char *from_host = crm_element_value(stored_msg,
ed4e54
+                                                      F_CRM_HOST_FROM);
ed4e54
+            const char *transition = crm_element_value(msg_data,
ed4e54
+                                                       XML_ATTR_TRANSITION_KEY);
ed4e54
+
ed4e54
+            crm_info("Notifying %s on %s that %s was%s deleted",
ed4e54
+                     from_sys, (from_host? from_host : "local node"), rsc_id,
ed4e54
+                     ((rc == pcmk_rc_ok)? "" : " not"));
ed4e54
+            op = lrmd_new_event(rsc_id, CRMD_ACTION_DELETE, 0);
ed4e54
+            op->type = lrmd_event_exec_complete;
ed4e54
+            op->user_data = strdup(transition? transition : FAKE_TE_ID);
ed4e54
+            op->params = crm_str_table_new();
ed4e54
+            g_hash_table_insert(op->params, strdup(XML_ATTR_CRM_VERSION),
ed4e54
+                                strdup(CRM_FEATURE_SET));
ed4e54
+            controld_rc2event(op, rc);
ed4e54
+            controld_ack_event_directly(from_host, from_sys, NULL, op, rsc_id);
ed4e54
+            lrmd_free_event(op);
ed4e54
+            controld_trigger_delete_refresh(from_sys, rsc_id);
ed4e54
+        }
ed4e54
+        return I_NULL;
ed4e54
+    }
ed4e54
+}
ed4e54
+
ed4e54
 /*!
ed4e54
  * \brief Handle a CRM_OP_REMOTE_STATE message by updating remote peer cache
ed4e54
  *
ed4e54
@@ -913,9 +1001,12 @@ handle_request(xmlNode * stored_msg, enum crmd_fsa_cause cause)
ed4e54
         crm_debug("Raising I_JOIN_RESULT: join-%s", crm_element_value(stored_msg, F_CRM_JOIN_ID));
ed4e54
         return I_JOIN_RESULT;
ed4e54
 
ed4e54
-    } else if (strcmp(op, CRM_OP_LRM_DELETE) == 0
ed4e54
-               || strcmp(op, CRM_OP_LRM_FAIL) == 0
ed4e54
-               || strcmp(op, CRM_OP_LRM_REFRESH) == 0 || strcmp(op, CRM_OP_REPROBE) == 0) {
ed4e54
+    } else if (strcmp(op, CRM_OP_LRM_DELETE) == 0) {
ed4e54
+        return handle_lrm_delete(stored_msg);
ed4e54
+
ed4e54
+    } else if ((strcmp(op, CRM_OP_LRM_FAIL) == 0)
ed4e54
+               || (strcmp(op, CRM_OP_LRM_REFRESH) == 0)
ed4e54
+               || (strcmp(op, CRM_OP_REPROBE) == 0)) {
ed4e54
 
ed4e54
         crm_xml_add(stored_msg, F_CRM_SYS_TO, CRM_SYSTEM_LRMD);
ed4e54
         return I_ROUTER;
ed4e54
diff --git a/daemons/controld/controld_te_actions.c b/daemons/controld/controld_te_actions.c
ed4e54
index 948bd64..59e0b5a 100644
ed4e54
--- a/daemons/controld/controld_te_actions.c
ed4e54
+++ b/daemons/controld/controld_te_actions.c
ed4e54
@@ -107,6 +107,13 @@ te_crm_command(crm_graph_t * graph, crm_action_t * action)
ed4e54
 
ed4e54
     if (!router_node) {
ed4e54
         router_node = on_node;
ed4e54
+        if (safe_str_eq(task, CRM_OP_LRM_DELETE)) {
ed4e54
+            const char *mode = crm_element_value(action->xml, PCMK__XA_MODE);
ed4e54
+
ed4e54
+            if (safe_str_eq(mode, XML_TAG_CIB)) {
ed4e54
+                router_node = fsa_our_uname;
ed4e54
+            }
ed4e54
+        }
ed4e54
     }
ed4e54
 
ed4e54
     CRM_CHECK(on_node != NULL && strlen(on_node) != 0,
ed4e54
diff --git a/include/crm_internal.h b/include/crm_internal.h
ed4e54
index 1f25686..2fa53dd 100644
ed4e54
--- a/include/crm_internal.h
ed4e54
+++ b/include/crm_internal.h
ed4e54
@@ -216,6 +216,8 @@ pid_t pcmk_locate_sbd(void);
ed4e54
 #  define ATTRD_OP_SYNC_RESPONSE "sync-response"
ed4e54
 #  define ATTRD_OP_CLEAR_FAILURE "clear-failure"
ed4e54
 
ed4e54
+#  define PCMK__XA_MODE             "mode"
ed4e54
+
ed4e54
 #  define PCMK_ENV_PHYSICAL_HOST "physical_host"
ed4e54
 
ed4e54
 
ed4e54
-- 
ed4e54
1.8.3.1
ed4e54