Blame SOURCES/002-failed-monitors.patch

4c8e44
From 5470f1d9c776dbf753e015fa96153b6a63c17b83 Mon Sep 17 00:00:00 2001
4c8e44
From: "Gao,Yan" <ygao@suse.com>
4c8e44
Date: Thu, 9 May 2019 13:24:35 +0200
4c8e44
Subject: [PATCH] Fix: controller: confirm cancel of failed monitors
4c8e44
4c8e44
Usually after a monitor has been cancelled from executor, controller
4c8e44
erases the corresponding lrm_rsc_op from the cib, and DC will confirm
4c8e44
the cancel action by process_op_deletion() according to the cib diff.
4c8e44
4c8e44
But if a monitor has failed, the lrm_rsc_op will be recorded as
4c8e44
"last_failure". When cancelling it, the lrm_rsc_op won't get erased from
4c8e44
the cib, by design of the logic in erase_lrm_history_by_op(). As a result,
4c8e44
the cancel action won't have a chance to get confirmed by the DC via
4c8e44
process_op_deletion().
4c8e44
4c8e44
Previously cluster transition would get stuck waiting for the remaining
4c8e44
action timer to time out.
4c8e44
4c8e44
This commit fixes the issue by directly acknowledging the cancel action
4c8e44
in this case and enabling DC to be able to confirm it.
4c8e44
4c8e44
This also moves get_node_id() function into controld_utils.c for common
4c8e44
use.
4c8e44
4c8e44
Reproducer:
4c8e44
```
4c8e44
 # Insert a 10s sleep in the monitor action of RA
4c8e44
 # /usr/lib/ocf/resource.d/pacemaker/Stateful:
4c8e44
4c8e44
  stateful_monitor() {
4c8e44
 +    sleep 10
4c8e44
      stateful_check_state "master"
4c8e44
4c8e44
 # Add a promotable clone resource:
4c8e44
4c8e44
 crm configure primitive stateful ocf:pacemaker:Stateful \
4c8e44
         op monitor interval=5 role=Master \
4c8e44
         op monitor interval=10 role=Slave
4c8e44
 crm configure clone p-clone stateful \
4c8e44
         meta promotable=true
4c8e44
4c8e44
 # Wait for the resource instance to be started, promoted to be master,
4c8e44
 # and monitor for master role to complete.
4c8e44
4c8e44
 # Set is-managed=false for the promotable clone:
4c8e44
 crm_resource --meta -p is-managed -v false -r p-clone
4c8e44
4c8e44
 # Change the status of the master instance to be slave and immediately
4c8e44
 # enforce refresh of it:
4c8e44
 echo slave > /var/run/Stateful-stateful.state; crm_resource --refresh -r stateful --force
4c8e44
4c8e44
 # Wait for probe to complete, and then monitor for slave role to be
4c8e44
 # issued:
4c8e44
 sleep 15
4c8e44
4c8e44
 # While the monitor for slave role is still in progress, change the
4c8e44
 # status to be master again:
4c8e44
 echo master > /var/run/Stateful-stateful.state
4c8e44
4c8e44
 # The monitor for slave role returns error. Cluster issues monitor for
4c8e44
 # master role instead and tries to cancel the failed one for slave role.
4c8e44
 # But cluster transition gets stuck. Depending on the monitor timeout
4c8e44
 # configured for the slave role plus cluster-delay, only after that
4c8e44
 # controller eventually says:
4c8e44
4c8e44
 pacemaker-controld[21205] error: Node opensuse150 did not send cancel result (via controller) within 20000ms (action timeout plus cluster-delay)
4c8e44
 pacemaker-controld[21205] error: [Action    1]: In-flight rsc op stateful_monitor_10000            on opensuse150 (priority: 0, waiting: none)
4c8e44
 pacemaker-controld[21205] notice: Transition 6 aborted: Action lost
4c8e44
4c8e44
```
4c8e44
---
4c8e44
 daemons/controld/controld_execd.c        | 38 ++++++++++++++++++++++++++++++++
4c8e44
 daemons/controld/controld_te_callbacks.c | 21 ++----------------
4c8e44
 daemons/controld/controld_te_events.c    | 32 +++++++++++++++++++++++++++
4c8e44
 daemons/controld/controld_transition.h   |  1 +
4c8e44
 daemons/controld/controld_utils.c        | 13 +++++++++++
4c8e44
 daemons/controld/controld_utils.h        |  2 ++
4c8e44
 6 files changed, 88 insertions(+), 19 deletions(-)
4c8e44
4c8e44
diff --git a/daemons/controld/controld_execd.c b/daemons/controld/controld_execd.c
4c8e44
index 976fed1..8282fed 100644
4c8e44
--- a/daemons/controld/controld_execd.c
4c8e44
+++ b/daemons/controld/controld_execd.c
4c8e44
@@ -2476,6 +2476,30 @@ unescape_newlines(const char *string)
4c8e44
     return ret;
4c8e44
 }
4c8e44
 
4c8e44
+static bool
4c8e44
+did_lrm_rsc_op_fail(lrm_state_t *lrm_state, const char * rsc_id,
4c8e44
+                    const char * op_type, guint interval_ms)
4c8e44
+{
4c8e44
+    rsc_history_t *entry = NULL;
4c8e44
+
4c8e44
+    CRM_CHECK(lrm_state != NULL, return FALSE);
4c8e44
+    CRM_CHECK(rsc_id != NULL, return FALSE);
4c8e44
+    CRM_CHECK(op_type != NULL, return FALSE);
4c8e44
+
4c8e44
+    entry = g_hash_table_lookup(lrm_state->resource_history, rsc_id);
4c8e44
+    if (entry == NULL || entry->failed == NULL) {
4c8e44
+        return FALSE;
4c8e44
+    }
4c8e44
+
4c8e44
+    if (crm_str_eq(entry->failed->rsc_id, rsc_id, TRUE)
4c8e44
+        && safe_str_eq(entry->failed->op_type, op_type)
4c8e44
+        && entry->failed->interval_ms == interval_ms) {
4c8e44
+        return TRUE;
4c8e44
+    }
4c8e44
+
4c8e44
+    return FALSE;
4c8e44
+}
4c8e44
+
4c8e44
 void
4c8e44
 process_lrm_event(lrm_state_t *lrm_state, lrmd_event_data_t *op,
4c8e44
                   struct recurring_op_s *pending, xmlNode *action_xml)
4c8e44
@@ -2605,6 +2629,20 @@ process_lrm_event(lrm_state_t *lrm_state, lrmd_event_data_t *op,
4c8e44
             erase_lrm_history_by_op(lrm_state, op);
4c8e44
         }
4c8e44
 
4c8e44
+        /* If the recurring operation had failed, the lrm_rsc_op is recorded as
4c8e44
+         * "last_failure" which won't get erased from the cib given the logic on
4c8e44
+         * purpose in erase_lrm_history_by_op(). So that the cancel action won't
4c8e44
+         * have a chance to get confirmed by DC with process_op_deletion().
4c8e44
+         * Cluster transition would get stuck waiting for the remaining action
4c8e44
+         * timer to time out.
4c8e44
+         *
4c8e44
+         * Directly acknowledge the cancel operation in this case.
4c8e44
+         */
4c8e44
+        if (did_lrm_rsc_op_fail(lrm_state, pending->rsc_id,
4c8e44
+                                pending->op_type, pending->interval_ms)) {
4c8e44
+            need_direct_ack = TRUE;
4c8e44
+        }
4c8e44
+
4c8e44
     } else if (op->rsc_deleted) {
4c8e44
         /* This recurring operation was cancelled (but not by us, and the
4c8e44
          * executor does not have resource information, likely due to resource
4c8e44
diff --git a/daemons/controld/controld_te_callbacks.c b/daemons/controld/controld_te_callbacks.c
4c8e44
index 51d908e..22b5f4b 100644
4c8e44
--- a/daemons/controld/controld_te_callbacks.c
4c8e44
+++ b/daemons/controld/controld_te_callbacks.c
4c8e44
@@ -32,19 +32,6 @@ static unsigned long int stonith_max_attempts = 10;
4c8e44
 /* #define RSC_OP_TEMPLATE "//"XML_TAG_DIFF_ADDED"//"XML_TAG_CIB"//"XML_CIB_TAG_STATE"[@uname='%s']"//"XML_LRM_TAG_RSC_OP"[@id='%s]" */
4c8e44
 #define RSC_OP_TEMPLATE "//"XML_TAG_DIFF_ADDED"//"XML_TAG_CIB"//"XML_LRM_TAG_RSC_OP"[@id='%s']"
4c8e44
 
4c8e44
-static const char *
4c8e44
-get_node_id(xmlNode * rsc_op)
4c8e44
-{
4c8e44
-    xmlNode *node = rsc_op;
4c8e44
-
4c8e44
-    while (node != NULL && safe_str_neq(XML_CIB_TAG_STATE, TYPE(node))) {
4c8e44
-        node = node->parent;
4c8e44
-    }
4c8e44
-
4c8e44
-    CRM_CHECK(node != NULL, return NULL);
4c8e44
-    return ID(node);
4c8e44
-}
4c8e44
-
4c8e44
 void
4c8e44
 update_stonith_max_attempts(const char* value)
4c8e44
 {
4c8e44
@@ -374,12 +361,8 @@ process_op_deletion(const char *xpath, xmlNode *change)
4c8e44
     node_uuid = extract_node_uuid(xpath);
4c8e44
     cancel = get_cancel_action(key, node_uuid);
4c8e44
     if (cancel) {
4c8e44
-        crm_info("Cancellation of %s on %s confirmed (%d)",
4c8e44
-                 key, node_uuid, cancel->id);
4c8e44
-        stop_te_timer(cancel->timer);
4c8e44
-        te_action_confirmed(cancel);
4c8e44
-        update_graph(transition_graph, cancel);
4c8e44
-        trigger_graph();
4c8e44
+        confirm_cancel_action(cancel);
4c8e44
+
4c8e44
     } else {
4c8e44
         abort_transition(INFINITY, tg_restart, "Resource operation removal",
4c8e44
                          change);
4c8e44
diff --git a/daemons/controld/controld_te_events.c b/daemons/controld/controld_te_events.c
4c8e44
index c0d096f..b7b48a4 100644
4c8e44
--- a/daemons/controld/controld_te_events.c
4c8e44
+++ b/daemons/controld/controld_te_events.c
4c8e44
@@ -355,6 +355,27 @@ get_cancel_action(const char *id, const char *node)
4c8e44
     return NULL;
4c8e44
 }
4c8e44
 
4c8e44
+void
4c8e44
+confirm_cancel_action(crm_action_t *cancel)
4c8e44
+{
4c8e44
+    const char *op_key = NULL;
4c8e44
+    const char *node_name = NULL;
4c8e44
+
4c8e44
+    CRM_ASSERT(cancel != NULL);
4c8e44
+
4c8e44
+    op_key = crm_element_value(cancel->xml, XML_LRM_ATTR_TASK_KEY);
4c8e44
+    node_name = crm_element_value(cancel->xml, XML_LRM_ATTR_TARGET);
4c8e44
+
4c8e44
+    stop_te_timer(cancel->timer);
4c8e44
+    te_action_confirmed(cancel);
4c8e44
+    update_graph(transition_graph, cancel);
4c8e44
+
4c8e44
+    crm_info("Cancellation of %s on %s confirmed (action %d)",
4c8e44
+             op_key, node_name, cancel->id);
4c8e44
+
4c8e44
+    trigger_graph();
4c8e44
+}
4c8e44
+
4c8e44
 /* downed nodes are listed like: <downed> <node id="UUID1" /> ... </downed> */
4c8e44
 #define XPATH_DOWNED "//" XML_GRAPH_TAG_DOWNED \
4c8e44
                      "/" XML_CIB_TAG_NODE "[@" XML_ATTR_UUID "='%s']"
4c8e44
@@ -471,6 +492,17 @@ process_graph_event(xmlNode *event, const char *event_node)
4c8e44
             /* Recurring actions have the transition number they were first
4c8e44
              * scheduled in.
4c8e44
              */
4c8e44
+
4c8e44
+            if (status == PCMK_LRM_OP_CANCELLED) {
4c8e44
+                const char *node_id = get_node_id(event);
4c8e44
+
4c8e44
+                action = get_cancel_action(id, node_id);
4c8e44
+                if (action) {
4c8e44
+                    confirm_cancel_action(action);
4c8e44
+                }
4c8e44
+                goto bail;
4c8e44
+            }
4c8e44
+
4c8e44
             desc = "arrived after initial scheduling";
4c8e44
             abort_transition(INFINITY, tg_restart, "Change in recurring result",
4c8e44
                              event);
4c8e44
diff --git a/daemons/controld/controld_transition.h b/daemons/controld/controld_transition.h
4c8e44
index 0a33599..a162f99 100644
4c8e44
--- a/daemons/controld/controld_transition.h
4c8e44
+++ b/daemons/controld/controld_transition.h
4c8e44
@@ -25,6 +25,7 @@ void execute_stonith_cleanup(void);
4c8e44
 /* tengine */
4c8e44
 extern crm_action_t *match_down_event(const char *target);
4c8e44
 extern crm_action_t *get_cancel_action(const char *id, const char *node);
4c8e44
+void confirm_cancel_action(crm_action_t *cancel);
4c8e44
 
4c8e44
 void controld_record_action_timeout(crm_action_t *action);
4c8e44
 extern gboolean fail_incompletable_actions(crm_graph_t * graph, const char *down_node);
4c8e44
diff --git a/daemons/controld/controld_utils.c b/daemons/controld/controld_utils.c
4c8e44
index ca7e15d..35922f0 100644
4c8e44
--- a/daemons/controld/controld_utils.c
4c8e44
+++ b/daemons/controld/controld_utils.c
4c8e44
@@ -1073,3 +1073,16 @@ feature_set_compatible(const char *dc_version, const char *join_version)
4c8e44
     // DC's minor version must be the same or older
4c8e44
     return dc_v <= join_v;
4c8e44
 }
4c8e44
+
4c8e44
+const char *
4c8e44
+get_node_id(xmlNode *lrm_rsc_op)
4c8e44
+{
4c8e44
+    xmlNode *node = lrm_rsc_op;
4c8e44
+
4c8e44
+    while (node != NULL && safe_str_neq(XML_CIB_TAG_STATE, TYPE(node))) {
4c8e44
+        node = node->parent;
4c8e44
+    }
4c8e44
+
4c8e44
+    CRM_CHECK(node != NULL, return NULL);
4c8e44
+    return ID(node);
4c8e44
+}
4c8e44
diff --git a/daemons/controld/controld_utils.h b/daemons/controld/controld_utils.h
4c8e44
index 2a92db5..68992f5 100644
4c8e44
--- a/daemons/controld/controld_utils.h
4c8e44
+++ b/daemons/controld/controld_utils.h
4c8e44
@@ -95,6 +95,8 @@ unsigned int cib_op_timeout(void);
4c8e44
 bool feature_set_compatible(const char *dc_version, const char *join_version);
4c8e44
 bool controld_action_is_recordable(const char *action);
4c8e44
 
4c8e44
+const char *get_node_id(xmlNode *lrm_rsc_op);
4c8e44
+
4c8e44
 /* Convenience macro for registering a CIB callback
4c8e44
  * (assumes that data can be freed with free())
4c8e44
  */
4c8e44
-- 
4c8e44
1.8.3.1
4c8e44