Blame SOURCES/002-failed-monitors.patch

305287
From 5470f1d9c776dbf753e015fa96153b6a63c17b83 Mon Sep 17 00:00:00 2001
305287
From: "Gao,Yan" <ygao@suse.com>
305287
Date: Thu, 9 May 2019 13:24:35 +0200
305287
Subject: [PATCH] Fix: controller: confirm cancel of failed monitors
305287
305287
Usually after a monitor has been cancelled from executor, controller
305287
erases the corresponding lrm_rsc_op from the cib, and DC will confirm
305287
the cancel action by process_op_deletion() according to the cib diff.
305287
305287
But if a monitor has failed, the lrm_rsc_op will be recorded as
305287
"last_failure". When cancelling it, the lrm_rsc_op won't get erased from
305287
the cib, due to the intentional logic in erase_lrm_history_by_op(). As a result,
305287
the cancel action won't have a chance to get confirmed by DC with
305287
process_op_deletion().
305287
305287
Previously cluster transition would get stuck waiting for the remaining
305287
action timer to time out.
305287
305287
This commit fixes the issue by directly acknowledging the cancel action
305287
in this case and enabling the DC to confirm it.
305287
305287
This also moves get_node_id() function into controld_utils.c for common
305287
use.
305287
305287
Reproducer:
305287
```
305287
 # Insert a 10s sleep in the monitor action of RA
305287
 # /usr/lib/ocf/resource.d/pacemaker/Stateful:
305287
305287
  stateful_monitor() {
305287
 +    sleep 10
305287
      stateful_check_state "master"
305287
305287
 # Add a promotable clone resource:
305287
305287
 crm configure primitive stateful ocf:pacemaker:Stateful \
305287
         op monitor interval=5 role=Master \
305287
         op monitor interval=10 role=Slave
305287
 crm configure clone p-clone stateful \
305287
         meta promotable=true
305287
305287
 # Wait for the resource instance to be started, promoted to be master,
305287
 # and monitor for master role to complete.
305287
305287
 # Set is-managed=false for the promotable clone:
305287
 crm_resource --meta -p is-managed -v false -r p-clone
305287
305287
 # Change the status of the master instance to be slave and immediately
305287
 # enforce refresh of it:
305287
 echo slave > /var/run/Stateful-stateful.state; crm_resource --refresh -r stateful --force
305287
305287
 # Wait for probe to complete, and then monitor for slave role to be
305287
 # issued:
305287
 sleep 15
305287
305287
 # While the monitor for slave role is still in progress, change the
305287
 # status to be master again:
305287
 echo master > /var/run/Stateful-stateful.state
305287
305287
 # The monitor for slave role returns error. Cluster issues monitor for
305287
 # master role instead and tries to cancel the failed one for slave role.
305287
 # But cluster transition gets stuck. Depending on the monitor timeout
305287
 # configured for the slave role plus cluster-delay, only after that
305287
 # controller eventually says:
305287
305287
 pacemaker-controld[21205] error: Node opensuse150 did not send cancel result (via controller) within 20000ms (action timeout plus cluster-delay)
305287
 pacemaker-controld[21205] error: [Action    1]: In-flight rsc op stateful_monitor_10000            on opensuse150 (priority: 0, waiting: none)
305287
 pacemaker-controld[21205] notice: Transition 6 aborted: Action lost
305287
305287
```
305287
---
305287
 daemons/controld/controld_execd.c        | 38 ++++++++++++++++++++++++++++++++
305287
 daemons/controld/controld_te_callbacks.c | 21 ++----------------
305287
 daemons/controld/controld_te_events.c    | 32 +++++++++++++++++++++++++++
305287
 daemons/controld/controld_transition.h   |  1 +
305287
 daemons/controld/controld_utils.c        | 13 +++++++++++
305287
 daemons/controld/controld_utils.h        |  2 ++
305287
 6 files changed, 88 insertions(+), 19 deletions(-)
305287
305287
diff --git a/daemons/controld/controld_execd.c b/daemons/controld/controld_execd.c
305287
index 976fed1..8282fed 100644
305287
--- a/daemons/controld/controld_execd.c
305287
+++ b/daemons/controld/controld_execd.c
305287
@@ -2476,6 +2476,30 @@ unescape_newlines(const char *string)
305287
     return ret;
305287
 }
305287
 
305287
+static bool
305287
+did_lrm_rsc_op_fail(lrm_state_t *lrm_state, const char * rsc_id,
305287
+                    const char * op_type, guint interval_ms)
305287
+{
305287
+    rsc_history_t *entry = NULL;
305287
+
305287
+    CRM_CHECK(lrm_state != NULL, return FALSE);
305287
+    CRM_CHECK(rsc_id != NULL, return FALSE);
305287
+    CRM_CHECK(op_type != NULL, return FALSE);
305287
+
305287
+    entry = g_hash_table_lookup(lrm_state->resource_history, rsc_id);
305287
+    if (entry == NULL || entry->failed == NULL) {
305287
+        return FALSE;
305287
+    }
305287
+
305287
+    if (crm_str_eq(entry->failed->rsc_id, rsc_id, TRUE)
305287
+        && safe_str_eq(entry->failed->op_type, op_type)
305287
+        && entry->failed->interval_ms == interval_ms) {
305287
+        return TRUE;
305287
+    }
305287
+
305287
+    return FALSE;
305287
+}
305287
+
305287
 void
305287
 process_lrm_event(lrm_state_t *lrm_state, lrmd_event_data_t *op,
305287
                   struct recurring_op_s *pending, xmlNode *action_xml)
305287
@@ -2605,6 +2629,20 @@ process_lrm_event(lrm_state_t *lrm_state, lrmd_event_data_t *op,
305287
             erase_lrm_history_by_op(lrm_state, op);
305287
         }
305287
 
305287
+        /* If the recurring operation had failed, the lrm_rsc_op is recorded as
305287
+         * "last_failure" which won't get erased from the cib given the logic on
305287
+         * purpose in erase_lrm_history_by_op(). So that the cancel action won't
305287
+         * have a chance to get confirmed by DC with process_op_deletion().
305287
+         * Cluster transition would get stuck waiting for the remaining action
305287
+         * timer to time out.
305287
+         *
305287
+         * Directly acknowledge the cancel operation in this case.
305287
+         */
305287
+        if (did_lrm_rsc_op_fail(lrm_state, pending->rsc_id,
305287
+                                pending->op_type, pending->interval_ms)) {
305287
+            need_direct_ack = TRUE;
305287
+        }
305287
+
305287
     } else if (op->rsc_deleted) {
305287
         /* This recurring operation was cancelled (but not by us, and the
305287
          * executor does not have resource information, likely due to resource
305287
diff --git a/daemons/controld/controld_te_callbacks.c b/daemons/controld/controld_te_callbacks.c
305287
index 51d908e..22b5f4b 100644
305287
--- a/daemons/controld/controld_te_callbacks.c
305287
+++ b/daemons/controld/controld_te_callbacks.c
305287
@@ -32,19 +32,6 @@ static unsigned long int stonith_max_attempts = 10;
305287
 /* #define RSC_OP_TEMPLATE "//"XML_TAG_DIFF_ADDED"//"XML_TAG_CIB"//"XML_CIB_TAG_STATE"[@uname='%s']"//"XML_LRM_TAG_RSC_OP"[@id='%s]" */
305287
 #define RSC_OP_TEMPLATE "//"XML_TAG_DIFF_ADDED"//"XML_TAG_CIB"//"XML_LRM_TAG_RSC_OP"[@id='%s']"
305287
 
305287
-static const char *
305287
-get_node_id(xmlNode * rsc_op)
305287
-{
305287
-    xmlNode *node = rsc_op;
305287
-
305287
-    while (node != NULL && safe_str_neq(XML_CIB_TAG_STATE, TYPE(node))) {
305287
-        node = node->parent;
305287
-    }
305287
-
305287
-    CRM_CHECK(node != NULL, return NULL);
305287
-    return ID(node);
305287
-}
305287
-
305287
 void
305287
 update_stonith_max_attempts(const char* value)
305287
 {
305287
@@ -374,12 +361,8 @@ process_op_deletion(const char *xpath, xmlNode *change)
305287
     node_uuid = extract_node_uuid(xpath);
305287
     cancel = get_cancel_action(key, node_uuid);
305287
     if (cancel) {
305287
-        crm_info("Cancellation of %s on %s confirmed (%d)",
305287
-                 key, node_uuid, cancel->id);
305287
-        stop_te_timer(cancel->timer);
305287
-        te_action_confirmed(cancel);
305287
-        update_graph(transition_graph, cancel);
305287
-        trigger_graph();
305287
+        confirm_cancel_action(cancel);
305287
+
305287
     } else {
305287
         abort_transition(INFINITY, tg_restart, "Resource operation removal",
305287
                          change);
305287
diff --git a/daemons/controld/controld_te_events.c b/daemons/controld/controld_te_events.c
305287
index c0d096f..b7b48a4 100644
305287
--- a/daemons/controld/controld_te_events.c
305287
+++ b/daemons/controld/controld_te_events.c
305287
@@ -355,6 +355,27 @@ get_cancel_action(const char *id, const char *node)
305287
     return NULL;
305287
 }
305287
 
305287
+void
305287
+confirm_cancel_action(crm_action_t *cancel)
305287
+{
305287
+    const char *op_key = NULL;
305287
+    const char *node_name = NULL;
305287
+
305287
+    CRM_ASSERT(cancel != NULL);
305287
+
305287
+    op_key = crm_element_value(cancel->xml, XML_LRM_ATTR_TASK_KEY);
305287
+    node_name = crm_element_value(cancel->xml, XML_LRM_ATTR_TARGET);
305287
+
305287
+    stop_te_timer(cancel->timer);
305287
+    te_action_confirmed(cancel);
305287
+    update_graph(transition_graph, cancel);
305287
+
305287
+    crm_info("Cancellation of %s on %s confirmed (action %d)",
305287
+             op_key, node_name, cancel->id);
305287
+
305287
+    trigger_graph();
305287
+}
305287
+
305287
 /* downed nodes are listed like: <downed> <node id="UUID1" /> ... </downed> */
305287
 #define XPATH_DOWNED "//" XML_GRAPH_TAG_DOWNED \
305287
                      "/" XML_CIB_TAG_NODE "[@" XML_ATTR_UUID "='%s']"
305287
@@ -471,6 +492,17 @@ process_graph_event(xmlNode *event, const char *event_node)
305287
             /* Recurring actions have the transition number they were first
305287
              * scheduled in.
305287
              */
305287
+
305287
+            if (status == PCMK_LRM_OP_CANCELLED) {
305287
+                const char *node_id = get_node_id(event);
305287
+
305287
+                action = get_cancel_action(id, node_id);
305287
+                if (action) {
305287
+                    confirm_cancel_action(action);
305287
+                }
305287
+                goto bail;
305287
+            }
305287
+
305287
             desc = "arrived after initial scheduling";
305287
             abort_transition(INFINITY, tg_restart, "Change in recurring result",
305287
                              event);
305287
diff --git a/daemons/controld/controld_transition.h b/daemons/controld/controld_transition.h
305287
index 0a33599..a162f99 100644
305287
--- a/daemons/controld/controld_transition.h
305287
+++ b/daemons/controld/controld_transition.h
305287
@@ -25,6 +25,7 @@ void execute_stonith_cleanup(void);
305287
 /* tengine */
305287
 extern crm_action_t *match_down_event(const char *target);
305287
 extern crm_action_t *get_cancel_action(const char *id, const char *node);
305287
+void confirm_cancel_action(crm_action_t *cancel);
305287
 
305287
 void controld_record_action_timeout(crm_action_t *action);
305287
 extern gboolean fail_incompletable_actions(crm_graph_t * graph, const char *down_node);
305287
diff --git a/daemons/controld/controld_utils.c b/daemons/controld/controld_utils.c
305287
index ca7e15d..35922f0 100644
305287
--- a/daemons/controld/controld_utils.c
305287
+++ b/daemons/controld/controld_utils.c
305287
@@ -1073,3 +1073,16 @@ feature_set_compatible(const char *dc_version, const char *join_version)
305287
     // DC's minor version must be the same or older
305287
     return dc_v <= join_v;
305287
 }
305287
+
305287
+const char *
305287
+get_node_id(xmlNode *lrm_rsc_op)
305287
+{
305287
+    xmlNode *node = lrm_rsc_op;
305287
+
305287
+    while (node != NULL && safe_str_neq(XML_CIB_TAG_STATE, TYPE(node))) {
305287
+        node = node->parent;
305287
+    }
305287
+
305287
+    CRM_CHECK(node != NULL, return NULL);
305287
+    return ID(node);
305287
+}
305287
diff --git a/daemons/controld/controld_utils.h b/daemons/controld/controld_utils.h
305287
index 2a92db5..68992f5 100644
305287
--- a/daemons/controld/controld_utils.h
305287
+++ b/daemons/controld/controld_utils.h
305287
@@ -95,6 +95,8 @@ unsigned int cib_op_timeout(void);
305287
 bool feature_set_compatible(const char *dc_version, const char *join_version);
305287
 bool controld_action_is_recordable(const char *action);
305287
 
305287
+const char *get_node_id(xmlNode *lrm_rsc_op);
305287
+
305287
 /* Convenience macro for registering a CIB callback
305287
  * (assumes that data can be freed with free())
305287
  */
305287
-- 
305287
1.8.3.1
305287