109fe2
From 7c322f4b9a7f36eba1d3ca74d7dd8fe1093ca7bd Mon Sep 17 00:00:00 2001
109fe2
From: Ken Gaillot <kgaillot@redhat.com>
109fe2
Date: Mon, 22 Jan 2018 11:38:22 -0600
109fe2
Subject: [PATCH] Low: crmd: quorum gain should always cause new transition
109fe2
109fe2
0b689055 aborted the transition on quorum loss, but quorum can also be acquired
109fe2
without triggering a new transition, if corosync gives quorum without a node
109fe2
joining (e.g. forced via corosync-cmapctl, or perhaps via heuristics).
109fe2
109fe2
This aborts the transition when quorum is gained, but only after a 5-second
109fe2
delay, if the transition has not been aborted in that time. This avoids an
109fe2
unnecessary abort in the vast majority of cases where an abort is already done,
109fe2
and it allows some time for all nodes to connect when quorum is gained, rather
109fe2
than immediately fencing remaining unseen nodes.
109fe2
---
109fe2
 crmd/membership.c | 22 +++++++++++++++++-----
109fe2
 crmd/te_utils.c   | 48 ++++++++++++++++++++++++++++++++++++++++++++++--
109fe2
 crmd/tengine.h    |  2 ++
109fe2
 3 files changed, 65 insertions(+), 7 deletions(-)
109fe2
109fe2
diff --git a/crmd/membership.c b/crmd/membership.c
109fe2
index c36dbed..4f2fa8a 100644
109fe2
--- a/crmd/membership.c
109fe2
+++ b/crmd/membership.c
109fe2
@@ -438,12 +438,24 @@ crm_update_quorum(gboolean quorum, gboolean force_update)
109fe2
         fsa_register_cib_callback(call_id, FALSE, NULL, cib_quorum_update_complete);
109fe2
         free_xml(update);
109fe2
 
109fe2
-        /* If a node not running any resources is cleanly shut down and drops us
109fe2
-         * below quorum, we won't necessarily abort the transition, so abort it
109fe2
-         * here to be safe.
109fe2
+        /* Quorum changes usually cause a new transition via other activity:
109fe2
+         * quorum gained via a node joining will abort via the node join,
109fe2
+         * and quorum lost via a node leaving will usually abort via resource
109fe2
+         * activity and/or fencing.
109fe2
+         *
109fe2
+         * However, it is possible that nothing else causes a transition (e.g.
109fe2
+         * someone forces quorum via corosync-cmaptcl, or quorum is lost due to
109fe2
+         * a node in standby shutting down cleanly), so here ensure a new
109fe2
+         * transition is triggered.
109fe2
          */
109fe2
-        if (quorum == FALSE) {
109fe2
-            abort_transition(INFINITY, tg_restart, "Quorum loss", NULL);
109fe2
+        if (quorum) {
109fe2
+            /* If quorum was gained, abort after a short delay, in case multiple
109fe2
+             * nodes are joining around the same time, so the one that brings us
109fe2
+             * to quorum doesn't cause all the remaining ones to be fenced.
109fe2
+             */
109fe2
+            abort_after_delay(INFINITY, tg_restart, "Quorum gained", 5000);
109fe2
+        } else {
109fe2
+            abort_transition(INFINITY, tg_restart, "Quorum lost", NULL);
109fe2
         }
109fe2
     }
109fe2
     fsa_has_quorum = quorum;
109fe2
diff --git a/crmd/te_utils.c b/crmd/te_utils.c
109fe2
index dab02d3..8d105dc 100644
109fe2
--- a/crmd/te_utils.c
109fe2
+++ b/crmd/te_utils.c
109fe2
@@ -530,6 +530,46 @@ trigger_graph_processing(const char *fn, int line)
109fe2
     mainloop_set_trigger(transition_trigger);
109fe2
 }
109fe2
 
109fe2
+static struct abort_timer_s {
109fe2
+    bool aborted;
109fe2
+    guint id;
109fe2
+    int priority;
109fe2
+    enum transition_action action;
109fe2
+    const char *text;
109fe2
+} abort_timer = { 0, };
109fe2
+
109fe2
+static gboolean
109fe2
+abort_timer_popped(gpointer data)
109fe2
+{
109fe2
+    if (abort_timer.aborted == FALSE) {
109fe2
+        abort_transition(abort_timer.priority, abort_timer.action,
109fe2
+                         abort_timer.text, NULL);
109fe2
+    }
109fe2
+    abort_timer.id = 0;
109fe2
+    return FALSE; // do not immediately reschedule timer
109fe2
+}
109fe2
+
109fe2
+/*!
109fe2
+ * \internal
109fe2
+ * \brief Abort transition after delay, if not already aborted in that time
109fe2
+ *
109fe2
+ * \param[in] abort_text  Must be literal string
109fe2
+ */
109fe2
+void
109fe2
+abort_after_delay(int abort_priority, enum transition_action abort_action,
109fe2
+                  const char *abort_text, guint delay_ms)
109fe2
+{
109fe2
+    if (abort_timer.id) {
109fe2
+        // Timer already in progress, stop and reschedule
109fe2
+        g_source_remove(abort_timer.id);
109fe2
+    }
109fe2
+    abort_timer.aborted = FALSE;
109fe2
+    abort_timer.priority = abort_priority;
109fe2
+    abort_timer.action = abort_action;
109fe2
+    abort_timer.text = abort_text;
109fe2
+    abort_timer.id = g_timeout_add(delay_ms, abort_timer_popped, NULL);
109fe2
+}
109fe2
+
109fe2
 void
109fe2
 abort_transition_graph(int abort_priority, enum transition_action abort_action,
109fe2
                        const char *abort_text, xmlNode * reason, const char *fn, int line)
109fe2
@@ -557,6 +597,8 @@ abort_transition_graph(int abort_priority, enum transition_action abort_action,
109fe2
             break;
109fe2
     }
109fe2
 
109fe2
+    abort_timer.aborted = TRUE;
109fe2
+
109fe2
     /* Make sure any queued calculations are discarded ASAP */
109fe2
     free(fsa_pe_ref);
109fe2
     fsa_pe_ref = NULL;
109fe2
@@ -660,10 +702,12 @@ abort_transition_graph(int abort_priority, enum transition_action abort_action,
109fe2
                        (transition_graph->complete? "true" : "false"));
109fe2
 
109fe2
         } else {
109fe2
+            const char *id = ID(reason);
109fe2
+
109fe2
             do_crm_log(level, "Transition aborted by %s.%s '%s': %s "
109fe2
                        CRM_XS " cib=%d.%d.%d source=%s:%d path=%s complete=%s",
109fe2
-                       TYPE(reason), ID(reason), (op? op : "change"), abort_text,
109fe2
-                       add[0], add[1], add[2], fn, line, path,
109fe2
+                       TYPE(reason), (id? id : ""), (op? op : "change"),
109fe2
+                       abort_text, add[0], add[1], add[2], fn, line, path,
109fe2
                        (transition_graph->complete? "true" : "false"));
109fe2
         }
109fe2
     }
109fe2
diff --git a/crmd/tengine.h b/crmd/tengine.h
109fe2
index 7205c16..6a75a08 100644
109fe2
--- a/crmd/tengine.h
109fe2
+++ b/crmd/tengine.h
109fe2
@@ -59,6 +59,8 @@ extern void notify_crmd(crm_graph_t * graph);
109fe2
 #  include <te_callbacks.h>
109fe2
 
109fe2
 extern void trigger_graph_processing(const char *fn, int line);
109fe2
+void abort_after_delay(int abort_priority, enum transition_action abort_action,
109fe2
+                       const char *abort_text, guint delay_ms);
109fe2
 extern void abort_transition_graph(int abort_priority, enum transition_action abort_action,
109fe2
                                    const char *abort_text, xmlNode * reason, const char *fn,
109fe2
                                    int line);
109fe2
-- 
109fe2
1.8.3.1
109fe2