Blame SOURCES/010-fix-history-handing-on-fenced-restart.patch

4c8e44
From 14bb468ab404228cae34809420ef0763d3d54482 Mon Sep 17 00:00:00 2001
4c8e44
From: Klaus Wenninger <klaus.wenninger@aon.at>
4c8e44
Date: Thu, 13 Jun 2019 15:31:24 +0200
4c8e44
Subject: [PATCH] Fix: fence-history: fail leftover pending-actions after
4c8e44
 fenced-restart
4c8e44
4c8e44
---
4c8e44
 daemons/fenced/fenced_history.c   | 15 +++++++++++++++
4c8e44
 daemons/fenced/fenced_remote.c    |  6 +++---
4c8e44
 daemons/fenced/pacemaker-fenced.h |  8 ++++++++
4c8e44
 3 files changed, 26 insertions(+), 3 deletions(-)
4c8e44
4c8e44
diff --git a/daemons/fenced/fenced_history.c b/daemons/fenced/fenced_history.c
4c8e44
index 7c129cc..b65b64c 100644
4c8e44
--- a/daemons/fenced/fenced_history.c
4c8e44
+++ b/daemons/fenced/fenced_history.c
4c8e44
@@ -347,6 +347,21 @@ stonith_merge_in_history_list(GHashTable *history)
4c8e44
 
4c8e44
         updated = TRUE;
4c8e44
         g_hash_table_iter_steal(&iter);
4c8e44
+
4c8e44
+        if ((op->state != st_failed) &&
4c8e44
+            (op->state != st_done) &&
4c8e44
+            safe_str_eq(op->originator, stonith_our_uname)) {
4c8e44
+            crm_warn("received pending action we are supposed to be the "
4c8e44
+                     "owner but it's not in our records -> fail it");
4c8e44
+            op->state = st_failed;
4c8e44
+            op->completed = time(NULL);
4c8e44
+            /* use -EHOSTUNREACH to not introduce a new return-code that might
4c8e44
+               trigger unexpected results at other places and to prevent
4c8e44
+               remote_op_done from setting the delegate if not present
4c8e44
+             */
4c8e44
+            stonith_bcast_result_to_peers(op, -EHOSTUNREACH);
4c8e44
+        }
4c8e44
+
4c8e44
         g_hash_table_insert(stonith_remote_op_list, op->id, op);
4c8e44
         /* we could trim the history here but if we bail
4c8e44
          * out after trim we might miss more recent entries
4c8e44
diff --git a/daemons/fenced/fenced_remote.c b/daemons/fenced/fenced_remote.c
4c8e44
index 7d61249..5b86f0f 100644
4c8e44
--- a/daemons/fenced/fenced_remote.c
4c8e44
+++ b/daemons/fenced/fenced_remote.c
4c8e44
@@ -369,8 +369,8 @@ create_op_done_notify(remote_fencing_op_t * op, int rc)
4c8e44
     return notify_data;
4c8e44
 }
4c8e44
 
4c8e44
-static void
4c8e44
-bcast_result_to_peers(remote_fencing_op_t * op, int rc)
4c8e44
+void
4c8e44
+stonith_bcast_result_to_peers(remote_fencing_op_t * op, int rc)
4c8e44
 {
4c8e44
     static int count = 0;
4c8e44
     xmlNode *bcast = create_xml_node(NULL, T_STONITH_REPLY);
4c8e44
@@ -509,7 +509,7 @@ remote_op_done(remote_fencing_op_t * op, xmlNode * data, int rc, int dup)
4c8e44
     subt = crm_element_value(data, F_SUBTYPE);
4c8e44
     if (dup == FALSE && safe_str_neq(subt, "broadcast")) {
4c8e44
         /* Defer notification until the bcast message arrives */
4c8e44
-        bcast_result_to_peers(op, rc);
4c8e44
+        stonith_bcast_result_to_peers(op, rc);
4c8e44
         goto remote_op_done_cleanup;
4c8e44
     }
4c8e44
 
4c8e44
diff --git a/daemons/fenced/pacemaker-fenced.h b/daemons/fenced/pacemaker-fenced.h
4c8e44
index 3a2edbb..a8531a6 100644
4c8e44
--- a/daemons/fenced/pacemaker-fenced.h
4c8e44
+++ b/daemons/fenced/pacemaker-fenced.h
4c8e44
@@ -149,6 +149,14 @@ typedef struct remote_fencing_op_s {
4c8e44
 
4c8e44
 } remote_fencing_op_t;
4c8e44
 
4c8e44
+/*!
4c8e44
+ * \internal
4c8e44
+ * \brief Broadcast the result of an operation to the peers.
4c8e44
+ * \param op, Operation whose result should be broadcast
4c8e44
+ * \param rc, Result of the operation
4c8e44
+ */
4c8e44
+void stonith_bcast_result_to_peers(remote_fencing_op_t * op, int rc);
4c8e44
+
4c8e44
 enum st_callback_flags {
4c8e44
     st_callback_unknown        = 0x0000,
4c8e44
     st_callback_notify_fence   = 0x0001,
4c8e44
-- 
4c8e44
1.8.3.1
4c8e44
4c8e44
From a0bc0d3ab5aed64e37b1caae746f5c421696df1b Mon Sep 17 00:00:00 2001
4c8e44
From: Klaus Wenninger <klaus.wenninger@aon.at>
4c8e44
Date: Fri, 14 Jun 2019 13:41:43 +0200
4c8e44
Subject: [PATCH] Fix: controld-fencing: remove-notifications upon
4c8e44
 connection-destroy
4c8e44
4c8e44
---
4c8e44
 daemons/controld/controld_fencing.c | 9 ++++++++-
4c8e44
 1 file changed, 8 insertions(+), 1 deletion(-)
4c8e44
4c8e44
diff --git a/daemons/controld/controld_fencing.c b/daemons/controld/controld_fencing.c
4c8e44
index 92336e9..b925bc5 100644
4c8e44
--- a/daemons/controld/controld_fencing.c
4c8e44
+++ b/daemons/controld/controld_fencing.c
4c8e44
@@ -403,7 +403,14 @@ tengine_stonith_connection_destroy(stonith_t *st, stonith_event_t *e)
4c8e44
     }
4c8e44
 
4c8e44
     if (stonith_api) {
4c8e44
-        stonith_api->state = stonith_disconnected;
4c8e44
+        /* the client API won't properly reconnect notifications
4c8e44
+         * if they are still in the table - so remove them
4c8e44
+         */
4c8e44
+        stonith_api->cmds->remove_notification(st, T_STONITH_NOTIFY_DISCONNECT);
4c8e44
+        stonith_api->cmds->remove_notification(st, T_STONITH_NOTIFY_FENCE);
4c8e44
+        if (stonith_api->state != stonith_disconnected) {
4c8e44
+            stonith_api->cmds->disconnect(st);
4c8e44
+        }
4c8e44
     }
4c8e44
 
4c8e44
     if (AM_I_DC) {
4c8e44
-- 
4c8e44
1.8.3.1
4c8e44
4c8e44
From 487cdd9e3ec6ab47fde5074acbb2ff564047d59c Mon Sep 17 00:00:00 2001
4c8e44
From: Klaus Wenninger <klaus.wenninger@aon.at>
4c8e44
Date: Tue, 18 Jun 2019 14:09:20 +0200
4c8e44
Subject: [PATCH] Feature: fence-history: add notification upon history-synced
4c8e44
4c8e44
---
4c8e44
 daemons/fenced/fenced_history.c   |  5 +++++
4c8e44
 daemons/fenced/pacemaker-fenced.c |  3 +++
4c8e44
 daemons/fenced/pacemaker-fenced.h | 11 ++++++-----
4c8e44
 include/crm/stonith-ng.h          |  1 +
4c8e44
 4 files changed, 15 insertions(+), 5 deletions(-)
4c8e44
4c8e44
diff --git a/daemons/fenced/fenced_history.c b/daemons/fenced/fenced_history.c
4c8e44
index b65b64c..cd08d74 100644
4c8e44
--- a/daemons/fenced/fenced_history.c
4c8e44
+++ b/daemons/fenced/fenced_history.c
4c8e44
@@ -420,6 +420,11 @@ stonith_fence_history(xmlNode *msg, xmlNode **output,
4c8e44
         stonith_fence_history_cleanup(target,
4c8e44
             crm_element_value(msg, F_STONITH_CALLID) != NULL);
4c8e44
     } else if (options & st_opt_broadcast) {
4c8e44
+        /* there is no clear sign atm for when a history sync
4c8e44
+           is done so send a notification for anything
4c8e44
+           that smells like history-sync
4c8e44
+         */
4c8e44
+        do_stonith_notify(0, T_STONITH_NOTIFY_HISTORY_SYNCED, 0, NULL);
4c8e44
         if (crm_element_value(msg, F_STONITH_CALLID)) {
4c8e44
             /* this is coming from the stonith-API
4c8e44
             *
4c8e44
diff --git a/daemons/fenced/pacemaker-fenced.c b/daemons/fenced/pacemaker-fenced.c
4c8e44
index 7e9bb07..7a87f93 100644
4c8e44
--- a/daemons/fenced/pacemaker-fenced.c
4c8e44
+++ b/daemons/fenced/pacemaker-fenced.c
4c8e44
@@ -279,6 +279,9 @@ get_stonith_flag(const char *name)
4c8e44
     } else if (safe_str_eq(name, T_STONITH_NOTIFY_HISTORY)) {
4c8e44
         return st_callback_notify_history;
4c8e44
 
4c8e44
+    } else if (safe_str_eq(name, T_STONITH_NOTIFY_HISTORY_SYNCED)) {
4c8e44
+        return st_callback_notify_history_synced;
4c8e44
+
4c8e44
     }
4c8e44
     return st_callback_unknown;
4c8e44
 }
4c8e44
diff --git a/daemons/fenced/pacemaker-fenced.h b/daemons/fenced/pacemaker-fenced.h
4c8e44
index a8531a6..583cb47 100644
4c8e44
--- a/daemons/fenced/pacemaker-fenced.h
4c8e44
+++ b/daemons/fenced/pacemaker-fenced.h
4c8e44
@@ -158,11 +158,12 @@ typedef struct remote_fencing_op_s {
4c8e44
 void stonith_bcast_result_to_peers(remote_fencing_op_t * op, int rc);
4c8e44
 
4c8e44
 enum st_callback_flags {
4c8e44
-    st_callback_unknown        = 0x0000,
4c8e44
-    st_callback_notify_fence   = 0x0001,
4c8e44
-    st_callback_device_add     = 0x0004,
4c8e44
-    st_callback_device_del     = 0x0010,
4c8e44
-    st_callback_notify_history = 0x0020
4c8e44
+    st_callback_unknown               = 0x0000,
4c8e44
+    st_callback_notify_fence          = 0x0001,
4c8e44
+    st_callback_device_add            = 0x0004,
4c8e44
+    st_callback_device_del            = 0x0010,
4c8e44
+    st_callback_notify_history        = 0x0020,
4c8e44
+    st_callback_notify_history_synced = 0x0040
4c8e44
 };
4c8e44
 
4c8e44
 /*
4c8e44
diff --git a/include/crm/stonith-ng.h b/include/crm/stonith-ng.h
4c8e44
index b640732..418a03c 100644
4c8e44
--- a/include/crm/stonith-ng.h
4c8e44
+++ b/include/crm/stonith-ng.h
4c8e44
@@ -29,6 +29,7 @@ extern "C" {
4c8e44
 #  define T_STONITH_NOTIFY_DISCONNECT     "st_notify_disconnect"
4c8e44
 #  define T_STONITH_NOTIFY_FENCE          "st_notify_fence"
4c8e44
 #  define T_STONITH_NOTIFY_HISTORY        "st_notify_history"
4c8e44
+#  define T_STONITH_NOTIFY_HISTORY_SYNCED "st_notify_history_synced"
4c8e44
 
4c8e44
 /* *INDENT-OFF* */
4c8e44
 enum stonith_state {
4c8e44
-- 
4c8e44
1.8.3.1
4c8e44
4c8e44
From 03c4455fced74f093deb782198b1ba3076e52015 Mon Sep 17 00:00:00 2001
4c8e44
From: Klaus Wenninger <klaus.wenninger@aon.at>
4c8e44
Date: Tue, 18 Jun 2019 14:12:27 +0200
4c8e44
Subject: [PATCH] Fix: fence-history: resync fence-history after fenced crash
4c8e44
4c8e44
Setting up a 30s fallback timer to trigger history-sync if the
4c8e44
sync via DC doesn't happen
4c8e44
---
4c8e44
 daemons/controld/controld_callbacks.c |  2 +-
4c8e44
 daemons/controld/controld_control.c   |  2 +
4c8e44
 daemons/controld/controld_fencing.c   | 86 ++++++++++++++++++++++++++++++-----
4c8e44
 daemons/controld/controld_fencing.h   |  3 +-
4c8e44
 4 files changed, 79 insertions(+), 14 deletions(-)
4c8e44
4c8e44
diff --git a/daemons/controld/controld_callbacks.c b/daemons/controld/controld_callbacks.c
4c8e44
index 3ce7470..48225ac 100644
4c8e44
--- a/daemons/controld/controld_callbacks.c
4c8e44
+++ b/daemons/controld/controld_callbacks.c
4c8e44
@@ -211,7 +211,7 @@ peer_update_callback(enum crm_status_type type, crm_node_t * node, const void *d
4c8e44
 
4c8e44
             } else if(AM_I_DC) {
4c8e44
                 if (appeared) {
4c8e44
-                    te_trigger_stonith_history_sync();
4c8e44
+                    te_trigger_stonith_history_sync(FALSE);
4c8e44
                 } else {
4c8e44
                     erase_status_tag(node->uname, XML_TAG_TRANSIENT_NODEATTRS, cib_scope_local);
4c8e44
                 }
4c8e44
diff --git a/daemons/controld/controld_control.c b/daemons/controld/controld_control.c
4c8e44
index e99d605..f3bb20f 100644
4c8e44
--- a/daemons/controld/controld_control.c
4c8e44
+++ b/daemons/controld/controld_control.c
4c8e44
@@ -259,6 +259,8 @@ crmd_exit(crm_exit_t exit_code)
4c8e44
     crm_timer_stop(wait_timer);
4c8e44
     crm_timer_stop(recheck_timer);
4c8e44
 
4c8e44
+    te_cleanup_stonith_history_sync(NULL, TRUE);
4c8e44
+
4c8e44
     free(transition_timer); transition_timer = NULL;
4c8e44
     free(integration_timer); integration_timer = NULL;
4c8e44
     free(finalization_timer); finalization_timer = NULL;
4c8e44
diff --git a/daemons/controld/controld_fencing.c b/daemons/controld/controld_fencing.c
4c8e44
index b925bc5..22fa727 100644
4c8e44
--- a/daemons/controld/controld_fencing.c
4c8e44
+++ b/daemons/controld/controld_fencing.c
4c8e44
@@ -20,6 +20,9 @@
4c8e44
 #  include <sys/reboot.h>
4c8e44
 #endif
4c8e44
 
4c8e44
+static void
4c8e44
+tengine_stonith_history_synced(stonith_t *st, stonith_event_t *st_event);
4c8e44
+
4c8e44
 /*
4c8e44
  * stonith failure counting
4c8e44
  *
4c8e44
@@ -394,6 +397,8 @@ fail_incompletable_stonith(crm_graph_t *graph)
4c8e44
 static void
4c8e44
 tengine_stonith_connection_destroy(stonith_t *st, stonith_event_t *e)
4c8e44
 {
4c8e44
+    te_cleanup_stonith_history_sync(st, FALSE);
4c8e44
+
4c8e44
     if (is_set(fsa_input_register, R_ST_REQUIRED)) {
4c8e44
         crm_crit("Fencing daemon connection failed");
4c8e44
         mainloop_set_trigger(stonith_reconnect);
4c8e44
@@ -406,11 +411,12 @@ tengine_stonith_connection_destroy(stonith_t *st, stonith_event_t *e)
4c8e44
         /* the client API won't properly reconnect notifications
4c8e44
          * if they are still in the table - so remove them
4c8e44
          */
4c8e44
-        stonith_api->cmds->remove_notification(st, T_STONITH_NOTIFY_DISCONNECT);
4c8e44
-        stonith_api->cmds->remove_notification(st, T_STONITH_NOTIFY_FENCE);
4c8e44
         if (stonith_api->state != stonith_disconnected) {
4c8e44
             stonith_api->cmds->disconnect(st);
4c8e44
         }
4c8e44
+        stonith_api->cmds->remove_notification(stonith_api, T_STONITH_NOTIFY_DISCONNECT);
4c8e44
+        stonith_api->cmds->remove_notification(stonith_api, T_STONITH_NOTIFY_FENCE);
4c8e44
+        stonith_api->cmds->remove_notification(stonith_api, T_STONITH_NOTIFY_HISTORY_SYNCED);
4c8e44
     }
4c8e44
 
4c8e44
     if (AM_I_DC) {
4c8e44
@@ -622,7 +628,12 @@ te_connect_stonith(gpointer user_data)
4c8e44
         stonith_api->cmds->register_notification(stonith_api,
4c8e44
                                                  T_STONITH_NOTIFY_FENCE,
4c8e44
                                                  tengine_stonith_notify);
4c8e44
+        stonith_api->cmds->register_notification(stonith_api,
4c8e44
+                                                 T_STONITH_NOTIFY_HISTORY_SYNCED,
4c8e44
+                                                 tengine_stonith_history_synced);
4c8e44
+        te_trigger_stonith_history_sync(TRUE);
4c8e44
     }
4c8e44
+
4c8e44
     return TRUE;
4c8e44
 }
4c8e44
 
4c8e44
@@ -649,7 +660,12 @@ controld_disconnect_fencer(bool destroy)
4c8e44
         // Prevent fencer connection from coming up again
4c8e44
         clear_bit(fsa_input_register, R_ST_REQUIRED);
4c8e44
 
4c8e44
-        stonith_api->cmds->disconnect(stonith_api);
4c8e44
+        if (stonith_api->state != stonith_disconnected) {
4c8e44
+            stonith_api->cmds->disconnect(stonith_api);
4c8e44
+        }
4c8e44
+        stonith_api->cmds->remove_notification(stonith_api, T_STONITH_NOTIFY_DISCONNECT);
4c8e44
+        stonith_api->cmds->remove_notification(stonith_api, T_STONITH_NOTIFY_FENCE);
4c8e44
+        stonith_api->cmds->remove_notification(stonith_api, T_STONITH_NOTIFY_HISTORY_SYNCED);
4c8e44
     }
4c8e44
     if (destroy) {
4c8e44
         if (stonith_api) {
4c8e44
@@ -673,6 +689,7 @@ do_stonith_history_sync(gpointer user_data)
4c8e44
     if (stonith_api && (stonith_api->state != stonith_disconnected)) {
4c8e44
         stonith_history_t *history = NULL;
4c8e44
 
4c8e44
+        te_cleanup_stonith_history_sync(stonith_api, FALSE);
4c8e44
         stonith_api->cmds->history(stonith_api,
4c8e44
                                    st_opt_sync_call | st_opt_broadcast,
4c8e44
                                    NULL, &history, 5);
4c8e44
@@ -845,7 +862,33 @@ te_fence_node(crm_graph_t *graph, crm_action_t *action)
4c8e44
  */
4c8e44
 
4c8e44
 static crm_trigger_t *stonith_history_sync_trigger = NULL;
4c8e44
-static mainloop_timer_t *stonith_history_sync_timer = NULL;
4c8e44
+static mainloop_timer_t *stonith_history_sync_timer_short = NULL;
4c8e44
+static mainloop_timer_t *stonith_history_sync_timer_long = NULL;
4c8e44
+
4c8e44
+void
4c8e44
+te_cleanup_stonith_history_sync(stonith_t *st, bool free_timers)
4c8e44
+{
4c8e44
+    if (free_timers) {
4c8e44
+        mainloop_timer_del(stonith_history_sync_timer_short);
4c8e44
+        stonith_history_sync_timer_short = NULL;
4c8e44
+        mainloop_timer_del(stonith_history_sync_timer_long);
4c8e44
+        stonith_history_sync_timer_long = NULL;
4c8e44
+    } else {
4c8e44
+        mainloop_timer_stop(stonith_history_sync_timer_short);
4c8e44
+        mainloop_timer_stop(stonith_history_sync_timer_long);
4c8e44
+    }
4c8e44
+
4c8e44
+    if (st) {
4c8e44
+        st->cmds->remove_notification(st, T_STONITH_NOTIFY_HISTORY_SYNCED);
4c8e44
+    }
4c8e44
+}
4c8e44
+
4c8e44
+static void
4c8e44
+tengine_stonith_history_synced(stonith_t *st, stonith_event_t *st_event)
4c8e44
+{
4c8e44
+    te_cleanup_stonith_history_sync(st, FALSE);
4c8e44
+    crm_debug("Fence-history synced - cancel all timers");
4c8e44
+}
4c8e44
 
4c8e44
 static gboolean
4c8e44
 stonith_history_sync_set_trigger(gpointer user_data)
4c8e44
@@ -855,11 +898,18 @@ stonith_history_sync_set_trigger(gpointer user_data)
4c8e44
 }
4c8e44
 
4c8e44
 void
4c8e44
-te_trigger_stonith_history_sync(void)
4c8e44
+te_trigger_stonith_history_sync(bool long_timeout)
4c8e44
 {
4c8e44
     /* trigger a sync in 5s to give more nodes the
4c8e44
      * chance to show up so that we don't create
4c8e44
      * unnecessary stonith-history-sync traffic
4c8e44
+     *
4c8e44
+     * the long timeout of 30s is there as a fallback
4c8e44
+     * so that after a successful connection to fenced
4c8e44
+     * we will wait for 30s for the DC to trigger a
4c8e44
+     * history-sync
4c8e44
+     * if this doesn't happen we trigger a sync locally
4c8e44
+     * (e.g. fenced segfaults and is restarted by pacemakerd)
4c8e44
      */
4c8e44
 
4c8e44
     /* as we are finally checking the stonith-connection
4c8e44
@@ -873,14 +923,26 @@ te_trigger_stonith_history_sync(void)
4c8e44
                                  do_stonith_history_sync, NULL);
4c8e44
     }
4c8e44
 
4c8e44
-    if(stonith_history_sync_timer == NULL) {
4c8e44
-        stonith_history_sync_timer =
4c8e44
-            mainloop_timer_add("history_sync", 5000,
4c8e44
-                               FALSE, stonith_history_sync_set_trigger,
4c8e44
-                               NULL);
4c8e44
+    if (long_timeout) {
4c8e44
+        if(stonith_history_sync_timer_long == NULL) {
4c8e44
+            stonith_history_sync_timer_long =
4c8e44
+                mainloop_timer_add("history_sync_long", 30000,
4c8e44
+                                   FALSE, stonith_history_sync_set_trigger,
4c8e44
+                                   NULL);
4c8e44
+        }
4c8e44
+        crm_info("Fence history will be synchronized cluster-wide within 30 seconds");
4c8e44
+        mainloop_timer_start(stonith_history_sync_timer_long);
4c8e44
+    } else {
4c8e44
+        if(stonith_history_sync_timer_short == NULL) {
4c8e44
+            stonith_history_sync_timer_short =
4c8e44
+                mainloop_timer_add("history_sync_short", 5000,
4c8e44
+                                   FALSE, stonith_history_sync_set_trigger,
4c8e44
+                                   NULL);
4c8e44
+        }
4c8e44
+        crm_info("Fence history will be synchronized cluster-wide within 5 seconds");
4c8e44
+        mainloop_timer_start(stonith_history_sync_timer_short);
4c8e44
     }
4c8e44
-    crm_info("Fence history will be synchronized cluster-wide within 5 seconds");
4c8e44
-    mainloop_timer_start(stonith_history_sync_timer);
4c8e44
+
4c8e44
 }
4c8e44
 
4c8e44
 /* end stonith history synchronization functions */
4c8e44
diff --git a/daemons/controld/controld_fencing.h b/daemons/controld/controld_fencing.h
4c8e44
index 8f7f19b..2fe6d88 100644
4c8e44
--- a/daemons/controld/controld_fencing.h
4c8e44
+++ b/daemons/controld/controld_fencing.h
4c8e44
@@ -29,6 +29,7 @@ void purge_stonith_cleanup(void);
4c8e44
 void execute_stonith_cleanup(void);
4c8e44
 
4c8e44
 // stonith history synchronization
4c8e44
-void te_trigger_stonith_history_sync(void);
4c8e44
+void te_trigger_stonith_history_sync(bool long_timeout);
4c8e44
+void te_cleanup_stonith_history_sync(stonith_t *st, bool free_timers);
4c8e44
 
4c8e44
 #endif
4c8e44
-- 
4c8e44
1.8.3.1
4c8e44
4c8e44
From 2b038831edf6dd345c3f39f0fc27cfbf9503f512 Mon Sep 17 00:00:00 2001
4c8e44
From: Klaus Wenninger <klaus.wenninger@aon.at>
4c8e44
Date: Tue, 18 Jun 2019 21:54:49 +0200
4c8e44
Subject: [PATCH] Fix: st_client: make safe to remove notifications from
4c8e44
 notifications
4c8e44
4c8e44
While cycling over the notification-list just mark for deletion
4c8e44
and delete afterwards.
4c8e44
---
4c8e44
 lib/fencing/st_client.c | 58 +++++++++++++++++++++++++++++++++++++++++++++----
4c8e44
 1 file changed, 54 insertions(+), 4 deletions(-)
4c8e44
4c8e44
diff --git a/lib/fencing/st_client.c b/lib/fencing/st_client.c
4c8e44
index 629887a..ba23ac5 100644
4c8e44
--- a/lib/fencing/st_client.c
4c8e44
+++ b/lib/fencing/st_client.c
4c8e44
@@ -67,6 +67,8 @@ typedef struct stonith_private_s {
4c8e44
     mainloop_io_t *source;
4c8e44
     GHashTable *stonith_op_callback_table;
4c8e44
     GList *notify_list;
4c8e44
+    int notify_refcnt;
4c8e44
+    bool notify_deletes;
4c8e44
 
4c8e44
     void (*op_callback) (stonith_t * st, stonith_callback_data_t * data);
4c8e44
 
4c8e44
@@ -77,6 +79,7 @@ typedef struct stonith_notify_client_s {
4c8e44
     const char *obj_id;         /* implement one day */
4c8e44
     const char *obj_type;       /* implement one day */
4c8e44
     void (*notify) (stonith_t * st, stonith_event_t * e);
4c8e44
+    bool delete;
4c8e44
 
4c8e44
 } stonith_notify_client_t;
4c8e44
 
4c8e44
@@ -211,6 +214,38 @@ log_action(stonith_action_t *action, pid_t pid)
4c8e44
     }
4c8e44
 }
4c8e44
 
4c8e44
+/* when cycling through the list we don't want to delete items
4c8e44
+   so just mark them and when we know nobody is using the list
4c8e44
+   loop over it to remove the marked items
4c8e44
+ */
4c8e44
+static void
4c8e44
+foreach_notify_entry (stonith_private_t *private,
4c8e44
+                GFunc func,
4c8e44
+                gpointer user_data)
4c8e44
+{
4c8e44
+    private->notify_refcnt++;
4c8e44
+    g_list_foreach(private->notify_list, func, user_data);
4c8e44
+    private->notify_refcnt--;
4c8e44
+    if ((private->notify_refcnt == 0) &&
4c8e44
+        private->notify_deletes) {
4c8e44
+        GList *list_item = private->notify_list;
4c8e44
+
4c8e44
+        private->notify_deletes = FALSE;
4c8e44
+        while (list_item != NULL)
4c8e44
+        {
4c8e44
+            stonith_notify_client_t *list_client = list_item->data;
4c8e44
+            GList *next = g_list_next(list_item);
4c8e44
+
4c8e44
+            if (list_client->delete) {
4c8e44
+                free(list_client);
4c8e44
+                private->notify_list =
4c8e44
+                    g_list_delete_link(private->notify_list, list_item);
4c8e44
+            }
4c8e44
+            list_item = next;
4c8e44
+        }
4c8e44
+    }
4c8e44
+}
4c8e44
+
4c8e44
 static void
4c8e44
 stonith_connection_destroy(gpointer user_data)
4c8e44
 {
4c8e44
@@ -230,7 +265,7 @@ stonith_connection_destroy(gpointer user_data)
4c8e44
     crm_xml_add(blob.xml, F_TYPE, T_STONITH_NOTIFY);
4c8e44
     crm_xml_add(blob.xml, F_SUBTYPE, T_STONITH_NOTIFY_DISCONNECT);
4c8e44
 
4c8e44
-    g_list_foreach(native->notify_list, stonith_send_notification, &blob);
4c8e44
+    foreach_notify_entry(native, stonith_send_notification, &blob);
4c8e44
     free_xml(blob.xml);
4c8e44
 }
4c8e44
 
4c8e44
@@ -1140,6 +1175,10 @@ stonithlib_GCompareFunc(gconstpointer a, gconstpointer b)
4c8e44
     const stonith_notify_client_t *a_client = a;
4c8e44
     const stonith_notify_client_t *b_client = b;
4c8e44
 
4c8e44
+    if (a_client->delete || b_client->delete) {
4c8e44
+        /* make entries marked for deletion not findable */
4c8e44
+        return -1;
4c8e44
+    }
4c8e44
     CRM_CHECK(a_client->event != NULL && b_client->event != NULL, return 0);
4c8e44
     rc = strcmp(a_client->event, b_client->event);
4c8e44
     if (rc == 0) {
4c8e44
@@ -1394,7 +1433,7 @@ stonith_dispatch_internal(const char *buffer, ssize_t length, gpointer userdata)
4c8e44
         stonith_perform_callback(st, blob.xml, 0, 0);
4c8e44
 
4c8e44
     } else if (safe_str_eq(type, T_STONITH_NOTIFY)) {
4c8e44
-        g_list_foreach(private->notify_list, stonith_send_notification, &blob);
4c8e44
+        foreach_notify_entry(private, stonith_send_notification, &blob);
4c8e44
     } else if (safe_str_eq(type, T_STONITH_TIMEOUT_VALUE)) {
4c8e44
         int call_id = 0;
4c8e44
         int timeout = 0;
4c8e44
@@ -1592,8 +1631,13 @@ stonith_api_del_notification(stonith_t * stonith, const char *event)
4c8e44
     if (list_item != NULL) {
4c8e44
         stonith_notify_client_t *list_client = list_item->data;
4c8e44
 
4c8e44
-        private->notify_list = g_list_remove(private->notify_list, list_client);
4c8e44
-        free(list_client);
4c8e44
+        if (private->notify_refcnt) {
4c8e44
+            list_client->delete = TRUE;
4c8e44
+            private->notify_deletes = TRUE;
4c8e44
+        } else {
4c8e44
+            private->notify_list = g_list_remove(private->notify_list, list_client);
4c8e44
+            free(list_client);
4c8e44
+        }
4c8e44
 
4c8e44
         crm_trace("Removed callback");
4c8e44
 
4c8e44
@@ -1754,6 +1798,10 @@ stonith_send_notification(gpointer data, gpointer user_data)
4c8e44
         crm_warn("Skipping callback - NULL callback client");
4c8e44
         return;
4c8e44
 
4c8e44
+    } else if (entry->delete) {
4c8e44
+        crm_trace("Skipping callback - marked for deletion");
4c8e44
+        return;
4c8e44
+
4c8e44
     } else if (entry->notify == NULL) {
4c8e44
         crm_warn("Skipping callback - NULL callback");
4c8e44
         return;
4c8e44
@@ -2037,6 +2085,8 @@ stonith_api_new(void)
4c8e44
     private->stonith_op_callback_table = g_hash_table_new_full(g_direct_hash, g_direct_equal,
4c8e44
                                                                NULL, stonith_destroy_op_callback);
4c8e44
     private->notify_list = NULL;
4c8e44
+    private->notify_refcnt = 0;
4c8e44
+    private->notify_deletes = FALSE;
4c8e44
 
4c8e44
     new_stonith->call_id = 1;
4c8e44
     new_stonith->state = stonith_disconnected;
4c8e44
-- 
4c8e44
1.8.3.1
4c8e44
4c8e44
From 03765b7803f935f0db149843a0b90aa9c872d922 Mon Sep 17 00:00:00 2001
4c8e44
From: Klaus Wenninger <klaus.wenninger@aon.at>
4c8e44
Date: Fri, 21 Jun 2019 14:13:10 +0200
4c8e44
Subject: [PATCH] Test: CTS: new pattern to identify fenced reconnected
4c8e44
4c8e44
Now that we are removing notifications upon disconnect a duplicate
4c8e44
notification can't be used as sign for reconnection any more.
4c8e44
---
4c8e44
 cts/patterns.py | 2 +-
4c8e44
 1 file changed, 1 insertion(+), 1 deletion(-)
4c8e44
4c8e44
diff --git a/cts/patterns.py b/cts/patterns.py
4c8e44
index 1b86ee7..8de67b1 100644
4c8e44
--- a/cts/patterns.py
4c8e44
+++ b/cts/patterns.py
4c8e44
@@ -303,7 +303,7 @@ class crm_corosync(BasePatterns):
4c8e44
         self.components["pacemaker-fenced"] = [
4c8e44
             r"error:.*Connection to (fencer|stonith-ng).* (closed|failed|lost)",
4c8e44
             r"Fencing daemon connection failed",
4c8e44
-            r"pacemaker-controld.*:\s*warn.*:\s*Callback already present",
4c8e44
+            r"pacemaker-controld.*Fencer successfully connected",
4c8e44
         ]
4c8e44
         self.components["pacemaker-fenced-ignore"] = [
4c8e44
             r"error:.*Connection to (fencer|stonith-ng).* (closed|failed|lost)",
4c8e44
-- 
4c8e44
1.8.3.1
4c8e44
4c8e44
From c45c98cd77cb3e0913bcdb18fd6b116c3a25285d Mon Sep 17 00:00:00 2001
4c8e44
From: Klaus Wenninger <klaus.wenninger@aon.at>
4c8e44
Date: Fri, 21 Jun 2019 16:40:47 +0200
4c8e44
Subject: [PATCH] Fix: controld-fencing: add notice-log for successful
4c8e44
 fencer-connect
4c8e44
4c8e44
---
4c8e44
 daemons/controld/controld_fencing.c | 1 +
4c8e44
 1 file changed, 1 insertion(+)
4c8e44
4c8e44
diff --git a/daemons/controld/controld_fencing.c b/daemons/controld/controld_fencing.c
4c8e44
index 22fa727..2428168 100644
4c8e44
--- a/daemons/controld/controld_fencing.c
4c8e44
+++ b/daemons/controld/controld_fencing.c
4c8e44
@@ -632,6 +632,7 @@ te_connect_stonith(gpointer user_data)
4c8e44
                                                  T_STONITH_NOTIFY_HISTORY_SYNCED,
4c8e44
                                                  tengine_stonith_history_synced);
4c8e44
         te_trigger_stonith_history_sync(TRUE);
4c8e44
+        crm_notice("Fencer successfully connected");
4c8e44
     }
4c8e44
 
4c8e44
     return TRUE;
4c8e44
-- 
4c8e44
1.8.3.1
4c8e44