7ac83c
From ee7eba6a7a05bdf0a12d60ebabb334d8ee021101 Mon Sep 17 00:00:00 2001
7ac83c
From: Ken Gaillot <kgaillot@redhat.com>
7ac83c
Date: Mon, 9 Aug 2021 14:48:57 -0500
7ac83c
Subject: [PATCH] Fix: controller: ensure lost node's transient attributes are
7ac83c
 cleared without DC
7ac83c
7ac83c
Previously, peer_update_callback() cleared a lost node's transient attributes
7ac83c
if either the local node was the DC or there was no DC.
7ac83c
7ac83c
However, that left the possibility of the DC being lost at the same time as
7ac83c
another node -- the local node would still have fsa_our_dc set while processing
7ac83c
the leave notifications, so no node would clear the attributes for the non-DC
7ac83c
node.
7ac83c
7ac83c
Now, the controller has its own CPG configuration change callback, which sets a
7ac83c
global boolean before calling the usual one, so that peer_update_callback() can
7ac83c
know when the DC has been lost.
7ac83c
---
7ac83c
 daemons/controld/controld_callbacks.c |  4 +-
7ac83c
 daemons/controld/controld_corosync.c  | 57 ++++++++++++++++++++++++++-
7ac83c
 2 files changed, 59 insertions(+), 2 deletions(-)
7ac83c
7ac83c
diff --git a/daemons/controld/controld_callbacks.c b/daemons/controld/controld_callbacks.c
7ac83c
index af24856ae..e564b3dcd 100644
7ac83c
--- a/daemons/controld/controld_callbacks.c
7ac83c
+++ b/daemons/controld/controld_callbacks.c
7ac83c
@@ -99,6 +99,8 @@ node_alive(const crm_node_t *node)
7ac83c
 
7ac83c
 #define state_text(state) ((state)? (const char *)(state) : "in unknown state")
7ac83c
 
7ac83c
+bool controld_dc_left = false;
7ac83c
+
7ac83c
 void
7ac83c
 peer_update_callback(enum crm_status_type type, crm_node_t * node, const void *data)
7ac83c
 {
7ac83c
@@ -217,7 +219,7 @@ peer_update_callback(enum crm_status_type type, crm_node_t * node, const void *d
7ac83c
                                                cib_scope_local);
7ac83c
                 }
7ac83c
 
7ac83c
-            } else if (AM_I_DC || (fsa_our_dc == NULL)) {
7ac83c
+            } else if (AM_I_DC || controld_dc_left || (fsa_our_dc == NULL)) {
7ac83c
                 /* This only needs to be done once, so normally the DC should do
7ac83c
                  * it. However if there is no DC, every node must do it, since
7ac83c
                  * there is no other way to ensure some one node does it.
7ac83c
diff --git a/daemons/controld/controld_corosync.c b/daemons/controld/controld_corosync.c
7ac83c
index db99630fb..c5ab6580a 100644
7ac83c
--- a/daemons/controld/controld_corosync.c
7ac83c
+++ b/daemons/controld/controld_corosync.c
7ac83c
@@ -87,6 +87,61 @@ crmd_cs_destroy(gpointer user_data)
7ac83c
     }
7ac83c
 }
7ac83c
 
7ac83c
+extern bool controld_dc_left;
7ac83c
+
7ac83c
+/*!
7ac83c
+ * \brief Handle a Corosync notification of a CPG configuration change
7ac83c
+ *
7ac83c
+ * \param[in] handle               CPG connection
7ac83c
+ * \param[in] cpg_name             CPG group name
7ac83c
+ * \param[in] member_list          List of current CPG members
7ac83c
+ * \param[in] member_list_entries  Number of entries in \p member_list
7ac83c
+ * \param[in] left_list            List of CPG members that left
7ac83c
+ * \param[in] left_list_entries    Number of entries in \p left_list
7ac83c
+ * \param[in] joined_list          List of CPG members that joined
7ac83c
+ * \param[in] joined_list_entries  Number of entries in \p joined_list
7ac83c
+ */
7ac83c
+static void
7ac83c
+cpg_membership_callback(cpg_handle_t handle, const struct cpg_name *cpg_name,
7ac83c
+                        const struct cpg_address *member_list,
7ac83c
+                        size_t member_list_entries,
7ac83c
+                        const struct cpg_address *left_list,
7ac83c
+                        size_t left_list_entries,
7ac83c
+                        const struct cpg_address *joined_list,
7ac83c
+                        size_t joined_list_entries)
7ac83c
+{
7ac83c
+    /* When nodes leave CPG, the DC clears their transient node attributes.
7ac83c
+     *
7ac83c
+     * However if there is no DC, or the DC is among the nodes that left, each
7ac83c
+     * remaining node needs to do the clearing, to ensure it gets done.
7ac83c
+     * Otherwise, the attributes would persist when the nodes rejoin, which
7ac83c
+     * could have serious consequences for unfencing, agents that use attributes
7ac83c
+     * for internal logic, etc.
7ac83c
+     *
7ac83c
+     * Here, we set a global boolean if the DC is among the nodes that left, for
7ac83c
+     * use by the peer callback.
7ac83c
+     */
7ac83c
+    if (fsa_our_dc != NULL) {
7ac83c
+        crm_node_t *peer = pcmk__search_cluster_node_cache(0, fsa_our_dc);
7ac83c
+
7ac83c
+        if (peer != NULL) {
7ac83c
+            for (size_t i = 0; i < left_list_entries; ++i) {
7ac83c
+                if (left_list[i].nodeid == peer->id) {
7ac83c
+                    controld_dc_left = true;
7ac83c
+                    break;
7ac83c
+                }
7ac83c
+            }
7ac83c
+        }
7ac83c
+    }
7ac83c
+
7ac83c
+    // Process the change normally, which will call the peer callback as needed
7ac83c
+    pcmk_cpg_membership(handle, cpg_name, member_list, member_list_entries,
7ac83c
+                        left_list, left_list_entries,
7ac83c
+                        joined_list, joined_list_entries);
7ac83c
+    // The flag applies only to the change just processed, so clear it again
7ac83c
+    controld_dc_left = false;
7ac83c
+}
7ac83c
+
7ac83c
 extern gboolean crm_connect_corosync(crm_cluster_t * cluster);
7ac83c
 
7ac83c
 gboolean
7ac83c
@@ -95,7 +150,7 @@ crm_connect_corosync(crm_cluster_t * cluster)
7ac83c
     if (is_corosync_cluster()) {
7ac83c
         crm_set_status_callback(&peer_update_callback);
7ac83c
         cluster->cpg.cpg_deliver_fn = crmd_cs_dispatch;
7ac83c
-        cluster->cpg.cpg_confchg_fn = pcmk_cpg_membership;
7ac83c
+        cluster->cpg.cpg_confchg_fn = cpg_membership_callback;
7ac83c
         cluster->destroy = crmd_cs_destroy;
7ac83c
 
7ac83c
         if (crm_cluster_connect(cluster)) {
7ac83c
-- 
7ac83c
2.27.0
7ac83c