06a831
From ee7eba6a7a05bdf0a12d60ebabb334d8ee021101 Mon Sep 17 00:00:00 2001
06a831
From: Ken Gaillot <kgaillot@redhat.com>
06a831
Date: Mon, 9 Aug 2021 14:48:57 -0500
06a831
Subject: [PATCH] Fix: controller: ensure lost node's transient attributes are
06a831
 cleared without DC
06a831
06a831
Previously, peer_update_callback() cleared a lost node's transient attributes
06a831
if either the local node was DC, or there was no DC.
06a831
06a831
However, that left the possibility of the DC being lost at the same time as
06a831
another node -- the local node would still have fsa_our_dc set while processing
06a831
the leave notifications, so no node would clear the attributes for the non-DC
06a831
node.
06a831
06a831
Now, the controller has its own CPG configuration change callback, which sets a
06a831
global boolean before calling the usual one, so that peer_update_callback() can
06a831
know when the DC has been lost.
06a831
---
06a831
 daemons/controld/controld_callbacks.c |  4 +-
06a831
 daemons/controld/controld_corosync.c  | 57 ++++++++++++++++++++++++++-
06a831
 2 files changed, 59 insertions(+), 2 deletions(-)
06a831
06a831
diff --git a/daemons/controld/controld_callbacks.c b/daemons/controld/controld_callbacks.c
06a831
index af24856ae..e564b3dcd 100644
06a831
--- a/daemons/controld/controld_callbacks.c
06a831
+++ b/daemons/controld/controld_callbacks.c
06a831
@@ -99,6 +99,8 @@ node_alive(const crm_node_t *node)
06a831
 
06a831
 #define state_text(state) ((state)? (const char *)(state) : "in unknown state")
06a831
 
06a831
+bool controld_dc_left = false;
06a831
+
06a831
 void
06a831
 peer_update_callback(enum crm_status_type type, crm_node_t * node, const void *data)
06a831
 {
06a831
@@ -217,7 +219,7 @@ peer_update_callback(enum crm_status_type type, crm_node_t * node, const void *d
06a831
                                                cib_scope_local);
06a831
                 }
06a831
 
06a831
-            } else if (AM_I_DC || (fsa_our_dc == NULL)) {
06a831
+            } else if (AM_I_DC || controld_dc_left || (fsa_our_dc == NULL)) {
06a831
                 /* This only needs to be done once, so normally the DC should do
06a831
                  * it. However if there is no DC, every node must do it, since
06a831
                  * there is no other way to ensure some one node does it.
06a831
diff --git a/daemons/controld/controld_corosync.c b/daemons/controld/controld_corosync.c
06a831
index db99630fb..c5ab6580a 100644
06a831
--- a/daemons/controld/controld_corosync.c
06a831
+++ b/daemons/controld/controld_corosync.c
06a831
@@ -87,6 +87,61 @@ crmd_cs_destroy(gpointer user_data)
06a831
     }
06a831
 }
06a831
 
06a831
+extern bool controld_dc_left;
06a831
+
06a831
+/*!
06a831
+ * \brief Handle a Corosync notification of a CPG configuration change
06a831
+ *
06a831
+ * \param[in] handle               CPG connection
06a831
+ * \param[in] cpg_name             CPG group name
06a831
+ * \param[in] member_list          List of current CPG members
06a831
+ * \param[in] member_list_entries  Number of entries in \p member_list
06a831
+ * \param[in] left_list            List of CPG members that left
06a831
+ * \param[in] left_list_entries    Number of entries in \p left_list
06a831
+ * \param[in] joined_list          List of CPG members that joined
06a831
+ * \param[in] joined_list_entries  Number of entries in \p joined_list
06a831
+ */
06a831
+static void
06a831
+cpg_membership_callback(cpg_handle_t handle, const struct cpg_name *cpg_name,
06a831
+                        const struct cpg_address *member_list,
06a831
+                        size_t member_list_entries,
06a831
+                        const struct cpg_address *left_list,
06a831
+                        size_t left_list_entries,
06a831
+                        const struct cpg_address *joined_list,
06a831
+                        size_t joined_list_entries)
06a831
+{
06a831
+    /* When nodes leave CPG, the DC clears their transient node attributes.
06a831
+     *
06a831
+     * However if there is no DC, or the DC is among the nodes that left, each
06a831
+     * remaining node needs to do the clearing, to ensure it gets done.
06a831
+     * Otherwise, the attributes would persist when the nodes rejoin, which
06a831
+     * could have serious consequences for unfencing, agents that use attributes
06a831
+     * for internal logic, etc.
06a831
+     *
06a831
+     * Here, we set a global boolean if the DC is among the nodes that left, for
06a831
+     * use by the peer callback.
06a831
+     */
06a831
+    if (fsa_our_dc != NULL) {
06a831
+        crm_node_t *peer = pcmk__search_cluster_node_cache(0, fsa_our_dc);
06a831
+
06a831
+        if (peer != NULL) {
06a831
+            for (int i = 0; i < left_list_entries; ++i) {
06a831
+                if (left_list[i].nodeid == peer->id) {
06a831
+                    controld_dc_left = true;
06a831
+                    break;
06a831
+                }
06a831
+            }
06a831
+        }
06a831
+    }
06a831
+
06a831
+    // Process the change normally, which will call the peer callback as needed
06a831
+    pcmk_cpg_membership(handle, cpg_name, member_list, member_list_entries,
06a831
+                        left_list, left_list_entries,
06a831
+                        joined_list, joined_list_entries);
06a831
+
06a831
+    controld_dc_left = false;
06a831
+}
06a831
+
06a831
 extern gboolean crm_connect_corosync(crm_cluster_t * cluster);
06a831
 
06a831
 gboolean
06a831
@@ -95,7 +150,7 @@ crm_connect_corosync(crm_cluster_t * cluster)
06a831
     if (is_corosync_cluster()) {
06a831
         crm_set_status_callback(&peer_update_callback);
06a831
         cluster->cpg.cpg_deliver_fn = crmd_cs_dispatch;
06a831
-        cluster->cpg.cpg_confchg_fn = pcmk_cpg_membership;
06a831
+        cluster->cpg.cpg_confchg_fn = cpg_membership_callback;
06a831
         cluster->destroy = crmd_cs_destroy;
06a831
 
06a831
         if (crm_cluster_connect(cluster)) {
06a831
-- 
06a831
2.27.0
06a831