599c7d
From ee7eba6a7a05bdf0a12d60ebabb334d8ee021101 Mon Sep 17 00:00:00 2001
599c7d
From: Ken Gaillot <kgaillot@redhat.com>
599c7d
Date: Mon, 9 Aug 2021 14:48:57 -0500
599c7d
Subject: [PATCH] Fix: controller: ensure lost node's transient attributes are
599c7d
 cleared without DC
599c7d
599c7d
Previously, peer_update_callback() cleared a lost node's transient attributes
599c7d
if either the local node is DC, or there is no DC.
599c7d
599c7d
However, that left the possibility of the DC being lost at the same time as
599c7d
another node -- the local node would still have fsa_our_dc set while processing
599c7d
the leave notifications, so no node would clear the attributes for the non-DC
599c7d
node.
599c7d
599c7d
Now, the controller has its own CPG configuration change callback, which sets a
599c7d
global boolean before calling the usual one, so that peer_update_callback() can
599c7d
know when the DC has been lost.
599c7d
---
599c7d
 daemons/controld/controld_callbacks.c |  4 +-
599c7d
 daemons/controld/controld_corosync.c  | 57 ++++++++++++++++++++++++++-
599c7d
 2 files changed, 59 insertions(+), 2 deletions(-)
599c7d
599c7d
diff --git a/daemons/controld/controld_callbacks.c b/daemons/controld/controld_callbacks.c
599c7d
index af24856ae..e564b3dcd 100644
599c7d
--- a/daemons/controld/controld_callbacks.c
599c7d
+++ b/daemons/controld/controld_callbacks.c
599c7d
@@ -99,6 +99,8 @@ node_alive(const crm_node_t *node)
599c7d
 
599c7d
 #define state_text(state) ((state)? (const char *)(state) : "in unknown state")
599c7d
 
599c7d
+bool controld_dc_left = false;
599c7d
+
599c7d
 void
599c7d
 peer_update_callback(enum crm_status_type type, crm_node_t * node, const void *data)
599c7d
 {
599c7d
@@ -217,7 +219,7 @@ peer_update_callback(enum crm_status_type type, crm_node_t * node, const void *d
599c7d
                                                cib_scope_local);
599c7d
                 }
599c7d
 
599c7d
-            } else if (AM_I_DC || (fsa_our_dc == NULL)) {
599c7d
+            } else if (AM_I_DC || controld_dc_left || (fsa_our_dc == NULL)) {
599c7d
                 /* This only needs to be done once, so normally the DC should do
599c7d
                  * it. However if there is no DC, every node must do it, since
599c7d
                  * there is no other way to ensure some one node does it.
599c7d
diff --git a/daemons/controld/controld_corosync.c b/daemons/controld/controld_corosync.c
599c7d
index db99630fb..c5ab6580a 100644
599c7d
--- a/daemons/controld/controld_corosync.c
599c7d
+++ b/daemons/controld/controld_corosync.c
599c7d
@@ -87,6 +87,61 @@ crmd_cs_destroy(gpointer user_data)
599c7d
     }
599c7d
 }
599c7d
 
599c7d
+extern bool controld_dc_left;
599c7d
+
599c7d
+/*!
599c7d
+ * \brief Handle a Corosync notification of a CPG configuration change
599c7d
+ *
599c7d
+ * \param[in] handle               CPG connection
599c7d
+ * \param[in] cpg_name             CPG group name
599c7d
+ * \param[in] member_list          List of current CPG members
599c7d
+ * \param[in] member_list_entries  Number of entries in \p member_list
599c7d
+ * \param[in] left_list            List of CPG members that left
599c7d
+ * \param[in] left_list_entries    Number of entries in \p left_list
599c7d
+ * \param[in] joined_list          List of CPG members that joined
599c7d
+ * \param[in] joined_list_entries  Number of entries in \p joined_list
599c7d
+ */
599c7d
+static void
599c7d
+cpg_membership_callback(cpg_handle_t handle, const struct cpg_name *cpg_name,
599c7d
+                        const struct cpg_address *member_list,
599c7d
+                        size_t member_list_entries,
599c7d
+                        const struct cpg_address *left_list,
599c7d
+                        size_t left_list_entries,
599c7d
+                        const struct cpg_address *joined_list,
599c7d
+                        size_t joined_list_entries)
599c7d
+{
599c7d
+    /* When nodes leave CPG, the DC clears their transient node attributes.
599c7d
+     *
599c7d
+     * However if there is no DC, or the DC is among the nodes that left, each
599c7d
+     * remaining node needs to do the clearing, to ensure it gets done.
599c7d
+     * Otherwise, the attributes would persist when the nodes rejoin, which
599c7d
+     * could have serious consequences for unfencing, agents that use attributes
599c7d
+     * for internal logic, etc.
599c7d
+     *
599c7d
+     * Here, we set a global boolean if the DC is among the nodes that left, for
599c7d
+     * use by the peer callback.
599c7d
+     */
599c7d
+    if (fsa_our_dc != NULL) {
599c7d
+        crm_node_t *peer = pcmk__search_cluster_node_cache(0, fsa_our_dc);
599c7d
+
599c7d
+        if (peer != NULL) {
599c7d
+            for (int i = 0; i < left_list_entries; ++i) {
599c7d
+                if (left_list[i].nodeid == peer->id) {
599c7d
+                    controld_dc_left = true;
599c7d
+                    break;
599c7d
+                }
599c7d
+            }
599c7d
+        }
599c7d
+    }
599c7d
+
599c7d
+    // Process the change normally, which will call the peer callback as needed
599c7d
+    pcmk_cpg_membership(handle, cpg_name, member_list, member_list_entries,
599c7d
+                        left_list, left_list_entries,
599c7d
+                        joined_list, joined_list_entries);
599c7d
+
599c7d
+    controld_dc_left = false;
599c7d
+}
599c7d
+
599c7d
 extern gboolean crm_connect_corosync(crm_cluster_t * cluster);
599c7d
 
599c7d
 gboolean
599c7d
@@ -95,7 +150,7 @@ crm_connect_corosync(crm_cluster_t * cluster)
599c7d
     if (is_corosync_cluster()) {
599c7d
         crm_set_status_callback(&peer_update_callback);
599c7d
         cluster->cpg.cpg_deliver_fn = crmd_cs_dispatch;
599c7d
-        cluster->cpg.cpg_confchg_fn = pcmk_cpg_membership;
599c7d
+        cluster->cpg.cpg_confchg_fn = cpg_membership_callback;
599c7d
         cluster->destroy = crmd_cs_destroy;
599c7d
 
599c7d
         if (crm_cluster_connect(cluster)) {
599c7d
-- 
599c7d
2.27.0
599c7d