Blame SOURCES/007-unfencing-loop.patch

d49186
From 6dcd6b51d7d3993bc483588d6ed75077518ed600 Mon Sep 17 00:00:00 2001
d49186
From: Ken Gaillot <kgaillot@redhat.com>
d49186
Date: Fri, 4 Jun 2021 16:30:55 -0500
d49186
Subject: [PATCH 01/11] Low: controller: check whether unfenced node was remote
d49186
 node
d49186
d49186
... so the controller can indicate the node is remote (if known at that point,
d49186
which is not guaranteed) when setting unfencing-related node attributes.
d49186
---
d49186
 daemons/controld/controld_fencing.c | 21 ++++++++++++++++++---
d49186
 1 file changed, 18 insertions(+), 3 deletions(-)
d49186
d49186
diff --git a/daemons/controld/controld_fencing.c b/daemons/controld/controld_fencing.c
d49186
index 23dff28..0fba661 100644
d49186
--- a/daemons/controld/controld_fencing.c
d49186
+++ b/daemons/controld/controld_fencing.c
d49186
@@ -757,15 +757,30 @@ tengine_stonith_callback(stonith_t *stonith, stonith_callback_data_t *data)
d49186
             if (pcmk__str_eq("on", op, pcmk__str_casei)) {
d49186
                 const char *value = NULL;
d49186
                 char *now = pcmk__ttoa(time(NULL));
d49186
+                gboolean is_remote_node = FALSE;
d49186
+
d49186
+                /* This check is not 100% reliable, since this node is not
d49186
+                 * guaranteed to have the remote node cached. However, it
d49186
+                 * doesn't have to be reliable, since the attribute manager can
d49186
+                 * learn a node's "remoteness" by other means sooner or later.
d49186
+                 * This allows it to learn more quickly if this node does have
d49186
+                 * the information.
d49186
+                 */
d49186
+                if (g_hash_table_lookup(crm_remote_peer_cache, uuid) != NULL) {
d49186
+                    is_remote_node = TRUE;
d49186
+                }
d49186
 
d49186
-                update_attrd(target, CRM_ATTR_UNFENCED, now, NULL, FALSE);
d49186
+                update_attrd(target, CRM_ATTR_UNFENCED, now, NULL,
d49186
+                             is_remote_node);
d49186
                 free(now);
d49186
 
d49186
                 value = crm_meta_value(action->params, XML_OP_ATTR_DIGESTS_ALL);
d49186
-                update_attrd(target, CRM_ATTR_DIGESTS_ALL, value, NULL, FALSE);
d49186
+                update_attrd(target, CRM_ATTR_DIGESTS_ALL, value, NULL,
d49186
+                             is_remote_node);
d49186
 
d49186
                 value = crm_meta_value(action->params, XML_OP_ATTR_DIGESTS_SECURE);
d49186
-                update_attrd(target, CRM_ATTR_DIGESTS_SECURE, value, NULL, FALSE);
d49186
+                update_attrd(target, CRM_ATTR_DIGESTS_SECURE, value, NULL,
d49186
+                             is_remote_node);
d49186
 
d49186
             } else if (action->sent_update == FALSE) {
d49186
                 send_stonith_update(action, target, uuid);
d49186
-- 
d49186
1.8.3.1
d49186
d49186
d49186
From 3ef6d9403f68ab8559c45cc99f5a8da05ca6420b Mon Sep 17 00:00:00 2001
d49186
From: Ken Gaillot <kgaillot@redhat.com>
d49186
Date: Mon, 7 Jun 2021 10:50:36 -0500
d49186
Subject: [PATCH 02/11] Refactor: pacemaker-attrd: functionize adding remote
d49186
 node to cache
d49186
d49186
... for future reuse
d49186
---
d49186
 daemons/attrd/attrd_commands.c | 34 +++++++++++++++++++++++-----------
d49186
 1 file changed, 23 insertions(+), 11 deletions(-)
d49186
d49186
diff --git a/daemons/attrd/attrd_commands.c b/daemons/attrd/attrd_commands.c
d49186
index 731c243..93a165b 100644
d49186
--- a/daemons/attrd/attrd_commands.c
d49186
+++ b/daemons/attrd/attrd_commands.c
d49186
@@ -102,6 +102,28 @@ free_attribute(gpointer data)
d49186
     }
d49186
 }
d49186
 
d49186
+/*!
d49186
+ * \internal
d49186
+ * \brief Ensure a Pacemaker Remote node is in the correct peer cache
d49186
+ *
d49186
+ * \param[in]
d49186
+ */
d49186
+static void
d49186
+cache_remote_node(const char *node_name)
d49186
+{
d49186
+    /* If we previously assumed this node was an unseen cluster node,
d49186
+     * remove its entry from the cluster peer cache.
d49186
+     */
d49186
+    crm_node_t *dup = pcmk__search_cluster_node_cache(0, node_name);
d49186
+
d49186
+    if (dup && (dup->uuid == NULL)) {
d49186
+        reap_crm_member(0, node_name);
d49186
+    }
d49186
+
d49186
+    // Ensure node is in the remote peer cache
d49186
+    CRM_ASSERT(crm_remote_peer_get(node_name) != NULL);
d49186
+}
d49186
+
d49186
 static xmlNode *
d49186
 build_attribute_xml(
d49186
     xmlNode *parent, const char *name, const char *set, const char *uuid, unsigned int timeout_ms, const char *user,
d49186
@@ -709,17 +731,7 @@ attrd_lookup_or_create_value(GHashTable *values, const char *host, xmlNode *xml)
d49186
 
d49186
     crm_element_value_int(xml, PCMK__XA_ATTR_IS_REMOTE, &is_remote);
d49186
     if (is_remote) {
d49186
-        /* If we previously assumed this node was an unseen cluster node,
d49186
-         * remove its entry from the cluster peer cache.
d49186
-         */
d49186
-        crm_node_t *dup = pcmk__search_cluster_node_cache(0, host);
d49186
-
d49186
-        if (dup && (dup->uuid == NULL)) {
d49186
-            reap_crm_member(0, host);
d49186
-        }
d49186
-
d49186
-        /* Ensure this host is in the remote peer cache */
d49186
-        CRM_ASSERT(crm_remote_peer_get(host) != NULL);
d49186
+        cache_remote_node(host);
d49186
     }
d49186
 
d49186
     if (v == NULL) {
d49186
-- 
d49186
1.8.3.1
d49186
d49186
d49186
From 6fac2c71bc2c56870ac828d7cd7b7c799279c47e Mon Sep 17 00:00:00 2001
d49186
From: Ken Gaillot <kgaillot@redhat.com>
d49186
Date: Mon, 7 Jun 2021 10:39:34 -0500
d49186
Subject: [PATCH 03/11] Refactor: pacemaker-attrd: don't try to remove votes
d49186
 for remote nodes
d49186
d49186
Remote nodes never vote.
d49186
d49186
This has no effect in practice since the removal would simply do nothing,
d49186
but we might as well not waste time trying.
d49186
---
d49186
 daemons/attrd/attrd_commands.c | 11 ++++++-----
d49186
 1 file changed, 6 insertions(+), 5 deletions(-)
d49186
d49186
diff --git a/daemons/attrd/attrd_commands.c b/daemons/attrd/attrd_commands.c
d49186
index 93a165b..dbe777e 100644
d49186
--- a/daemons/attrd/attrd_commands.c
d49186
+++ b/daemons/attrd/attrd_commands.c
d49186
@@ -976,7 +976,8 @@ attrd_election_cb(gpointer user_data)
d49186
 void
d49186
 attrd_peer_change_cb(enum crm_status_type kind, crm_node_t *peer, const void *data)
d49186
 {
d49186
-    bool remove_voter = FALSE;
d49186
+    bool gone = false;
d49186
+    bool is_remote = pcmk_is_set(peer->flags, crm_remote_node);
d49186
 
d49186
     switch (kind) {
d49186
         case crm_status_uname:
d49186
@@ -984,7 +985,7 @@ attrd_peer_change_cb(enum crm_status_type kind, crm_node_t *peer, const void *da
d49186
 
d49186
         case crm_status_processes:
d49186
             if (!pcmk_is_set(peer->processes, crm_get_cluster_proc())) {
d49186
-                remove_voter = TRUE;
d49186
+                gone = true;
d49186
             }
d49186
             break;
d49186
 
d49186
@@ -1000,13 +1001,13 @@ attrd_peer_change_cb(enum crm_status_type kind, crm_node_t *peer, const void *da
d49186
             } else {
d49186
                 // Remove all attribute values associated with lost nodes
d49186
                 attrd_peer_remove(peer->uname, FALSE, "loss");
d49186
-                remove_voter = TRUE;
d49186
+                gone = true;
d49186
             }
d49186
             break;
d49186
     }
d49186
 
d49186
-    // In case an election is in progress, remove any vote by the node
d49186
-    if (remove_voter) {
d49186
+    // Remove votes from cluster nodes that leave, in case election in progress
d49186
+    if (gone && !is_remote) {
d49186
         attrd_remove_voter(peer);
d49186
     }
d49186
 }
d49186
-- 
d49186
1.8.3.1
d49186
d49186
d49186
From 54089fc663d6aaf10ca164c6c94b3b17237788de Mon Sep 17 00:00:00 2001
d49186
From: Ken Gaillot <kgaillot@redhat.com>
d49186
Date: Mon, 7 Jun 2021 10:40:06 -0500
d49186
Subject: [PATCH 04/11] Low: pacemaker-attrd: check for remote nodes in peer
d49186
 update callback
d49186
d49186
If a remote node was started before the local cluster node joined the cluster,
d49186
the cluster node will assume its node attributes are for a cluster node until
d49186
it learns otherwise. Check for remoteness in the peer update callback, to have
d49186
another way we can learn it.
d49186
---
d49186
 daemons/attrd/attrd_commands.c | 4 ++++
d49186
 1 file changed, 4 insertions(+)
d49186
d49186
diff --git a/daemons/attrd/attrd_commands.c b/daemons/attrd/attrd_commands.c
d49186
index dbe777e..5f6a754 100644
d49186
--- a/daemons/attrd/attrd_commands.c
d49186
+++ b/daemons/attrd/attrd_commands.c
d49186
@@ -1009,6 +1009,10 @@ attrd_peer_change_cb(enum crm_status_type kind, crm_node_t *peer, const void *da
d49186
     // Remove votes from cluster nodes that leave, in case election in progress
d49186
     if (gone && !is_remote) {
d49186
         attrd_remove_voter(peer);
d49186
+
d49186
+    // Ensure remote nodes that come up are in the remote node cache
d49186
+    } else if (!gone && is_remote) {
d49186
+        cache_remote_node(peer->uname);
d49186
     }
d49186
 }
d49186
 
d49186
-- 
d49186
1.8.3.1
d49186
d49186
d49186
From 8c048df0312d0d9c857d87b570a352429a710928 Mon Sep 17 00:00:00 2001
d49186
From: Ken Gaillot <kgaillot@redhat.com>
d49186
Date: Mon, 7 Jun 2021 11:29:12 -0500
d49186
Subject: [PATCH 05/11] Log: pacemaker-attrd: log peer status changes
d49186
d49186
---
d49186
 daemons/attrd/attrd_commands.c | 9 +++++++++
d49186
 1 file changed, 9 insertions(+)
d49186
d49186
diff --git a/daemons/attrd/attrd_commands.c b/daemons/attrd/attrd_commands.c
d49186
index 5f6a754..d6d179b 100644
d49186
--- a/daemons/attrd/attrd_commands.c
d49186
+++ b/daemons/attrd/attrd_commands.c
d49186
@@ -972,6 +972,7 @@ attrd_election_cb(gpointer user_data)
d49186
     return FALSE;
d49186
 }
d49186
 
d49186
+#define state_text(state) ((state)? (const char *)(state) : "in unknown state")
d49186
 
d49186
 void
d49186
 attrd_peer_change_cb(enum crm_status_type kind, crm_node_t *peer, const void *data)
d49186
@@ -981,15 +982,23 @@ attrd_peer_change_cb(enum crm_status_type kind, crm_node_t *peer, const void *da
d49186
 
d49186
     switch (kind) {
d49186
         case crm_status_uname:
d49186
+            crm_debug("%s node %s is now %s",
d49186
+                      (is_remote? "Remote" : "Cluster"),
d49186
+                      peer->uname, state_text(peer->state));
d49186
             break;
d49186
 
d49186
         case crm_status_processes:
d49186
             if (!pcmk_is_set(peer->processes, crm_get_cluster_proc())) {
d49186
                 gone = true;
d49186
             }
d49186
+            crm_debug("Node %s is %s a peer",
d49186
+                      peer->uname, (gone? "no longer" : "now"));
d49186
             break;
d49186
 
d49186
         case crm_status_nstate:
d49186
+            crm_debug("%s node %s is now %s (was %s)",
d49186
+                      (is_remote? "Remote" : "Cluster"),
d49186
+                      peer->uname, state_text(peer->state), state_text(data));
d49186
             if (pcmk__str_eq(peer->state, CRM_NODE_MEMBER, pcmk__str_casei)) {
d49186
                 /* If we're the writer, send new peers a list of all attributes
d49186
                  * (unless it's a remote node, which doesn't run its own attrd)
d49186
-- 
d49186
1.8.3.1
d49186
d49186
d49186
From 1dcc8dee4990cf0dbdec0e14db6d9a3ad67a41d5 Mon Sep 17 00:00:00 2001
d49186
From: Ken Gaillot <kgaillot@redhat.com>
d49186
Date: Mon, 7 Jun 2021 11:13:53 -0500
d49186
Subject: [PATCH 06/11] Low: pacemaker-attrd: ensure node ID is only set for
d49186
 attributes when known
d49186
d49186
In most cases, attribute updates contained the node ID, and the node ID was
d49186
used by other code, only if known (i.e. positive). However a couple places did
d49186
not check this, so add that.
d49186
d49186
I am unsure whether the missing check caused problems in practice, but there
d49186
appears to be the possibility that a remote node would wrongly be added to the
d49186
cluster node cache.
d49186
---
d49186
 daemons/attrd/attrd_commands.c | 6 ++++--
d49186
 1 file changed, 4 insertions(+), 2 deletions(-)
d49186
d49186
diff --git a/daemons/attrd/attrd_commands.c b/daemons/attrd/attrd_commands.c
d49186
index d6d179b..b3f441c 100644
d49186
--- a/daemons/attrd/attrd_commands.c
d49186
+++ b/daemons/attrd/attrd_commands.c
d49186
@@ -136,7 +136,9 @@ build_attribute_xml(
d49186
     crm_xml_add(xml, PCMK__XA_ATTR_UUID, uuid);
d49186
     crm_xml_add(xml, PCMK__XA_ATTR_USER, user);
d49186
     crm_xml_add(xml, PCMK__XA_ATTR_NODE_NAME, peer);
d49186
-    crm_xml_add_int(xml, PCMK__XA_ATTR_NODE_ID, peerid);
d49186
+    if (peerid > 0) {
d49186
+        crm_xml_add_int(xml, PCMK__XA_ATTR_NODE_ID, peerid);
d49186
+    }
d49186
     crm_xml_add(xml, PCMK__XA_ATTR_VALUE, value);
d49186
     crm_xml_add_int(xml, PCMK__XA_ATTR_DAMPENING, timeout_ms/1000);
d49186
     crm_xml_add_int(xml, PCMK__XA_ATTR_IS_PRIVATE, is_private);
d49186
@@ -937,7 +939,7 @@ attrd_peer_update(crm_node_t *peer, xmlNode *xml, const char *host, bool filter)
d49186
     /* If this is a cluster node whose node ID we are learning, remember it */
d49186
     if ((v->nodeid == 0) && (v->is_remote == FALSE)
d49186
         && (crm_element_value_int(xml, PCMK__XA_ATTR_NODE_ID,
d49186
-                                  (int*)&v->nodeid) == 0)) {
d49186
+                                  (int*)&v->nodeid) == 0) && (v->nodeid > 0)) {
d49186
 
d49186
         crm_node_t *known_peer = crm_get_peer(v->nodeid, host);
d49186
 
d49186
-- 
d49186
1.8.3.1
d49186
d49186
d49186
From 8d12490e88b558d01db37a38f7d35175c6d2d69a Mon Sep 17 00:00:00 2001
d49186
From: Ken Gaillot <kgaillot@redhat.com>
d49186
Date: Thu, 10 Jun 2021 17:25:57 -0500
d49186
Subject: [PATCH 07/11] Refactor: pacemaker-attrd: functionize processing a
d49186
 sync response
d49186
d49186
... for code isolation, and because we need to add more to it
d49186
---
d49186
 daemons/attrd/attrd_commands.c | 59 ++++++++++++++++++++++++++++--------------
d49186
 1 file changed, 39 insertions(+), 20 deletions(-)
d49186
d49186
diff --git a/daemons/attrd/attrd_commands.c b/daemons/attrd/attrd_commands.c
d49186
index b3f441c..d02d3e6 100644
d49186
--- a/daemons/attrd/attrd_commands.c
d49186
+++ b/daemons/attrd/attrd_commands.c
d49186
@@ -572,6 +572,43 @@ attrd_peer_clear_failure(crm_node_t *peer, xmlNode *xml)
d49186
 }
d49186
 
d49186
 /*!
d49186
+ * \internal
d49186
+ * \brief Load attributes from a peer sync response
d49186
+ *
d49186
+ * \param[in] peer      Peer that sent clear request
d49186
+ * \param[in] peer_won  Whether peer is the attribute writer
d49186
+ * \param[in] xml       Request XML
d49186
+ */
d49186
+static void
d49186
+process_peer_sync_response(crm_node_t *peer, bool peer_won, xmlNode *xml)
d49186
+{
d49186
+    crm_info("Processing " PCMK__ATTRD_CMD_SYNC_RESPONSE " from %s",
d49186
+             peer->uname);
d49186
+
d49186
+    if (peer_won) {
d49186
+        /* Initialize the "seen" flag for all attributes to cleared, so we can
d49186
+         * detect attributes that local node has but the writer doesn't.
d49186
+         */
d49186
+        clear_attribute_value_seen();
d49186
+    }
d49186
+
d49186
+    // Process each attribute update in the sync response
d49186
+    for (xmlNode *child = pcmk__xml_first_child(xml); child != NULL;
d49186
+         child = pcmk__xml_next(child)) {
d49186
+        attrd_peer_update(peer, child,
d49186
+                          crm_element_value(child, PCMK__XA_ATTR_NODE_NAME),
d49186
+                          TRUE);
d49186
+    }
d49186
+
d49186
+    if (peer_won) {
d49186
+        /* If any attributes are still not marked as seen, the writer doesn't
d49186
+         * know about them, so send all peers an update with them.
d49186
+         */
d49186
+        attrd_current_only_attribute_update(peer, xml);
d49186
+    }
d49186
+}
d49186
+
d49186
+/*!
d49186
     \internal
d49186
     \brief Broadcast private attribute for local node with protocol version
d49186
 */
d49186
@@ -596,7 +633,7 @@ attrd_peer_message(crm_node_t *peer, xmlNode *xml)
d49186
     const char *op = crm_element_value(xml, PCMK__XA_TASK);
d49186
     const char *election_op = crm_element_value(xml, F_CRM_TASK);
d49186
     const char *host = crm_element_value(xml, PCMK__XA_ATTR_NODE_NAME);
d49186
-    bool peer_won = FALSE;
d49186
+    bool peer_won = false;
d49186
 
d49186
     if (election_op) {
d49186
         attrd_handle_election_op(peer, xml);
d49186
@@ -631,25 +668,7 @@ attrd_peer_message(crm_node_t *peer, xmlNode *xml)
d49186
 
d49186
     } else if (pcmk__str_eq(op, PCMK__ATTRD_CMD_SYNC_RESPONSE, pcmk__str_casei)
d49186
                && !pcmk__str_eq(peer->uname, attrd_cluster->uname, pcmk__str_casei)) {
d49186
-        xmlNode *child = NULL;
d49186
-
d49186
-        crm_info("Processing %s from %s", op, peer->uname);
d49186
-
d49186
-        /* Clear the seen flag for attribute processing held only in the own node. */
d49186
-        if (peer_won) {
d49186
-            clear_attribute_value_seen();
d49186
-        }
d49186
-
d49186
-        for (child = pcmk__xml_first_child(xml); child != NULL;
d49186
-             child = pcmk__xml_next(child)) {
d49186
-            host = crm_element_value(child, PCMK__XA_ATTR_NODE_NAME);
d49186
-            attrd_peer_update(peer, child, host, TRUE);
d49186
-        }
d49186
-
d49186
-        if (peer_won) {
d49186
-            /* Synchronize if there is an attribute held only by own node that Writer does not have. */
d49186
-            attrd_current_only_attribute_update(peer, xml);
d49186
-        }
d49186
+        process_peer_sync_response(peer, peer_won, xml);
d49186
 
d49186
     } else if (pcmk__str_eq(op, PCMK__ATTRD_CMD_FLUSH, pcmk__str_casei)) {
d49186
         /* Ignore. The flush command was removed in 2.0.0 but may be
d49186
-- 
d49186
1.8.3.1
d49186
d49186
d49186
From a890a0e5bbbcabf907f51ed0460868035f72464d Mon Sep 17 00:00:00 2001
d49186
From: Ken Gaillot <kgaillot@redhat.com>
d49186
Date: Fri, 11 Jun 2021 14:40:39 -0500
d49186
Subject: [PATCH 08/11] Refactor: pacemaker-attrd: functionize broadcasting
d49186
 local override
d49186
d49186
... for code isolation
d49186
---
d49186
 daemons/attrd/attrd_commands.c | 42 +++++++++++++++++++++++++++++-------------
d49186
 1 file changed, 29 insertions(+), 13 deletions(-)
d49186
d49186
diff --git a/daemons/attrd/attrd_commands.c b/daemons/attrd/attrd_commands.c
d49186
index d02d3e6..4783427 100644
d49186
--- a/daemons/attrd/attrd_commands.c
d49186
+++ b/daemons/attrd/attrd_commands.c
d49186
@@ -804,6 +804,34 @@ attrd_current_only_attribute_update(crm_node_t *peer, xmlNode *xml)
d49186
     free_xml(sync);
d49186
 }
d49186
 
d49186
+/*!
d49186
+ * \internal
d49186
+ * \brief Override an attribute sync with a local value
d49186
+ *
d49186
+ * Broadcast the local node's value for an attribute that's different from the
d49186
+ * value provided in a peer's attribute synchronization response. This ensures a
d49186
+ * node's values for itself take precedence and all peers are kept in sync.
d49186
+ *
d49186
+ * \param[in] a          Attribute entry to override
d49186
+ *
d49186
+ * \return Local instance of attribute value
d49186
+ */
d49186
+static attribute_value_t *
d49186
+broadcast_local_value(attribute_t *a)
d49186
+{
d49186
+    attribute_value_t *v = g_hash_table_lookup(a->values, attrd_cluster->uname);
d49186
+    xmlNode *sync = create_xml_node(NULL, __func__);
d49186
+
d49186
+    crm_xml_add(sync, PCMK__XA_TASK, PCMK__ATTRD_CMD_SYNC_RESPONSE);
d49186
+    build_attribute_xml(sync, a->id, a->set, a->uuid, a->timeout_ms,
d49186
+                        a->user, a->is_private, v->nodename, v->nodeid,
d49186
+                        v->current, FALSE);
d49186
+    attrd_xml_add_writer(sync);
d49186
+    send_attrd_message(NULL, sync);
d49186
+    free_xml(sync);
d49186
+    return v;
d49186
+}
d49186
+
d49186
 void
d49186
 attrd_peer_update(crm_node_t *peer, xmlNode *xml, const char *host, bool filter)
d49186
 {
d49186
@@ -899,21 +927,9 @@ attrd_peer_update(crm_node_t *peer, xmlNode *xml, const char *host, bool filter)
d49186
     if (filter && !pcmk__str_eq(v->current, value, pcmk__str_casei)
d49186
         && pcmk__str_eq(host, attrd_cluster->uname, pcmk__str_casei)) {
d49186
 
d49186
-        xmlNode *sync = create_xml_node(NULL, __func__);
d49186
-
d49186
         crm_notice("%s[%s]: local value '%s' takes priority over '%s' from %s",
d49186
                    attr, host, v->current, value, peer->uname);
d49186
-
d49186
-        crm_xml_add(sync, PCMK__XA_TASK, PCMK__ATTRD_CMD_SYNC_RESPONSE);
d49186
-        v = g_hash_table_lookup(a->values, host);
d49186
-        build_attribute_xml(sync, attr, a->set, a->uuid, a->timeout_ms, a->user,
d49186
-                            a->is_private, v->nodename, v->nodeid, v->current, FALSE);
d49186
-
d49186
-        attrd_xml_add_writer(sync);
d49186
-
d49186
-        /* Broadcast in case any other nodes had the inconsistent value */
d49186
-        send_attrd_message(NULL, sync);
d49186
-        free_xml(sync);
d49186
+        v = broadcast_local_value(a);
d49186
 
d49186
     } else if (!pcmk__str_eq(v->current, value, pcmk__str_casei)) {
d49186
         crm_notice("Setting %s[%s]: %s -> %s " CRM_XS " from %s",
d49186
-- 
d49186
1.8.3.1
d49186
d49186
d49186
From f6f65e3dab070f1bbdf6d1383f4d6173a8840bc9 Mon Sep 17 00:00:00 2001
d49186
From: Ken Gaillot <kgaillot@redhat.com>
d49186
Date: Fri, 11 Jun 2021 14:50:29 -0500
d49186
Subject: [PATCH 09/11] Log: pacemaker-attrd: improve messages when
d49186
 broadcasting local-only values
d49186
d49186
The traces aren't necessary since build_attribute_xml() already logs the same
d49186
info at debug. Also, rename function for clarity, and make static.
d49186
---
d49186
 daemons/attrd/attrd_commands.c | 35 ++++++++++++++++-------------------
d49186
 1 file changed, 16 insertions(+), 19 deletions(-)
d49186
d49186
diff --git a/daemons/attrd/attrd_commands.c b/daemons/attrd/attrd_commands.c
d49186
index 4783427..356defb 100644
d49186
--- a/daemons/attrd/attrd_commands.c
d49186
+++ b/daemons/attrd/attrd_commands.c
d49186
@@ -51,11 +51,12 @@ GHashTable *attributes = NULL;
d49186
 
d49186
 void write_attribute(attribute_t *a, bool ignore_delay);
d49186
 void write_or_elect_attribute(attribute_t *a);
d49186
-void attrd_current_only_attribute_update(crm_node_t *peer, xmlNode *xml);
d49186
 void attrd_peer_update(crm_node_t *peer, xmlNode *xml, const char *host, bool filter);
d49186
 void attrd_peer_sync(crm_node_t *peer, xmlNode *xml);
d49186
 void attrd_peer_remove(const char *host, gboolean uncache, const char *source);
d49186
 
d49186
+static void broadcast_unseen_local_values(crm_node_t *peer, xmlNode *xml);
d49186
+
d49186
 static gboolean
d49186
 send_attrd_message(crm_node_t * node, xmlNode * data)
d49186
 {
d49186
@@ -604,7 +605,7 @@ process_peer_sync_response(crm_node_t *peer, bool peer_won, xmlNode *xml)
d49186
         /* If any attributes are still not marked as seen, the writer doesn't
d49186
          * know about them, so send all peers an update with them.
d49186
          */
d49186
-        attrd_current_only_attribute_update(peer, xml);
d49186
+        broadcast_unseen_local_values(peer, xml);
d49186
     }
d49186
 }
d49186
 
d49186
@@ -768,40 +769,36 @@ attrd_lookup_or_create_value(GHashTable *values, const char *host, xmlNode *xml)
d49186
     return(v);
d49186
 }
d49186
 
d49186
-void 
d49186
-attrd_current_only_attribute_update(crm_node_t *peer, xmlNode *xml)
d49186
+void
d49186
+broadcast_unseen_local_values(crm_node_t *peer, xmlNode *xml)
d49186
 {
d49186
     GHashTableIter aIter;
d49186
     GHashTableIter vIter;
d49186
-    attribute_t *a;
d49186
+    attribute_t *a = NULL;
d49186
     attribute_value_t *v = NULL;
d49186
-    xmlNode *sync = create_xml_node(NULL, __func__);
d49186
-    gboolean build = FALSE;    
d49186
-
d49186
-    crm_xml_add(sync, PCMK__XA_TASK, PCMK__ATTRD_CMD_SYNC_RESPONSE);
d49186
+    xmlNode *sync = NULL;
d49186
 
d49186
     g_hash_table_iter_init(&aIter, attributes);
d49186
     while (g_hash_table_iter_next(&aIter, NULL, (gpointer *) & a)) {
d49186
         g_hash_table_iter_init(&vIter, a->values);
d49186
         while (g_hash_table_iter_next(&vIter, NULL, (gpointer *) & v)) {
d49186
-            if (pcmk__str_eq(v->nodename, attrd_cluster->uname, pcmk__str_casei) && v->seen == FALSE) {
d49186
-                crm_trace("Syncing %s[%s] = %s to everyone.(from local only attributes)", a->id, v->nodename, v->current);
d49186
-
d49186
-                build = TRUE;
d49186
+            if (!(v->seen) && pcmk__str_eq(v->nodename, attrd_cluster->uname,
d49186
+                                           pcmk__str_casei)) {
d49186
+                if (sync == NULL) {
d49186
+                    sync = create_xml_node(NULL, __func__);
d49186
+                    crm_xml_add(sync, PCMK__XA_TASK, PCMK__ATTRD_CMD_SYNC_RESPONSE);
d49186
+                }
d49186
                 build_attribute_xml(sync, a->id, a->set, a->uuid, a->timeout_ms, a->user, a->is_private,
d49186
                             v->nodename, v->nodeid, v->current,  (a->timeout_ms && a->timer ? TRUE : FALSE));
d49186
-            } else {
d49186
-                crm_trace("Local attribute(%s[%s] = %s) was ignore.(another host) : [%s]", a->id, v->nodename, v->current, attrd_cluster->uname);
d49186
-                continue;
d49186
             }
d49186
         }
d49186
     }
d49186
 
d49186
-    if (build) {
d49186
-        crm_debug("Syncing values to everyone.(from local only attributes)");
d49186
+    if (sync != NULL) {
d49186
+        crm_debug("Broadcasting local-only values");
d49186
         send_attrd_message(NULL, sync);
d49186
+        free_xml(sync);
d49186
     }
d49186
-    free_xml(sync);
d49186
 }
d49186
 
d49186
 /*!
d49186
-- 
d49186
1.8.3.1
d49186
d49186
d49186
From ab90ffb785ea018556f216b8f540f8c3429a3947 Mon Sep 17 00:00:00 2001
d49186
From: Ken Gaillot <kgaillot@redhat.com>
d49186
Date: Fri, 11 Jun 2021 15:04:20 -0500
d49186
Subject: [PATCH 10/11] Refactor: pacemaker-attrd: simplify attribute XML
d49186
 creation function
d49186
d49186
... and rename for clarity
d49186
---
d49186
 daemons/attrd/attrd_commands.c | 48 ++++++++++++++++++++++++------------------
d49186
 1 file changed, 27 insertions(+), 21 deletions(-)
d49186
d49186
diff --git a/daemons/attrd/attrd_commands.c b/daemons/attrd/attrd_commands.c
d49186
index 356defb..5b32a77 100644
d49186
--- a/daemons/attrd/attrd_commands.c
d49186
+++ b/daemons/attrd/attrd_commands.c
d49186
@@ -125,25 +125,35 @@ cache_remote_node(const char *node_name)
d49186
     CRM_ASSERT(crm_remote_peer_get(node_name) != NULL);
d49186
 }
d49186
 
d49186
+/*!
d49186
+ * \internal
d49186
+ * \brief Create an XML representation of an attribute for use in peer messages
d49186
+ *
d49186
+ * \param[in] parent       Create attribute XML as child element of this element
d49186
+ * \param[in] a            Attribute to represent
d49186
+ * \param[in] v            Attribute value to represent
d49186
+ * \param[in] force_write  If true, value should be written even if unchanged
d49186
+ *
d49186
+ * \return XML representation of attribute
d49186
+ */
d49186
 static xmlNode *
d49186
-build_attribute_xml(
d49186
-    xmlNode *parent, const char *name, const char *set, const char *uuid, unsigned int timeout_ms, const char *user,
d49186
-    gboolean is_private, const char *peer, uint32_t peerid, const char *value, gboolean is_force_write)
d49186
+add_attribute_value_xml(xmlNode *parent, attribute_t *a, attribute_value_t *v,
d49186
+                        bool force_write)
d49186
 {
d49186
     xmlNode *xml = create_xml_node(parent, __func__);
d49186
 
d49186
-    crm_xml_add(xml, PCMK__XA_ATTR_NAME, name);
d49186
-    crm_xml_add(xml, PCMK__XA_ATTR_SET, set);
d49186
-    crm_xml_add(xml, PCMK__XA_ATTR_UUID, uuid);
d49186
-    crm_xml_add(xml, PCMK__XA_ATTR_USER, user);
d49186
-    crm_xml_add(xml, PCMK__XA_ATTR_NODE_NAME, peer);
d49186
-    if (peerid > 0) {
d49186
-        crm_xml_add_int(xml, PCMK__XA_ATTR_NODE_ID, peerid);
d49186
+    crm_xml_add(xml, PCMK__XA_ATTR_NAME, a->id);
d49186
+    crm_xml_add(xml, PCMK__XA_ATTR_SET, a->set);
d49186
+    crm_xml_add(xml, PCMK__XA_ATTR_UUID, a->uuid);
d49186
+    crm_xml_add(xml, PCMK__XA_ATTR_USER, a->user);
d49186
+    crm_xml_add(xml, PCMK__XA_ATTR_NODE_NAME, v->nodename);
d49186
+    if (v->nodeid > 0) {
d49186
+        crm_xml_add_int(xml, PCMK__XA_ATTR_NODE_ID, v->nodeid);
d49186
     }
d49186
-    crm_xml_add(xml, PCMK__XA_ATTR_VALUE, value);
d49186
-    crm_xml_add_int(xml, PCMK__XA_ATTR_DAMPENING, timeout_ms/1000);
d49186
-    crm_xml_add_int(xml, PCMK__XA_ATTR_IS_PRIVATE, is_private);
d49186
-    crm_xml_add_int(xml, PCMK__XA_ATTR_FORCE, is_force_write);
d49186
+    crm_xml_add(xml, PCMK__XA_ATTR_VALUE, v->current);
d49186
+    crm_xml_add_int(xml, PCMK__XA_ATTR_DAMPENING, a->timeout_ms / 1000);
d49186
+    crm_xml_add_int(xml, PCMK__XA_ATTR_IS_PRIVATE, a->is_private);
d49186
+    crm_xml_add_int(xml, PCMK__XA_ATTR_FORCE, force_write);
d49186
 
d49186
     return xml;
d49186
 }
d49186
@@ -695,8 +705,7 @@ attrd_peer_sync(crm_node_t *peer, xmlNode *xml)
d49186
         g_hash_table_iter_init(&vIter, a->values);
d49186
         while (g_hash_table_iter_next(&vIter, NULL, (gpointer *) & v)) {
d49186
             crm_debug("Syncing %s[%s] = %s to %s", a->id, v->nodename, v->current, peer?peer->uname:"everyone");
d49186
-            build_attribute_xml(sync, a->id, a->set, a->uuid, a->timeout_ms, a->user, a->is_private,
d49186
-                                v->nodename, v->nodeid, v->current, FALSE);
d49186
+            add_attribute_value_xml(sync, a, v, false);
d49186
         }
d49186
     }
d49186
 
d49186
@@ -788,8 +797,7 @@ broadcast_unseen_local_values(crm_node_t *peer, xmlNode *xml)
d49186
                     sync = create_xml_node(NULL, __func__);
d49186
                     crm_xml_add(sync, PCMK__XA_TASK, PCMK__ATTRD_CMD_SYNC_RESPONSE);
d49186
                 }
d49186
-                build_attribute_xml(sync, a->id, a->set, a->uuid, a->timeout_ms, a->user, a->is_private,
d49186
-                            v->nodename, v->nodeid, v->current,  (a->timeout_ms && a->timer ? TRUE : FALSE));
d49186
+                add_attribute_value_xml(sync, a, v, a->timeout_ms && a->timer);
d49186
             }
d49186
         }
d49186
     }
d49186
@@ -820,9 +828,7 @@ broadcast_local_value(attribute_t *a)
d49186
     xmlNode *sync = create_xml_node(NULL, __func__);
d49186
 
d49186
     crm_xml_add(sync, PCMK__XA_TASK, PCMK__ATTRD_CMD_SYNC_RESPONSE);
d49186
-    build_attribute_xml(sync, a->id, a->set, a->uuid, a->timeout_ms,
d49186
-                        a->user, a->is_private, v->nodename, v->nodeid,
d49186
-                        v->current, FALSE);
d49186
+    add_attribute_value_xml(sync, a, v, false);
d49186
     attrd_xml_add_writer(sync);
d49186
     send_attrd_message(NULL, sync);
d49186
     free_xml(sync);
d49186
-- 
d49186
1.8.3.1
d49186
d49186
d49186
From 540d74130c5c8d9c626d6c50475e4dc4f64234e7 Mon Sep 17 00:00:00 2001
d49186
From: Ken Gaillot <kgaillot@redhat.com>
d49186
Date: Fri, 4 Jun 2021 16:34:26 -0500
d49186
Subject: [PATCH 11/11] Fix: pacemaker-attrd: avoid repeated unfencing of
d49186
 remote nodes
d49186
d49186
The attribute manager can't record a remote node's attributes to the CIB until
d49186
it knows the node is remote. Normally, this is learned when the remote node
d49186
starts, because the controller clears the CRM_OP_PROBED attribute and indicates
d49186
that it is for a remote node.
d49186
d49186
However, if a cluster node is down when a remote node starts, and later comes
d49186
up, it learns the remote node's existing attributes as part of the attribute
d49186
sync. Previously, this did not include whether each value is for a cluster or
d49186
remote node, so the newly joined attribute manager couldn't write out remote
d49186
nodes' attributes until it learned that via some other event -- which might not
d49186
happen before the node becomes DC, in which case its scheduler will not see any
d49186
unfencing-related node attributes and may wrongly schedule unfencing.
d49186
d49186
The sync response handling already calls attrd_lookup_or_create_value(), which
d49186
checks PCMK__XA_ATTR_IS_REMOTE, so all we need to do is add that to the sync
d49186
response.
d49186
---
d49186
 daemons/attrd/attrd_commands.c | 6 +++++-
d49186
 1 file changed, 5 insertions(+), 1 deletion(-)
d49186
d49186
diff --git a/daemons/attrd/attrd_commands.c b/daemons/attrd/attrd_commands.c
d49186
index 5b32a77..0142383 100644
d49186
--- a/daemons/attrd/attrd_commands.c
d49186
+++ b/daemons/attrd/attrd_commands.c
d49186
@@ -43,8 +43,9 @@
d49186
  *     1       1.1.15   PCMK__ATTRD_CMD_UPDATE_BOTH,
d49186
  *                      PCMK__ATTRD_CMD_UPDATE_DELAY
d49186
  *     2       1.1.17   PCMK__ATTRD_CMD_CLEAR_FAILURE
d49186
+ *     3       2.1.1    PCMK__ATTRD_CMD_SYNC_RESPONSE indicates remote nodes
d49186
  */
d49186
-#define ATTRD_PROTOCOL_VERSION "2"
d49186
+#define ATTRD_PROTOCOL_VERSION "3"
d49186
 
d49186
 int last_cib_op_done = 0;
d49186
 GHashTable *attributes = NULL;
d49186
@@ -150,6 +151,9 @@ add_attribute_value_xml(xmlNode *parent, attribute_t *a, attribute_value_t *v,
d49186
     if (v->nodeid > 0) {
d49186
         crm_xml_add_int(xml, PCMK__XA_ATTR_NODE_ID, v->nodeid);
d49186
     }
d49186
+    if (v->is_remote != 0) {
d49186
+        crm_xml_add_int(xml, PCMK__XA_ATTR_IS_REMOTE, 1);
d49186
+    }
d49186
     crm_xml_add(xml, PCMK__XA_ATTR_VALUE, v->current);
d49186
     crm_xml_add_int(xml, PCMK__XA_ATTR_DAMPENING, a->timeout_ms / 1000);
d49186
     crm_xml_add_int(xml, PCMK__XA_ATTR_IS_PRIVATE, a->is_private);
d49186
-- 
d49186
1.8.3.1
d49186