Blame SOURCES/007-unfencing-loop.patch

599c7d
From 6dcd6b51d7d3993bc483588d6ed75077518ed600 Mon Sep 17 00:00:00 2001
599c7d
From: Ken Gaillot <kgaillot@redhat.com>
599c7d
Date: Fri, 4 Jun 2021 16:30:55 -0500
599c7d
Subject: [PATCH 01/11] Low: controller: check whether unfenced node was remote
599c7d
 node
599c7d
599c7d
... so the controller can indicate the node is remote (if known at that point,
599c7d
which is not guaranteed) when setting unfencing-related node attributes.
599c7d
---
599c7d
 daemons/controld/controld_fencing.c | 21 ++++++++++++++++++---
599c7d
 1 file changed, 18 insertions(+), 3 deletions(-)
599c7d
599c7d
diff --git a/daemons/controld/controld_fencing.c b/daemons/controld/controld_fencing.c
599c7d
index 23dff28..0fba661 100644
599c7d
--- a/daemons/controld/controld_fencing.c
599c7d
+++ b/daemons/controld/controld_fencing.c
599c7d
@@ -757,15 +757,30 @@ tengine_stonith_callback(stonith_t *stonith, stonith_callback_data_t *data)
599c7d
             if (pcmk__str_eq("on", op, pcmk__str_casei)) {
599c7d
                 const char *value = NULL;
599c7d
                 char *now = pcmk__ttoa(time(NULL));
599c7d
+                gboolean is_remote_node = FALSE;
599c7d
+
599c7d
+                /* This check is not 100% reliable, since this node is not
599c7d
+                 * guaranteed to have the remote node cached. However, it
599c7d
+                 * doesn't have to be reliable, since the attribute manager can
599c7d
+                 * learn a node's "remoteness" by other means sooner or later.
599c7d
+                 * This allows it to learn more quickly if this node does have
599c7d
+                 * the information.
599c7d
+                 */
599c7d
+                if (g_hash_table_lookup(crm_remote_peer_cache, uuid) != NULL) {
599c7d
+                    is_remote_node = TRUE;
599c7d
+                }
599c7d
 
599c7d
-                update_attrd(target, CRM_ATTR_UNFENCED, now, NULL, FALSE);
599c7d
+                update_attrd(target, CRM_ATTR_UNFENCED, now, NULL,
599c7d
+                             is_remote_node);
599c7d
                 free(now);
599c7d
 
599c7d
                 value = crm_meta_value(action->params, XML_OP_ATTR_DIGESTS_ALL);
599c7d
-                update_attrd(target, CRM_ATTR_DIGESTS_ALL, value, NULL, FALSE);
599c7d
+                update_attrd(target, CRM_ATTR_DIGESTS_ALL, value, NULL,
599c7d
+                             is_remote_node);
599c7d
 
599c7d
                 value = crm_meta_value(action->params, XML_OP_ATTR_DIGESTS_SECURE);
599c7d
-                update_attrd(target, CRM_ATTR_DIGESTS_SECURE, value, NULL, FALSE);
599c7d
+                update_attrd(target, CRM_ATTR_DIGESTS_SECURE, value, NULL,
599c7d
+                             is_remote_node);
599c7d
 
599c7d
             } else if (action->sent_update == FALSE) {
599c7d
                 send_stonith_update(action, target, uuid);
599c7d
-- 
599c7d
1.8.3.1
599c7d
599c7d
599c7d
From 3ef6d9403f68ab8559c45cc99f5a8da05ca6420b Mon Sep 17 00:00:00 2001
599c7d
From: Ken Gaillot <kgaillot@redhat.com>
599c7d
Date: Mon, 7 Jun 2021 10:50:36 -0500
599c7d
Subject: [PATCH 02/11] Refactor: pacemaker-attrd: functionize adding remote
599c7d
 node to cache
599c7d
599c7d
... for future reuse
599c7d
---
599c7d
 daemons/attrd/attrd_commands.c | 34 +++++++++++++++++++++++-----------
599c7d
 1 file changed, 23 insertions(+), 11 deletions(-)
599c7d
599c7d
diff --git a/daemons/attrd/attrd_commands.c b/daemons/attrd/attrd_commands.c
599c7d
index 731c243..93a165b 100644
599c7d
--- a/daemons/attrd/attrd_commands.c
599c7d
+++ b/daemons/attrd/attrd_commands.c
599c7d
@@ -102,6 +102,28 @@ free_attribute(gpointer data)
599c7d
     }
599c7d
 }
599c7d
 
599c7d
+/*!
599c7d
+ * \internal
599c7d
+ * \brief Ensure a Pacemaker Remote node is in the correct peer cache
599c7d
+ *
599c7d
+ * \param[in]
599c7d
+ */
599c7d
+static void
599c7d
+cache_remote_node(const char *node_name)
599c7d
+{
599c7d
+    /* If we previously assumed this node was an unseen cluster node,
599c7d
+     * remove its entry from the cluster peer cache.
599c7d
+     */
599c7d
+    crm_node_t *dup = pcmk__search_cluster_node_cache(0, node_name);
599c7d
+
599c7d
+    if (dup && (dup->uuid == NULL)) {
599c7d
+        reap_crm_member(0, node_name);
599c7d
+    }
599c7d
+
599c7d
+    // Ensure node is in the remote peer cache
599c7d
+    CRM_ASSERT(crm_remote_peer_get(node_name) != NULL);
599c7d
+}
599c7d
+
599c7d
 static xmlNode *
599c7d
 build_attribute_xml(
599c7d
     xmlNode *parent, const char *name, const char *set, const char *uuid, unsigned int timeout_ms, const char *user,
599c7d
@@ -709,17 +731,7 @@ attrd_lookup_or_create_value(GHashTable *values, const char *host, xmlNode *xml)
599c7d
 
599c7d
     crm_element_value_int(xml, PCMK__XA_ATTR_IS_REMOTE, &is_remote);
599c7d
     if (is_remote) {
599c7d
-        /* If we previously assumed this node was an unseen cluster node,
599c7d
-         * remove its entry from the cluster peer cache.
599c7d
-         */
599c7d
-        crm_node_t *dup = pcmk__search_cluster_node_cache(0, host);
599c7d
-
599c7d
-        if (dup && (dup->uuid == NULL)) {
599c7d
-            reap_crm_member(0, host);
599c7d
-        }
599c7d
-
599c7d
-        /* Ensure this host is in the remote peer cache */
599c7d
-        CRM_ASSERT(crm_remote_peer_get(host) != NULL);
599c7d
+        cache_remote_node(host);
599c7d
     }
599c7d
 
599c7d
     if (v == NULL) {
599c7d
-- 
599c7d
1.8.3.1
599c7d
599c7d
599c7d
From 6fac2c71bc2c56870ac828d7cd7b7c799279c47e Mon Sep 17 00:00:00 2001
599c7d
From: Ken Gaillot <kgaillot@redhat.com>
599c7d
Date: Mon, 7 Jun 2021 10:39:34 -0500
599c7d
Subject: [PATCH 03/11] Refactor: pacemaker-attrd: don't try to remove votes
599c7d
 for remote nodes
599c7d
599c7d
Remote nodes never vote.
599c7d
599c7d
This has no effect in practice since the removal would simply do nothing,
599c7d
but we might as well not waste time trying.
599c7d
---
599c7d
 daemons/attrd/attrd_commands.c | 11 ++++++-----
599c7d
 1 file changed, 6 insertions(+), 5 deletions(-)
599c7d
599c7d
diff --git a/daemons/attrd/attrd_commands.c b/daemons/attrd/attrd_commands.c
599c7d
index 93a165b..dbe777e 100644
599c7d
--- a/daemons/attrd/attrd_commands.c
599c7d
+++ b/daemons/attrd/attrd_commands.c
599c7d
@@ -976,7 +976,8 @@ attrd_election_cb(gpointer user_data)
599c7d
 void
599c7d
 attrd_peer_change_cb(enum crm_status_type kind, crm_node_t *peer, const void *data)
599c7d
 {
599c7d
-    bool remove_voter = FALSE;
599c7d
+    bool gone = false;
599c7d
+    bool is_remote = pcmk_is_set(peer->flags, crm_remote_node);
599c7d
 
599c7d
     switch (kind) {
599c7d
         case crm_status_uname:
599c7d
@@ -984,7 +985,7 @@ attrd_peer_change_cb(enum crm_status_type kind, crm_node_t *peer, const void *da
599c7d
 
599c7d
         case crm_status_processes:
599c7d
             if (!pcmk_is_set(peer->processes, crm_get_cluster_proc())) {
599c7d
-                remove_voter = TRUE;
599c7d
+                gone = true;
599c7d
             }
599c7d
             break;
599c7d
 
599c7d
@@ -1000,13 +1001,13 @@ attrd_peer_change_cb(enum crm_status_type kind, crm_node_t *peer, const void *da
599c7d
             } else {
599c7d
                 // Remove all attribute values associated with lost nodes
599c7d
                 attrd_peer_remove(peer->uname, FALSE, "loss");
599c7d
-                remove_voter = TRUE;
599c7d
+                gone = true;
599c7d
             }
599c7d
             break;
599c7d
     }
599c7d
 
599c7d
-    // In case an election is in progress, remove any vote by the node
599c7d
-    if (remove_voter) {
599c7d
+    // Remove votes from cluster nodes that leave, in case election in progress
599c7d
+    if (gone && !is_remote) {
599c7d
         attrd_remove_voter(peer);
599c7d
     }
599c7d
 }
599c7d
-- 
599c7d
1.8.3.1
599c7d
599c7d
599c7d
From 54089fc663d6aaf10ca164c6c94b3b17237788de Mon Sep 17 00:00:00 2001
599c7d
From: Ken Gaillot <kgaillot@redhat.com>
599c7d
Date: Mon, 7 Jun 2021 10:40:06 -0500
599c7d
Subject: [PATCH 04/11] Low: pacemaker-attrd: check for remote nodes in peer
599c7d
 update callback
599c7d
599c7d
If a remote node was started before the local cluster node joined the cluster,
599c7d
the cluster node will assume its node attributes are for a cluster node until
599c7d
it learns otherwise. Check for remoteness in the peer update callback, to have
599c7d
another way we can learn it.
599c7d
---
599c7d
 daemons/attrd/attrd_commands.c | 4 ++++
599c7d
 1 file changed, 4 insertions(+)
599c7d
599c7d
diff --git a/daemons/attrd/attrd_commands.c b/daemons/attrd/attrd_commands.c
599c7d
index dbe777e..5f6a754 100644
599c7d
--- a/daemons/attrd/attrd_commands.c
599c7d
+++ b/daemons/attrd/attrd_commands.c
599c7d
@@ -1009,6 +1009,10 @@ attrd_peer_change_cb(enum crm_status_type kind, crm_node_t *peer, const void *da
599c7d
     // Remove votes from cluster nodes that leave, in case election in progress
599c7d
     if (gone && !is_remote) {
599c7d
         attrd_remove_voter(peer);
599c7d
+
599c7d
+    // Ensure remote nodes that come up are in the remote node cache
599c7d
+    } else if (!gone && is_remote) {
599c7d
+        cache_remote_node(peer->uname);
599c7d
     }
599c7d
 }
599c7d
 
599c7d
-- 
599c7d
1.8.3.1
599c7d
599c7d
599c7d
From 8c048df0312d0d9c857d87b570a352429a710928 Mon Sep 17 00:00:00 2001
599c7d
From: Ken Gaillot <kgaillot@redhat.com>
599c7d
Date: Mon, 7 Jun 2021 11:29:12 -0500
599c7d
Subject: [PATCH 05/11] Log: pacemaker-attrd: log peer status changes
599c7d
599c7d
---
599c7d
 daemons/attrd/attrd_commands.c | 9 +++++++++
599c7d
 1 file changed, 9 insertions(+)
599c7d
599c7d
diff --git a/daemons/attrd/attrd_commands.c b/daemons/attrd/attrd_commands.c
599c7d
index 5f6a754..d6d179b 100644
599c7d
--- a/daemons/attrd/attrd_commands.c
599c7d
+++ b/daemons/attrd/attrd_commands.c
599c7d
@@ -972,6 +972,7 @@ attrd_election_cb(gpointer user_data)
599c7d
     return FALSE;
599c7d
 }
599c7d
 
599c7d
+#define state_text(state) ((state)? (const char *)(state) : "in unknown state")
599c7d
 
599c7d
 void
599c7d
 attrd_peer_change_cb(enum crm_status_type kind, crm_node_t *peer, const void *data)
599c7d
@@ -981,15 +982,23 @@ attrd_peer_change_cb(enum crm_status_type kind, crm_node_t *peer, const void *da
599c7d
 
599c7d
     switch (kind) {
599c7d
         case crm_status_uname:
599c7d
+            crm_debug("%s node %s is now %s",
599c7d
+                      (is_remote? "Remote" : "Cluster"),
599c7d
+                      peer->uname, state_text(peer->state));
599c7d
             break;
599c7d
 
599c7d
         case crm_status_processes:
599c7d
             if (!pcmk_is_set(peer->processes, crm_get_cluster_proc())) {
599c7d
                 gone = true;
599c7d
             }
599c7d
+            crm_debug("Node %s is %s a peer",
599c7d
+                      peer->uname, (gone? "no longer" : "now"));
599c7d
             break;
599c7d
 
599c7d
         case crm_status_nstate:
599c7d
+            crm_debug("%s node %s is now %s (was %s)",
599c7d
+                      (is_remote? "Remote" : "Cluster"),
599c7d
+                      peer->uname, state_text(peer->state), state_text(data));
599c7d
             if (pcmk__str_eq(peer->state, CRM_NODE_MEMBER, pcmk__str_casei)) {
599c7d
                 /* If we're the writer, send new peers a list of all attributes
599c7d
                  * (unless it's a remote node, which doesn't run its own attrd)
599c7d
-- 
599c7d
1.8.3.1
599c7d
599c7d
599c7d
From 1dcc8dee4990cf0dbdec0e14db6d9a3ad67a41d5 Mon Sep 17 00:00:00 2001
599c7d
From: Ken Gaillot <kgaillot@redhat.com>
599c7d
Date: Mon, 7 Jun 2021 11:13:53 -0500
599c7d
Subject: [PATCH 06/11] Low: pacemaker-attrd: ensure node ID is only set for
599c7d
 attributes when known
599c7d
599c7d
In most cases, attribute updates contained the node ID, and the node ID was
599c7d
used by other code, only if known (i.e. positive). However a couple places did
599c7d
not check this, so add that.
599c7d
599c7d
I am unsure whether the missing check caused problems in practice, but there
599c7d
appears to be the possibility that a remote node would wrongly be added to the
599c7d
cluster node cache.
599c7d
---
599c7d
 daemons/attrd/attrd_commands.c | 6 ++++--
599c7d
 1 file changed, 4 insertions(+), 2 deletions(-)
599c7d
599c7d
diff --git a/daemons/attrd/attrd_commands.c b/daemons/attrd/attrd_commands.c
599c7d
index d6d179b..b3f441c 100644
599c7d
--- a/daemons/attrd/attrd_commands.c
599c7d
+++ b/daemons/attrd/attrd_commands.c
599c7d
@@ -136,7 +136,9 @@ build_attribute_xml(
599c7d
     crm_xml_add(xml, PCMK__XA_ATTR_UUID, uuid);
599c7d
     crm_xml_add(xml, PCMK__XA_ATTR_USER, user);
599c7d
     crm_xml_add(xml, PCMK__XA_ATTR_NODE_NAME, peer);
599c7d
-    crm_xml_add_int(xml, PCMK__XA_ATTR_NODE_ID, peerid);
599c7d
+    if (peerid > 0) {
599c7d
+        crm_xml_add_int(xml, PCMK__XA_ATTR_NODE_ID, peerid);
599c7d
+    }
599c7d
     crm_xml_add(xml, PCMK__XA_ATTR_VALUE, value);
599c7d
     crm_xml_add_int(xml, PCMK__XA_ATTR_DAMPENING, timeout_ms/1000);
599c7d
     crm_xml_add_int(xml, PCMK__XA_ATTR_IS_PRIVATE, is_private);
599c7d
@@ -937,7 +939,7 @@ attrd_peer_update(crm_node_t *peer, xmlNode *xml, const char *host, bool filter)
599c7d
     /* If this is a cluster node whose node ID we are learning, remember it */
599c7d
     if ((v->nodeid == 0) && (v->is_remote == FALSE)
599c7d
         && (crm_element_value_int(xml, PCMK__XA_ATTR_NODE_ID,
599c7d
-                                  (int*)&v->nodeid) == 0)) {
599c7d
+                                  (int*)&v->nodeid) == 0) && (v->nodeid > 0)) {
599c7d
 
599c7d
         crm_node_t *known_peer = crm_get_peer(v->nodeid, host);
599c7d
 
599c7d
-- 
599c7d
1.8.3.1
599c7d
599c7d
599c7d
From 8d12490e88b558d01db37a38f7d35175c6d2d69a Mon Sep 17 00:00:00 2001
599c7d
From: Ken Gaillot <kgaillot@redhat.com>
599c7d
Date: Thu, 10 Jun 2021 17:25:57 -0500
599c7d
Subject: [PATCH 07/11] Refactor: pacemaker-attrd: functionize processing a
599c7d
 sync response
599c7d
599c7d
... for code isolation, and because we need to add more to it
599c7d
---
599c7d
 daemons/attrd/attrd_commands.c | 59 ++++++++++++++++++++++++++++--------------
599c7d
 1 file changed, 39 insertions(+), 20 deletions(-)
599c7d
599c7d
diff --git a/daemons/attrd/attrd_commands.c b/daemons/attrd/attrd_commands.c
599c7d
index b3f441c..d02d3e6 100644
599c7d
--- a/daemons/attrd/attrd_commands.c
599c7d
+++ b/daemons/attrd/attrd_commands.c
599c7d
@@ -572,6 +572,43 @@ attrd_peer_clear_failure(crm_node_t *peer, xmlNode *xml)
599c7d
 }
599c7d
 
599c7d
 /*!
599c7d
+ * \internal
599c7d
+ * \brief Load attributes from a peer sync response
599c7d
+ *
599c7d
+ * \param[in] peer      Peer that sent clear request
599c7d
+ * \param[in] peer_won  Whether peer is the attribute writer
599c7d
+ * \param[in] xml       Request XML
599c7d
+ */
599c7d
+static void
599c7d
+process_peer_sync_response(crm_node_t *peer, bool peer_won, xmlNode *xml)
599c7d
+{
599c7d
+    crm_info("Processing " PCMK__ATTRD_CMD_SYNC_RESPONSE " from %s",
599c7d
+             peer->uname);
599c7d
+
599c7d
+    if (peer_won) {
599c7d
+        /* Initialize the "seen" flag for all attributes to cleared, so we can
599c7d
+         * detect attributes that local node has but the writer doesn't.
599c7d
+         */
599c7d
+        clear_attribute_value_seen();
599c7d
+    }
599c7d
+
599c7d
+    // Process each attribute update in the sync response
599c7d
+    for (xmlNode *child = pcmk__xml_first_child(xml); child != NULL;
599c7d
+         child = pcmk__xml_next(child)) {
599c7d
+        attrd_peer_update(peer, child,
599c7d
+                          crm_element_value(child, PCMK__XA_ATTR_NODE_NAME),
599c7d
+                          TRUE);
599c7d
+    }
599c7d
+
599c7d
+    if (peer_won) {
599c7d
+        /* If any attributes are still not marked as seen, the writer doesn't
599c7d
+         * know about them, so send all peers an update with them.
599c7d
+         */
599c7d
+        attrd_current_only_attribute_update(peer, xml);
599c7d
+    }
599c7d
+}
599c7d
+
599c7d
+/*!
599c7d
     \internal
599c7d
     \brief Broadcast private attribute for local node with protocol version
599c7d
 */
599c7d
@@ -596,7 +633,7 @@ attrd_peer_message(crm_node_t *peer, xmlNode *xml)
599c7d
     const char *op = crm_element_value(xml, PCMK__XA_TASK);
599c7d
     const char *election_op = crm_element_value(xml, F_CRM_TASK);
599c7d
     const char *host = crm_element_value(xml, PCMK__XA_ATTR_NODE_NAME);
599c7d
-    bool peer_won = FALSE;
599c7d
+    bool peer_won = false;
599c7d
 
599c7d
     if (election_op) {
599c7d
         attrd_handle_election_op(peer, xml);
599c7d
@@ -631,25 +668,7 @@ attrd_peer_message(crm_node_t *peer, xmlNode *xml)
599c7d
 
599c7d
     } else if (pcmk__str_eq(op, PCMK__ATTRD_CMD_SYNC_RESPONSE, pcmk__str_casei)
599c7d
                && !pcmk__str_eq(peer->uname, attrd_cluster->uname, pcmk__str_casei)) {
599c7d
-        xmlNode *child = NULL;
599c7d
-
599c7d
-        crm_info("Processing %s from %s", op, peer->uname);
599c7d
-
599c7d
-        /* Clear the seen flag for attribute processing held only in the own node. */
599c7d
-        if (peer_won) {
599c7d
-            clear_attribute_value_seen();
599c7d
-        }
599c7d
-
599c7d
-        for (child = pcmk__xml_first_child(xml); child != NULL;
599c7d
-             child = pcmk__xml_next(child)) {
599c7d
-            host = crm_element_value(child, PCMK__XA_ATTR_NODE_NAME);
599c7d
-            attrd_peer_update(peer, child, host, TRUE);
599c7d
-        }
599c7d
-
599c7d
-        if (peer_won) {
599c7d
-            /* Synchronize if there is an attribute held only by own node that Writer does not have. */
599c7d
-            attrd_current_only_attribute_update(peer, xml);
599c7d
-        }
599c7d
+        process_peer_sync_response(peer, peer_won, xml);
599c7d
 
599c7d
     } else if (pcmk__str_eq(op, PCMK__ATTRD_CMD_FLUSH, pcmk__str_casei)) {
599c7d
         /* Ignore. The flush command was removed in 2.0.0 but may be
599c7d
-- 
599c7d
1.8.3.1
599c7d
599c7d
599c7d
From a890a0e5bbbcabf907f51ed0460868035f72464d Mon Sep 17 00:00:00 2001
599c7d
From: Ken Gaillot <kgaillot@redhat.com>
599c7d
Date: Fri, 11 Jun 2021 14:40:39 -0500
599c7d
Subject: [PATCH 08/11] Refactor: pacemaker-attrd: functionize broadcasting
599c7d
 local override
599c7d
599c7d
... for code isolation
599c7d
---
599c7d
 daemons/attrd/attrd_commands.c | 42 +++++++++++++++++++++++++++++-------------
599c7d
 1 file changed, 29 insertions(+), 13 deletions(-)
599c7d
599c7d
diff --git a/daemons/attrd/attrd_commands.c b/daemons/attrd/attrd_commands.c
599c7d
index d02d3e6..4783427 100644
599c7d
--- a/daemons/attrd/attrd_commands.c
599c7d
+++ b/daemons/attrd/attrd_commands.c
599c7d
@@ -804,6 +804,34 @@ attrd_current_only_attribute_update(crm_node_t *peer, xmlNode *xml)
599c7d
     free_xml(sync);
599c7d
 }
599c7d
 
599c7d
+/*!
599c7d
+ * \internal
599c7d
+ * \brief Override an attribute sync with a local value
599c7d
+ *
599c7d
+ * Broadcast the local node's value for an attribute that's different from the
599c7d
+ * value provided in a peer's attribute synchronization response. This ensures a
599c7d
+ * node's values for itself take precedence and all peers are kept in sync.
599c7d
+ *
599c7d
+ * \param[in] a          Attribute entry to override
599c7d
+ *
599c7d
+ * \return Local instance of attribute value
599c7d
+ */
599c7d
+static attribute_value_t *
599c7d
+broadcast_local_value(attribute_t *a)
599c7d
+{
599c7d
+    attribute_value_t *v = g_hash_table_lookup(a->values, attrd_cluster->uname);
599c7d
+    xmlNode *sync = create_xml_node(NULL, __func__);
599c7d
+
599c7d
+    crm_xml_add(sync, PCMK__XA_TASK, PCMK__ATTRD_CMD_SYNC_RESPONSE);
599c7d
+    build_attribute_xml(sync, a->id, a->set, a->uuid, a->timeout_ms,
599c7d
+                        a->user, a->is_private, v->nodename, v->nodeid,
599c7d
+                        v->current, FALSE);
599c7d
+    attrd_xml_add_writer(sync);
599c7d
+    send_attrd_message(NULL, sync);
599c7d
+    free_xml(sync);
599c7d
+    return v;
599c7d
+}
599c7d
+
599c7d
 void
599c7d
 attrd_peer_update(crm_node_t *peer, xmlNode *xml, const char *host, bool filter)
599c7d
 {
599c7d
@@ -899,21 +927,9 @@ attrd_peer_update(crm_node_t *peer, xmlNode *xml, const char *host, bool filter)
599c7d
     if (filter && !pcmk__str_eq(v->current, value, pcmk__str_casei)
599c7d
         && pcmk__str_eq(host, attrd_cluster->uname, pcmk__str_casei)) {
599c7d
 
599c7d
-        xmlNode *sync = create_xml_node(NULL, __func__);
599c7d
-
599c7d
         crm_notice("%s[%s]: local value '%s' takes priority over '%s' from %s",
599c7d
                    attr, host, v->current, value, peer->uname);
599c7d
-
599c7d
-        crm_xml_add(sync, PCMK__XA_TASK, PCMK__ATTRD_CMD_SYNC_RESPONSE);
599c7d
-        v = g_hash_table_lookup(a->values, host);
599c7d
-        build_attribute_xml(sync, attr, a->set, a->uuid, a->timeout_ms, a->user,
599c7d
-                            a->is_private, v->nodename, v->nodeid, v->current, FALSE);
599c7d
-
599c7d
-        attrd_xml_add_writer(sync);
599c7d
-
599c7d
-        /* Broadcast in case any other nodes had the inconsistent value */
599c7d
-        send_attrd_message(NULL, sync);
599c7d
-        free_xml(sync);
599c7d
+        v = broadcast_local_value(a);
599c7d
 
599c7d
     } else if (!pcmk__str_eq(v->current, value, pcmk__str_casei)) {
599c7d
         crm_notice("Setting %s[%s]: %s -> %s " CRM_XS " from %s",
599c7d
-- 
599c7d
1.8.3.1
599c7d
599c7d
599c7d
From f6f65e3dab070f1bbdf6d1383f4d6173a8840bc9 Mon Sep 17 00:00:00 2001
599c7d
From: Ken Gaillot <kgaillot@redhat.com>
599c7d
Date: Fri, 11 Jun 2021 14:50:29 -0500
599c7d
Subject: [PATCH 09/11] Log: pacemaker-attrd: improve messages when
599c7d
 broadcasting local-only values
599c7d
599c7d
The traces aren't necessary since build_attribute_xml() already logs the same
599c7d
info at debug. Also, rename function for clarity, and make static.
599c7d
---
599c7d
 daemons/attrd/attrd_commands.c | 35 ++++++++++++++++-------------------
599c7d
 1 file changed, 16 insertions(+), 19 deletions(-)
599c7d
599c7d
diff --git a/daemons/attrd/attrd_commands.c b/daemons/attrd/attrd_commands.c
599c7d
index 4783427..356defb 100644
599c7d
--- a/daemons/attrd/attrd_commands.c
599c7d
+++ b/daemons/attrd/attrd_commands.c
599c7d
@@ -51,11 +51,12 @@ GHashTable *attributes = NULL;
599c7d
 
599c7d
 void write_attribute(attribute_t *a, bool ignore_delay);
599c7d
 void write_or_elect_attribute(attribute_t *a);
599c7d
-void attrd_current_only_attribute_update(crm_node_t *peer, xmlNode *xml);
599c7d
 void attrd_peer_update(crm_node_t *peer, xmlNode *xml, const char *host, bool filter);
599c7d
 void attrd_peer_sync(crm_node_t *peer, xmlNode *xml);
599c7d
 void attrd_peer_remove(const char *host, gboolean uncache, const char *source);
599c7d
 
599c7d
+static void broadcast_unseen_local_values(crm_node_t *peer, xmlNode *xml);
599c7d
+
599c7d
 static gboolean
599c7d
 send_attrd_message(crm_node_t * node, xmlNode * data)
599c7d
 {
599c7d
@@ -604,7 +605,7 @@ process_peer_sync_response(crm_node_t *peer, bool peer_won, xmlNode *xml)
599c7d
         /* If any attributes are still not marked as seen, the writer doesn't
599c7d
          * know about them, so send all peers an update with them.
599c7d
          */
599c7d
-        attrd_current_only_attribute_update(peer, xml);
599c7d
+        broadcast_unseen_local_values(peer, xml);
599c7d
     }
599c7d
 }
599c7d
 
599c7d
@@ -768,40 +769,36 @@ attrd_lookup_or_create_value(GHashTable *values, const char *host, xmlNode *xml)
599c7d
     return(v);
599c7d
 }
599c7d
 
599c7d
-void 
599c7d
-attrd_current_only_attribute_update(crm_node_t *peer, xmlNode *xml)
599c7d
+void
599c7d
+broadcast_unseen_local_values(crm_node_t *peer, xmlNode *xml)
599c7d
 {
599c7d
     GHashTableIter aIter;
599c7d
     GHashTableIter vIter;
599c7d
-    attribute_t *a;
599c7d
+    attribute_t *a = NULL;
599c7d
     attribute_value_t *v = NULL;
599c7d
-    xmlNode *sync = create_xml_node(NULL, __func__);
599c7d
-    gboolean build = FALSE;    
599c7d
-
599c7d
-    crm_xml_add(sync, PCMK__XA_TASK, PCMK__ATTRD_CMD_SYNC_RESPONSE);
599c7d
+    xmlNode *sync = NULL;
599c7d
 
599c7d
     g_hash_table_iter_init(&aIter, attributes);
599c7d
     while (g_hash_table_iter_next(&aIter, NULL, (gpointer *) & a)) {
599c7d
         g_hash_table_iter_init(&vIter, a->values);
599c7d
         while (g_hash_table_iter_next(&vIter, NULL, (gpointer *) & v)) {
599c7d
-            if (pcmk__str_eq(v->nodename, attrd_cluster->uname, pcmk__str_casei) && v->seen == FALSE) {
599c7d
-                crm_trace("Syncing %s[%s] = %s to everyone.(from local only attributes)", a->id, v->nodename, v->current);
599c7d
-
599c7d
-                build = TRUE;
599c7d
+            if (!(v->seen) && pcmk__str_eq(v->nodename, attrd_cluster->uname,
599c7d
+                                           pcmk__str_casei)) {
599c7d
+                if (sync == NULL) {
599c7d
+                    sync = create_xml_node(NULL, __func__);
599c7d
+                    crm_xml_add(sync, PCMK__XA_TASK, PCMK__ATTRD_CMD_SYNC_RESPONSE);
599c7d
+                }
599c7d
                 build_attribute_xml(sync, a->id, a->set, a->uuid, a->timeout_ms, a->user, a->is_private,
599c7d
                             v->nodename, v->nodeid, v->current,  (a->timeout_ms && a->timer ? TRUE : FALSE));
599c7d
-            } else {
599c7d
-                crm_trace("Local attribute(%s[%s] = %s) was ignore.(another host) : [%s]", a->id, v->nodename, v->current, attrd_cluster->uname);
599c7d
-                continue;
599c7d
             }
599c7d
         }
599c7d
     }
599c7d
 
599c7d
-    if (build) {
599c7d
-        crm_debug("Syncing values to everyone.(from local only attributes)");
599c7d
+    if (sync != NULL) {
599c7d
+        crm_debug("Broadcasting local-only values");
599c7d
         send_attrd_message(NULL, sync);
599c7d
+        free_xml(sync);
599c7d
     }
599c7d
-    free_xml(sync);
599c7d
 }
599c7d
 
599c7d
 /*!
599c7d
-- 
599c7d
1.8.3.1
599c7d
599c7d
599c7d
From ab90ffb785ea018556f216b8f540f8c3429a3947 Mon Sep 17 00:00:00 2001
599c7d
From: Ken Gaillot <kgaillot@redhat.com>
599c7d
Date: Fri, 11 Jun 2021 15:04:20 -0500
599c7d
Subject: [PATCH 10/11] Refactor: pacemaker-attrd: simplify attribute XML
599c7d
 creation function
599c7d
599c7d
... and rename for clarity
599c7d
---
599c7d
 daemons/attrd/attrd_commands.c | 48 ++++++++++++++++++++++++------------------
599c7d
 1 file changed, 27 insertions(+), 21 deletions(-)
599c7d
599c7d
diff --git a/daemons/attrd/attrd_commands.c b/daemons/attrd/attrd_commands.c
599c7d
index 356defb..5b32a77 100644
599c7d
--- a/daemons/attrd/attrd_commands.c
599c7d
+++ b/daemons/attrd/attrd_commands.c
599c7d
@@ -125,25 +125,35 @@ cache_remote_node(const char *node_name)
599c7d
     CRM_ASSERT(crm_remote_peer_get(node_name) != NULL);
599c7d
 }
599c7d
 
599c7d
+/*!
599c7d
+ * \internal
599c7d
+ * \brief Create an XML representation of an attribute for use in peer messages
599c7d
+ *
599c7d
+ * \param[in] parent       Create attribute XML as child element of this element
599c7d
+ * \param[in] a            Attribute to represent
599c7d
+ * \param[in] v            Attribute value to represent
599c7d
+ * \param[in] force_write  If true, value should be written even if unchanged
599c7d
+ *
599c7d
+ * \return XML representation of attribute
599c7d
+ */
599c7d
 static xmlNode *
599c7d
-build_attribute_xml(
599c7d
-    xmlNode *parent, const char *name, const char *set, const char *uuid, unsigned int timeout_ms, const char *user,
599c7d
-    gboolean is_private, const char *peer, uint32_t peerid, const char *value, gboolean is_force_write)
599c7d
+add_attribute_value_xml(xmlNode *parent, attribute_t *a, attribute_value_t *v,
599c7d
+                        bool force_write)
599c7d
 {
599c7d
     xmlNode *xml = create_xml_node(parent, __func__);
599c7d
 
599c7d
-    crm_xml_add(xml, PCMK__XA_ATTR_NAME, name);
599c7d
-    crm_xml_add(xml, PCMK__XA_ATTR_SET, set);
599c7d
-    crm_xml_add(xml, PCMK__XA_ATTR_UUID, uuid);
599c7d
-    crm_xml_add(xml, PCMK__XA_ATTR_USER, user);
599c7d
-    crm_xml_add(xml, PCMK__XA_ATTR_NODE_NAME, peer);
599c7d
-    if (peerid > 0) {
599c7d
-        crm_xml_add_int(xml, PCMK__XA_ATTR_NODE_ID, peerid);
599c7d
+    crm_xml_add(xml, PCMK__XA_ATTR_NAME, a->id);
599c7d
+    crm_xml_add(xml, PCMK__XA_ATTR_SET, a->set);
599c7d
+    crm_xml_add(xml, PCMK__XA_ATTR_UUID, a->uuid);
599c7d
+    crm_xml_add(xml, PCMK__XA_ATTR_USER, a->user);
599c7d
+    crm_xml_add(xml, PCMK__XA_ATTR_NODE_NAME, v->nodename);
599c7d
+    if (v->nodeid > 0) {
599c7d
+        crm_xml_add_int(xml, PCMK__XA_ATTR_NODE_ID, v->nodeid);
599c7d
     }
599c7d
-    crm_xml_add(xml, PCMK__XA_ATTR_VALUE, value);
599c7d
-    crm_xml_add_int(xml, PCMK__XA_ATTR_DAMPENING, timeout_ms/1000);
599c7d
-    crm_xml_add_int(xml, PCMK__XA_ATTR_IS_PRIVATE, is_private);
599c7d
-    crm_xml_add_int(xml, PCMK__XA_ATTR_FORCE, is_force_write);
599c7d
+    crm_xml_add(xml, PCMK__XA_ATTR_VALUE, v->current);
599c7d
+    crm_xml_add_int(xml, PCMK__XA_ATTR_DAMPENING, a->timeout_ms / 1000);
599c7d
+    crm_xml_add_int(xml, PCMK__XA_ATTR_IS_PRIVATE, a->is_private);
599c7d
+    crm_xml_add_int(xml, PCMK__XA_ATTR_FORCE, force_write);
599c7d
 
599c7d
     return xml;
599c7d
 }
599c7d
@@ -695,8 +705,7 @@ attrd_peer_sync(crm_node_t *peer, xmlNode *xml)
599c7d
         g_hash_table_iter_init(&vIter, a->values);
599c7d
         while (g_hash_table_iter_next(&vIter, NULL, (gpointer *) & v)) {
599c7d
             crm_debug("Syncing %s[%s] = %s to %s", a->id, v->nodename, v->current, peer?peer->uname:"everyone");
599c7d
-            build_attribute_xml(sync, a->id, a->set, a->uuid, a->timeout_ms, a->user, a->is_private,
599c7d
-                                v->nodename, v->nodeid, v->current, FALSE);
599c7d
+            add_attribute_value_xml(sync, a, v, false);
599c7d
         }
599c7d
     }
599c7d
 
599c7d
@@ -788,8 +797,7 @@ broadcast_unseen_local_values(crm_node_t *peer, xmlNode *xml)
599c7d
                     sync = create_xml_node(NULL, __func__);
599c7d
                     crm_xml_add(sync, PCMK__XA_TASK, PCMK__ATTRD_CMD_SYNC_RESPONSE);
599c7d
                 }
599c7d
-                build_attribute_xml(sync, a->id, a->set, a->uuid, a->timeout_ms, a->user, a->is_private,
599c7d
-                            v->nodename, v->nodeid, v->current,  (a->timeout_ms && a->timer ? TRUE : FALSE));
599c7d
+                add_attribute_value_xml(sync, a, v, a->timeout_ms && a->timer);
599c7d
             }
599c7d
         }
599c7d
     }
599c7d
@@ -820,9 +828,7 @@ broadcast_local_value(attribute_t *a)
599c7d
     xmlNode *sync = create_xml_node(NULL, __func__);
599c7d
 
599c7d
     crm_xml_add(sync, PCMK__XA_TASK, PCMK__ATTRD_CMD_SYNC_RESPONSE);
599c7d
-    build_attribute_xml(sync, a->id, a->set, a->uuid, a->timeout_ms,
599c7d
-                        a->user, a->is_private, v->nodename, v->nodeid,
599c7d
-                        v->current, FALSE);
599c7d
+    add_attribute_value_xml(sync, a, v, false);
599c7d
     attrd_xml_add_writer(sync);
599c7d
     send_attrd_message(NULL, sync);
599c7d
     free_xml(sync);
599c7d
-- 
599c7d
1.8.3.1
599c7d
599c7d
599c7d
From 540d74130c5c8d9c626d6c50475e4dc4f64234e7 Mon Sep 17 00:00:00 2001
599c7d
From: Ken Gaillot <kgaillot@redhat.com>
599c7d
Date: Fri, 4 Jun 2021 16:34:26 -0500
599c7d
Subject: [PATCH 11/11] Fix: pacemaker-attrd: avoid repeated unfencing of
599c7d
 remote nodes
599c7d
599c7d
The attribute manager can't record a remote node's attributes to the CIB until
599c7d
it knows the node is remote. Normally, this is learned when the remote node
599c7d
starts, because the controller clears the CRM_OP_PROBED attribute and indicates
599c7d
that it is for a remote node.
599c7d
599c7d
However, if a cluster node is down when a remote node starts, and later comes
599c7d
up, it learns the remote node's existing attributes as part of the attribute
599c7d
sync. Previously, this did not include whether each value is for a cluster or
599c7d
remote node, so the newly joined attribute manager couldn't write out remote
599c7d
nodes' attributes until it learned that via some other event -- which might not
599c7d
happen before the node becomes DC, in which case its scheduler will not see any
599c7d
unfencing-related node attributes and may wrongly schedule unfencing.
599c7d
599c7d
The sync response handling already calls attrd_lookup_or_create_value(), which
599c7d
checks PCMK__XA_ATTR_IS_REMOTE, so all we need to do is add that to the sync
599c7d
response.
599c7d
---
599c7d
 daemons/attrd/attrd_commands.c | 6 +++++-
599c7d
 1 file changed, 5 insertions(+), 1 deletion(-)
599c7d
599c7d
diff --git a/daemons/attrd/attrd_commands.c b/daemons/attrd/attrd_commands.c
599c7d
index 5b32a77..0142383 100644
599c7d
--- a/daemons/attrd/attrd_commands.c
599c7d
+++ b/daemons/attrd/attrd_commands.c
599c7d
@@ -43,8 +43,9 @@
599c7d
  *     1       1.1.15   PCMK__ATTRD_CMD_UPDATE_BOTH,
599c7d
  *                      PCMK__ATTRD_CMD_UPDATE_DELAY
599c7d
  *     2       1.1.17   PCMK__ATTRD_CMD_CLEAR_FAILURE
599c7d
+ *     3       2.1.1    PCMK__ATTRD_CMD_SYNC_RESPONSE indicates remote nodes
599c7d
  */
599c7d
-#define ATTRD_PROTOCOL_VERSION "2"
599c7d
+#define ATTRD_PROTOCOL_VERSION "3"
599c7d
 
599c7d
 int last_cib_op_done = 0;
599c7d
 GHashTable *attributes = NULL;
599c7d
@@ -150,6 +151,9 @@ add_attribute_value_xml(xmlNode *parent, attribute_t *a, attribute_value_t *v,
599c7d
     if (v->nodeid > 0) {
599c7d
         crm_xml_add_int(xml, PCMK__XA_ATTR_NODE_ID, v->nodeid);
599c7d
     }
599c7d
+    if (v->is_remote != 0) {
599c7d
+        crm_xml_add_int(xml, PCMK__XA_ATTR_IS_REMOTE, 1);
599c7d
+    }
599c7d
     crm_xml_add(xml, PCMK__XA_ATTR_VALUE, v->current);
599c7d
     crm_xml_add_int(xml, PCMK__XA_ATTR_DAMPENING, a->timeout_ms / 1000);
599c7d
     crm_xml_add_int(xml, PCMK__XA_ATTR_IS_PRIVATE, a->is_private);
599c7d
-- 
599c7d
1.8.3.1
599c7d