Blame SOURCES/007-unfencing-loop.patch

c2941a
From 6dcd6b51d7d3993bc483588d6ed75077518ed600 Mon Sep 17 00:00:00 2001
c2941a
From: Ken Gaillot <kgaillot@redhat.com>
c2941a
Date: Fri, 4 Jun 2021 16:30:55 -0500
c2941a
Subject: [PATCH 01/11] Low: controller: check whether unfenced node was remote
c2941a
 node
c2941a
c2941a
... so the controller can indicate the node is remote (if known at that point,
c2941a
which is not guaranteed) when setting unfencing-related node attributes.
c2941a
---
c2941a
 daemons/controld/controld_fencing.c | 21 ++++++++++++++++++---
c2941a
 1 file changed, 18 insertions(+), 3 deletions(-)
c2941a
c2941a
diff --git a/daemons/controld/controld_fencing.c b/daemons/controld/controld_fencing.c
c2941a
index 23dff28..0fba661 100644
c2941a
--- a/daemons/controld/controld_fencing.c
c2941a
+++ b/daemons/controld/controld_fencing.c
c2941a
@@ -757,15 +757,30 @@ tengine_stonith_callback(stonith_t *stonith, stonith_callback_data_t *data)
c2941a
             if (pcmk__str_eq("on", op, pcmk__str_casei)) {
c2941a
                 const char *value = NULL;
6c49ac
                 char *now = pcmk__ttoa(time(NULL));
c2941a
+                gboolean is_remote_node = FALSE;
c2941a
+
c2941a
+                /* This check is not 100% reliable, since this node is not
c2941a
+                 * guaranteed to have the remote node cached. However, it
c2941a
+                 * doesn't have to be reliable, since the attribute manager can
c2941a
+                 * learn a node's "remoteness" by other means sooner or later.
c2941a
+                 * This allows it to learn more quickly if this node does have
c2941a
+                 * the information.
c2941a
+                 */
c2941a
+                if (g_hash_table_lookup(crm_remote_peer_cache, uuid) != NULL) {
c2941a
+                    is_remote_node = TRUE;
c2941a
+                }
c2941a
 
c2941a
-                update_attrd(target, CRM_ATTR_UNFENCED, now, NULL, FALSE);
c2941a
+                update_attrd(target, CRM_ATTR_UNFENCED, now, NULL,
c2941a
+                             is_remote_node);
c2941a
                 free(now);
c2941a
 
c2941a
                 value = crm_meta_value(action->params, XML_OP_ATTR_DIGESTS_ALL);
c2941a
-                update_attrd(target, CRM_ATTR_DIGESTS_ALL, value, NULL, FALSE);
c2941a
+                update_attrd(target, CRM_ATTR_DIGESTS_ALL, value, NULL,
c2941a
+                             is_remote_node);
c2941a
 
c2941a
                 value = crm_meta_value(action->params, XML_OP_ATTR_DIGESTS_SECURE);
c2941a
-                update_attrd(target, CRM_ATTR_DIGESTS_SECURE, value, NULL, FALSE);
c2941a
+                update_attrd(target, CRM_ATTR_DIGESTS_SECURE, value, NULL,
c2941a
+                             is_remote_node);
c2941a
 
c2941a
             } else if (action->sent_update == FALSE) {
c2941a
                 send_stonith_update(action, target, uuid);
c2941a
-- 
c2941a
1.8.3.1
c2941a
c2941a
c2941a
From 3ef6d9403f68ab8559c45cc99f5a8da05ca6420b Mon Sep 17 00:00:00 2001
c2941a
From: Ken Gaillot <kgaillot@redhat.com>
c2941a
Date: Mon, 7 Jun 2021 10:50:36 -0500
c2941a
Subject: [PATCH 02/11] Refactor: pacemaker-attrd: functionize adding remote
c2941a
 node to cache
c2941a
c2941a
... for future reuse
c2941a
---
c2941a
 daemons/attrd/attrd_commands.c | 34 +++++++++++++++++++++++-----------
c2941a
 1 file changed, 23 insertions(+), 11 deletions(-)
c2941a
c2941a
diff --git a/daemons/attrd/attrd_commands.c b/daemons/attrd/attrd_commands.c
c2941a
index 731c243..93a165b 100644
c2941a
--- a/daemons/attrd/attrd_commands.c
c2941a
+++ b/daemons/attrd/attrd_commands.c
c2941a
@@ -102,6 +102,28 @@ free_attribute(gpointer data)
c2941a
     }
c2941a
 }
c2941a
 
c2941a
+/*!
c2941a
+ * \internal
c2941a
+ * \brief Ensure a Pacemaker Remote node is in the correct peer cache
c2941a
+ *
c2941a
+ * \param[in]
c2941a
+ */
c2941a
+static void
c2941a
+cache_remote_node(const char *node_name)
c2941a
+{
c2941a
+    /* If we previously assumed this node was an unseen cluster node,
c2941a
+     * remove its entry from the cluster peer cache.
c2941a
+     */
6c49ac
+    crm_node_t *dup = pcmk__search_cluster_node_cache(0, node_name);
c2941a
+
c2941a
+    if (dup && (dup->uuid == NULL)) {
c2941a
+        reap_crm_member(0, node_name);
c2941a
+    }
c2941a
+
c2941a
+    // Ensure node is in the remote peer cache
c2941a
+    CRM_ASSERT(crm_remote_peer_get(node_name) != NULL);
c2941a
+}
c2941a
+
c2941a
 static xmlNode *
c2941a
 build_attribute_xml(
c2941a
     xmlNode *parent, const char *name, const char *set, const char *uuid, unsigned int timeout_ms, const char *user,
c2941a
@@ -709,17 +731,7 @@ attrd_lookup_or_create_value(GHashTable *values, const char *host, xmlNode *xml)
c2941a
 
c2941a
     crm_element_value_int(xml, PCMK__XA_ATTR_IS_REMOTE, &is_remote);
c2941a
     if (is_remote) {
c2941a
-        /* If we previously assumed this node was an unseen cluster node,
c2941a
-         * remove its entry from the cluster peer cache.
c2941a
-         */
6c49ac
-        crm_node_t *dup = pcmk__search_cluster_node_cache(0, host);
c2941a
-
c2941a
-        if (dup && (dup->uuid == NULL)) {
c2941a
-            reap_crm_member(0, host);
c2941a
-        }
c2941a
-
c2941a
-        /* Ensure this host is in the remote peer cache */
c2941a
-        CRM_ASSERT(crm_remote_peer_get(host) != NULL);
c2941a
+        cache_remote_node(host);
c2941a
     }
c2941a
 
c2941a
     if (v == NULL) {
c2941a
-- 
c2941a
1.8.3.1
c2941a
c2941a
c2941a
From 6fac2c71bc2c56870ac828d7cd7b7c799279c47e Mon Sep 17 00:00:00 2001
c2941a
From: Ken Gaillot <kgaillot@redhat.com>
c2941a
Date: Mon, 7 Jun 2021 10:39:34 -0500
c2941a
Subject: [PATCH 03/11] Refactor: pacemaker-attrd: don't try to remove votes
c2941a
 for remote nodes
c2941a
c2941a
Remote nodes never vote.
c2941a
c2941a
This has no effect in practice since the removal would simply do nothing,
c2941a
but we might as well not waste time trying.
c2941a
---
c2941a
 daemons/attrd/attrd_commands.c | 11 ++++++-----
c2941a
 1 file changed, 6 insertions(+), 5 deletions(-)
c2941a
c2941a
diff --git a/daemons/attrd/attrd_commands.c b/daemons/attrd/attrd_commands.c
c2941a
index 93a165b..dbe777e 100644
c2941a
--- a/daemons/attrd/attrd_commands.c
c2941a
+++ b/daemons/attrd/attrd_commands.c
c2941a
@@ -976,7 +976,8 @@ attrd_election_cb(gpointer user_data)
c2941a
 void
c2941a
 attrd_peer_change_cb(enum crm_status_type kind, crm_node_t *peer, const void *data)
c2941a
 {
c2941a
-    bool remove_voter = FALSE;
c2941a
+    bool gone = false;
c2941a
+    bool is_remote = pcmk_is_set(peer->flags, crm_remote_node);
c2941a
 
c2941a
     switch (kind) {
c2941a
         case crm_status_uname:
c2941a
@@ -984,7 +985,7 @@ attrd_peer_change_cb(enum crm_status_type kind, crm_node_t *peer, const void *da
c2941a
 
c2941a
         case crm_status_processes:
c2941a
             if (!pcmk_is_set(peer->processes, crm_get_cluster_proc())) {
c2941a
-                remove_voter = TRUE;
c2941a
+                gone = true;
c2941a
             }
c2941a
             break;
c2941a
 
c2941a
@@ -1000,13 +1001,13 @@ attrd_peer_change_cb(enum crm_status_type kind, crm_node_t *peer, const void *da
c2941a
             } else {
c2941a
                 // Remove all attribute values associated with lost nodes
c2941a
                 attrd_peer_remove(peer->uname, FALSE, "loss");
c2941a
-                remove_voter = TRUE;
c2941a
+                gone = true;
c2941a
             }
c2941a
             break;
c2941a
     }
c2941a
 
c2941a
-    // In case an election is in progress, remove any vote by the node
c2941a
-    if (remove_voter) {
c2941a
+    // Remove votes from cluster nodes that leave, in case election in progress
c2941a
+    if (gone && !is_remote) {
c2941a
         attrd_remove_voter(peer);
c2941a
     }
c2941a
 }
c2941a
-- 
c2941a
1.8.3.1
c2941a
c2941a
c2941a
From 54089fc663d6aaf10ca164c6c94b3b17237788de Mon Sep 17 00:00:00 2001
c2941a
From: Ken Gaillot <kgaillot@redhat.com>
c2941a
Date: Mon, 7 Jun 2021 10:40:06 -0500
c2941a
Subject: [PATCH 04/11] Low: pacemaker-attrd: check for remote nodes in peer
c2941a
 update callback
c2941a
c2941a
If a remote node was started before the local cluster node joined the cluster,
c2941a
the cluster node will assume its node attributes are for a cluster node until
c2941a
it learns otherwise. Check for remoteness in the peer update callback, to have
c2941a
another way we can learn it.
c2941a
---
c2941a
 daemons/attrd/attrd_commands.c | 4 ++++
c2941a
 1 file changed, 4 insertions(+)
c2941a
c2941a
diff --git a/daemons/attrd/attrd_commands.c b/daemons/attrd/attrd_commands.c
c2941a
index dbe777e..5f6a754 100644
c2941a
--- a/daemons/attrd/attrd_commands.c
c2941a
+++ b/daemons/attrd/attrd_commands.c
c2941a
@@ -1009,6 +1009,10 @@ attrd_peer_change_cb(enum crm_status_type kind, crm_node_t *peer, const void *da
c2941a
     // Remove votes from cluster nodes that leave, in case election in progress
c2941a
     if (gone && !is_remote) {
c2941a
         attrd_remove_voter(peer);
c2941a
+
c2941a
+    // Ensure remote nodes that come up are in the remote node cache
c2941a
+    } else if (!gone && is_remote) {
c2941a
+        cache_remote_node(peer->uname);
c2941a
     }
c2941a
 }
c2941a
 
c2941a
-- 
c2941a
1.8.3.1
c2941a
c2941a
c2941a
From 8c048df0312d0d9c857d87b570a352429a710928 Mon Sep 17 00:00:00 2001
c2941a
From: Ken Gaillot <kgaillot@redhat.com>
c2941a
Date: Mon, 7 Jun 2021 11:29:12 -0500
c2941a
Subject: [PATCH 05/11] Log: pacemaker-attrd: log peer status changes
c2941a
c2941a
---
c2941a
 daemons/attrd/attrd_commands.c | 9 +++++++++
c2941a
 1 file changed, 9 insertions(+)
c2941a
c2941a
diff --git a/daemons/attrd/attrd_commands.c b/daemons/attrd/attrd_commands.c
c2941a
index 5f6a754..d6d179b 100644
c2941a
--- a/daemons/attrd/attrd_commands.c
c2941a
+++ b/daemons/attrd/attrd_commands.c
c2941a
@@ -972,6 +972,7 @@ attrd_election_cb(gpointer user_data)
c2941a
     return FALSE;
c2941a
 }
c2941a
 
c2941a
+#define state_text(state) ((state)? (const char *)(state) : "in unknown state")
c2941a
 
c2941a
 void
c2941a
 attrd_peer_change_cb(enum crm_status_type kind, crm_node_t *peer, const void *data)
c2941a
@@ -981,15 +982,23 @@ attrd_peer_change_cb(enum crm_status_type kind, crm_node_t *peer, const void *da
c2941a
 
c2941a
     switch (kind) {
c2941a
         case crm_status_uname:
c2941a
+            crm_debug("%s node %s is now %s",
c2941a
+                      (is_remote? "Remote" : "Cluster"),
c2941a
+                      peer->uname, state_text(peer->state));
c2941a
             break;
c2941a
 
c2941a
         case crm_status_processes:
c2941a
             if (!pcmk_is_set(peer->processes, crm_get_cluster_proc())) {
c2941a
                 gone = true;
c2941a
             }
c2941a
+            crm_debug("Node %s is %s a peer",
c2941a
+                      peer->uname, (gone? "no longer" : "now"));
c2941a
             break;
c2941a
 
c2941a
         case crm_status_nstate:
c2941a
+            crm_debug("%s node %s is now %s (was %s)",
c2941a
+                      (is_remote? "Remote" : "Cluster"),
c2941a
+                      peer->uname, state_text(peer->state), state_text(data));
c2941a
             if (pcmk__str_eq(peer->state, CRM_NODE_MEMBER, pcmk__str_casei)) {
c2941a
                 /* If we're the writer, send new peers a list of all attributes
c2941a
                  * (unless it's a remote node, which doesn't run its own attrd)
c2941a
-- 
c2941a
1.8.3.1
c2941a
c2941a
c2941a
From 1dcc8dee4990cf0dbdec0e14db6d9a3ad67a41d5 Mon Sep 17 00:00:00 2001
c2941a
From: Ken Gaillot <kgaillot@redhat.com>
c2941a
Date: Mon, 7 Jun 2021 11:13:53 -0500
c2941a
Subject: [PATCH 06/11] Low: pacemaker-attrd: ensure node ID is only set for
c2941a
 attributes when known
c2941a
c2941a
In most cases, attribute updates contained the node ID, and the node ID was
c2941a
used by other code, only if known (i.e. positive). However a couple places did
c2941a
not check this, so add that.
c2941a
c2941a
I am unsure whether the missing check caused problems in practice, but there
c2941a
appears to be the possibility that a remote node would wrongly be added to the
c2941a
cluster node cache.
c2941a
---
c2941a
 daemons/attrd/attrd_commands.c | 6 ++++--
c2941a
 1 file changed, 4 insertions(+), 2 deletions(-)
c2941a
c2941a
diff --git a/daemons/attrd/attrd_commands.c b/daemons/attrd/attrd_commands.c
c2941a
index d6d179b..b3f441c 100644
c2941a
--- a/daemons/attrd/attrd_commands.c
c2941a
+++ b/daemons/attrd/attrd_commands.c
c2941a
@@ -136,7 +136,9 @@ build_attribute_xml(
c2941a
     crm_xml_add(xml, PCMK__XA_ATTR_UUID, uuid);
c2941a
     crm_xml_add(xml, PCMK__XA_ATTR_USER, user);
c2941a
     crm_xml_add(xml, PCMK__XA_ATTR_NODE_NAME, peer);
c2941a
-    crm_xml_add_int(xml, PCMK__XA_ATTR_NODE_ID, peerid);
c2941a
+    if (peerid > 0) {
c2941a
+        crm_xml_add_int(xml, PCMK__XA_ATTR_NODE_ID, peerid);
c2941a
+    }
c2941a
     crm_xml_add(xml, PCMK__XA_ATTR_VALUE, value);
c2941a
     crm_xml_add_int(xml, PCMK__XA_ATTR_DAMPENING, timeout_ms/1000);
c2941a
     crm_xml_add_int(xml, PCMK__XA_ATTR_IS_PRIVATE, is_private);
c2941a
@@ -937,7 +939,7 @@ attrd_peer_update(crm_node_t *peer, xmlNode *xml, const char *host, bool filter)
c2941a
     /* If this is a cluster node whose node ID we are learning, remember it */
c2941a
     if ((v->nodeid == 0) && (v->is_remote == FALSE)
c2941a
         && (crm_element_value_int(xml, PCMK__XA_ATTR_NODE_ID,
c2941a
-                                  (int*)&v->nodeid) == 0)) {
c2941a
+                                  (int*)&v->nodeid) == 0) && (v->nodeid > 0)) {
c2941a
 
c2941a
         crm_node_t *known_peer = crm_get_peer(v->nodeid, host);
c2941a
 
c2941a
-- 
c2941a
1.8.3.1
c2941a
c2941a
c2941a
From 8d12490e88b558d01db37a38f7d35175c6d2d69a Mon Sep 17 00:00:00 2001
c2941a
From: Ken Gaillot <kgaillot@redhat.com>
c2941a
Date: Thu, 10 Jun 2021 17:25:57 -0500
c2941a
Subject: [PATCH 07/11] Refactor: pacemaker-attrd: functionize processing a
c2941a
 sync response
c2941a
c2941a
... for code isolation, and because we need to add more to it
c2941a
---
c2941a
 daemons/attrd/attrd_commands.c | 59 ++++++++++++++++++++++++++++--------------
c2941a
 1 file changed, 39 insertions(+), 20 deletions(-)
c2941a
c2941a
diff --git a/daemons/attrd/attrd_commands.c b/daemons/attrd/attrd_commands.c
c2941a
index b3f441c..d02d3e6 100644
c2941a
--- a/daemons/attrd/attrd_commands.c
c2941a
+++ b/daemons/attrd/attrd_commands.c
c2941a
@@ -572,6 +572,43 @@ attrd_peer_clear_failure(crm_node_t *peer, xmlNode *xml)
c2941a
 }
c2941a
 
c2941a
 /*!
c2941a
+ * \internal
c2941a
+ * \brief Load attributes from a peer sync response
c2941a
+ *
c2941a
+ * \param[in] peer      Peer that sent clear request
c2941a
+ * \param[in] peer_won  Whether peer is the attribute writer
c2941a
+ * \param[in] xml       Request XML
c2941a
+ */
c2941a
+static void
c2941a
+process_peer_sync_response(crm_node_t *peer, bool peer_won, xmlNode *xml)
c2941a
+{
c2941a
+    crm_info("Processing " PCMK__ATTRD_CMD_SYNC_RESPONSE " from %s",
c2941a
+             peer->uname);
c2941a
+
c2941a
+    if (peer_won) {
c2941a
+        /* Initialize the "seen" flag for all attributes to cleared, so we can
c2941a
+         * detect attributes that local node has but the writer doesn't.
c2941a
+         */
c2941a
+        clear_attribute_value_seen();
c2941a
+    }
c2941a
+
c2941a
+    // Process each attribute update in the sync response
c2941a
+    for (xmlNode *child = pcmk__xml_first_child(xml); child != NULL;
c2941a
+         child = pcmk__xml_next(child)) {
c2941a
+        attrd_peer_update(peer, child,
c2941a
+                          crm_element_value(child, PCMK__XA_ATTR_NODE_NAME),
c2941a
+                          TRUE);
c2941a
+    }
c2941a
+
c2941a
+    if (peer_won) {
c2941a
+        /* If any attributes are still not marked as seen, the writer doesn't
c2941a
+         * know about them, so send all peers an update with them.
c2941a
+         */
c2941a
+        attrd_current_only_attribute_update(peer, xml);
c2941a
+    }
c2941a
+}
c2941a
+
c2941a
+/*!
c2941a
     \internal
c2941a
     \brief Broadcast private attribute for local node with protocol version
c2941a
 */
c2941a
@@ -596,7 +633,7 @@ attrd_peer_message(crm_node_t *peer, xmlNode *xml)
c2941a
     const char *op = crm_element_value(xml, PCMK__XA_TASK);
c2941a
     const char *election_op = crm_element_value(xml, F_CRM_TASK);
c2941a
     const char *host = crm_element_value(xml, PCMK__XA_ATTR_NODE_NAME);
c2941a
-    bool peer_won = FALSE;
c2941a
+    bool peer_won = false;
c2941a
 
c2941a
     if (election_op) {
c2941a
         attrd_handle_election_op(peer, xml);
c2941a
@@ -631,25 +668,7 @@ attrd_peer_message(crm_node_t *peer, xmlNode *xml)
c2941a
 
c2941a
     } else if (pcmk__str_eq(op, PCMK__ATTRD_CMD_SYNC_RESPONSE, pcmk__str_casei)
c2941a
                && !pcmk__str_eq(peer->uname, attrd_cluster->uname, pcmk__str_casei)) {
c2941a
-        xmlNode *child = NULL;
c2941a
-
c2941a
-        crm_info("Processing %s from %s", op, peer->uname);
c2941a
-
c2941a
-        /* Clear the seen flag for attribute processing held only in the own node. */
c2941a
-        if (peer_won) {
c2941a
-            clear_attribute_value_seen();
c2941a
-        }
c2941a
-
c2941a
-        for (child = pcmk__xml_first_child(xml); child != NULL;
c2941a
-             child = pcmk__xml_next(child)) {
c2941a
-            host = crm_element_value(child, PCMK__XA_ATTR_NODE_NAME);
c2941a
-            attrd_peer_update(peer, child, host, TRUE);
c2941a
-        }
c2941a
-
c2941a
-        if (peer_won) {
c2941a
-            /* Synchronize if there is an attribute held only by own node that Writer does not have. */
c2941a
-            attrd_current_only_attribute_update(peer, xml);
c2941a
-        }
c2941a
+        process_peer_sync_response(peer, peer_won, xml);
c2941a
 
6c49ac
     } else if (pcmk__str_eq(op, PCMK__ATTRD_CMD_FLUSH, pcmk__str_casei)) {
6c49ac
         /* Ignore. The flush command was removed in 2.0.0 but may be
c2941a
-- 
c2941a
1.8.3.1
c2941a
c2941a
c2941a
From a890a0e5bbbcabf907f51ed0460868035f72464d Mon Sep 17 00:00:00 2001
c2941a
From: Ken Gaillot <kgaillot@redhat.com>
c2941a
Date: Fri, 11 Jun 2021 14:40:39 -0500
c2941a
Subject: [PATCH 08/11] Refactor: pacemaker-attrd: functionize broadcasting
c2941a
 local override
c2941a
c2941a
... for code isolation
c2941a
---
c2941a
 daemons/attrd/attrd_commands.c | 42 +++++++++++++++++++++++++++++-------------
c2941a
 1 file changed, 29 insertions(+), 13 deletions(-)
c2941a
c2941a
diff --git a/daemons/attrd/attrd_commands.c b/daemons/attrd/attrd_commands.c
c2941a
index d02d3e6..4783427 100644
c2941a
--- a/daemons/attrd/attrd_commands.c
c2941a
+++ b/daemons/attrd/attrd_commands.c
c2941a
@@ -804,6 +804,34 @@ attrd_current_only_attribute_update(crm_node_t *peer, xmlNode *xml)
c2941a
     free_xml(sync);
c2941a
 }
c2941a
 
c2941a
+/*!
c2941a
+ * \internal
c2941a
+ * \brief Override an attribute sync with a local value
c2941a
+ *
c2941a
+ * Broadcast the local node's value for an attribute that's different from the
c2941a
+ * value provided in a peer's attribute synchronization response. This ensures a
c2941a
+ * node's values for itself take precedence and all peers are kept in sync.
c2941a
+ *
c2941a
+ * \param[in] a          Attribute entry to override
c2941a
+ *
c2941a
+ * \return Local instance of attribute value
c2941a
+ */
c2941a
+static attribute_value_t *
c2941a
+broadcast_local_value(attribute_t *a)
c2941a
+{
c2941a
+    attribute_value_t *v = g_hash_table_lookup(a->values, attrd_cluster->uname);
c2941a
+    xmlNode *sync = create_xml_node(NULL, __func__);
c2941a
+
c2941a
+    crm_xml_add(sync, PCMK__XA_TASK, PCMK__ATTRD_CMD_SYNC_RESPONSE);
c2941a
+    build_attribute_xml(sync, a->id, a->set, a->uuid, a->timeout_ms,
c2941a
+                        a->user, a->is_private, v->nodename, v->nodeid,
c2941a
+                        v->current, FALSE);
c2941a
+    attrd_xml_add_writer(sync);
c2941a
+    send_attrd_message(NULL, sync);
c2941a
+    free_xml(sync);
c2941a
+    return v;
c2941a
+}
c2941a
+
c2941a
 void
c2941a
 attrd_peer_update(crm_node_t *peer, xmlNode *xml, const char *host, bool filter)
c2941a
 {
c2941a
@@ -899,21 +927,9 @@ attrd_peer_update(crm_node_t *peer, xmlNode *xml, const char *host, bool filter)
c2941a
     if (filter && !pcmk__str_eq(v->current, value, pcmk__str_casei)
c2941a
         && pcmk__str_eq(host, attrd_cluster->uname, pcmk__str_casei)) {
c2941a
 
c2941a
-        xmlNode *sync = create_xml_node(NULL, __func__);
c2941a
-
c2941a
         crm_notice("%s[%s]: local value '%s' takes priority over '%s' from %s",
c2941a
                    attr, host, v->current, value, peer->uname);
c2941a
-
c2941a
-        crm_xml_add(sync, PCMK__XA_TASK, PCMK__ATTRD_CMD_SYNC_RESPONSE);
c2941a
-        v = g_hash_table_lookup(a->values, host);
c2941a
-        build_attribute_xml(sync, attr, a->set, a->uuid, a->timeout_ms, a->user,
c2941a
-                            a->is_private, v->nodename, v->nodeid, v->current, FALSE);
c2941a
-
c2941a
-        attrd_xml_add_writer(sync);
c2941a
-
c2941a
-        /* Broadcast in case any other nodes had the inconsistent value */
c2941a
-        send_attrd_message(NULL, sync);
c2941a
-        free_xml(sync);
c2941a
+        v = broadcast_local_value(a);
c2941a
 
c2941a
     } else if (!pcmk__str_eq(v->current, value, pcmk__str_casei)) {
c2941a
         crm_notice("Setting %s[%s]: %s -> %s " CRM_XS " from %s",
c2941a
-- 
c2941a
1.8.3.1
c2941a
c2941a
c2941a
From f6f65e3dab070f1bbdf6d1383f4d6173a8840bc9 Mon Sep 17 00:00:00 2001
c2941a
From: Ken Gaillot <kgaillot@redhat.com>
c2941a
Date: Fri, 11 Jun 2021 14:50:29 -0500
c2941a
Subject: [PATCH 09/11] Log: pacemaker-attrd: improve messages when
c2941a
 broadcasting local-only values
c2941a
c2941a
The traces aren't necessary since build_attribute_xml() already logs the same
c2941a
info at debug. Also, rename function for clarity, and make static.
c2941a
---
c2941a
 daemons/attrd/attrd_commands.c | 35 ++++++++++++++++-------------------
c2941a
 1 file changed, 16 insertions(+), 19 deletions(-)
c2941a
c2941a
diff --git a/daemons/attrd/attrd_commands.c b/daemons/attrd/attrd_commands.c
c2941a
index 4783427..356defb 100644
c2941a
--- a/daemons/attrd/attrd_commands.c
c2941a
+++ b/daemons/attrd/attrd_commands.c
c2941a
@@ -51,11 +51,12 @@ GHashTable *attributes = NULL;
c2941a
 
c2941a
 void write_attribute(attribute_t *a, bool ignore_delay);
c2941a
 void write_or_elect_attribute(attribute_t *a);
c2941a
-void attrd_current_only_attribute_update(crm_node_t *peer, xmlNode *xml);
c2941a
 void attrd_peer_update(crm_node_t *peer, xmlNode *xml, const char *host, bool filter);
c2941a
 void attrd_peer_sync(crm_node_t *peer, xmlNode *xml);
c2941a
 void attrd_peer_remove(const char *host, gboolean uncache, const char *source);
c2941a
 
c2941a
+static void broadcast_unseen_local_values(crm_node_t *peer, xmlNode *xml);
c2941a
+
c2941a
 static gboolean
c2941a
 send_attrd_message(crm_node_t * node, xmlNode * data)
c2941a
 {
c2941a
@@ -604,7 +605,7 @@ process_peer_sync_response(crm_node_t *peer, bool peer_won, xmlNode *xml)
c2941a
         /* If any attributes are still not marked as seen, the writer doesn't
c2941a
          * know about them, so send all peers an update with them.
c2941a
          */
c2941a
-        attrd_current_only_attribute_update(peer, xml);
c2941a
+        broadcast_unseen_local_values(peer, xml);
c2941a
     }
c2941a
 }
c2941a
 
c2941a
@@ -768,40 +769,36 @@ attrd_lookup_or_create_value(GHashTable *values, const char *host, xmlNode *xml)
c2941a
     return(v);
c2941a
 }
c2941a
 
c2941a
-void 
c2941a
-attrd_current_only_attribute_update(crm_node_t *peer, xmlNode *xml)
c2941a
+void
c2941a
+broadcast_unseen_local_values(crm_node_t *peer, xmlNode *xml)
c2941a
 {
c2941a
     GHashTableIter aIter;
c2941a
     GHashTableIter vIter;
c2941a
-    attribute_t *a;
c2941a
+    attribute_t *a = NULL;
c2941a
     attribute_value_t *v = NULL;
c2941a
-    xmlNode *sync = create_xml_node(NULL, __func__);
c2941a
-    gboolean build = FALSE;    
c2941a
-
c2941a
-    crm_xml_add(sync, PCMK__XA_TASK, PCMK__ATTRD_CMD_SYNC_RESPONSE);
c2941a
+    xmlNode *sync = NULL;
c2941a
 
c2941a
     g_hash_table_iter_init(&aIter, attributes);
c2941a
     while (g_hash_table_iter_next(&aIter, NULL, (gpointer *) & a)) {
c2941a
         g_hash_table_iter_init(&vIter, a->values);
c2941a
         while (g_hash_table_iter_next(&vIter, NULL, (gpointer *) & v)) {
c2941a
-            if (pcmk__str_eq(v->nodename, attrd_cluster->uname, pcmk__str_casei) && v->seen == FALSE) {
c2941a
-                crm_trace("Syncing %s[%s] = %s to everyone.(from local only attributes)", a->id, v->nodename, v->current);
c2941a
-
c2941a
-                build = TRUE;
c2941a
+            if (!(v->seen) && pcmk__str_eq(v->nodename, attrd_cluster->uname,
c2941a
+                                           pcmk__str_casei)) {
c2941a
+                if (sync == NULL) {
c2941a
+                    sync = create_xml_node(NULL, __func__);
c2941a
+                    crm_xml_add(sync, PCMK__XA_TASK, PCMK__ATTRD_CMD_SYNC_RESPONSE);
c2941a
+                }
c2941a
                 build_attribute_xml(sync, a->id, a->set, a->uuid, a->timeout_ms, a->user, a->is_private,
c2941a
                             v->nodename, v->nodeid, v->current,  (a->timeout_ms && a->timer ? TRUE : FALSE));
c2941a
-            } else {
c2941a
-                crm_trace("Local attribute(%s[%s] = %s) was ignore.(another host) : [%s]", a->id, v->nodename, v->current, attrd_cluster->uname);
c2941a
-                continue;
c2941a
             }
c2941a
         }
c2941a
     }
c2941a
 
c2941a
-    if (build) {
c2941a
-        crm_debug("Syncing values to everyone.(from local only attributes)");
c2941a
+    if (sync != NULL) {
c2941a
+        crm_debug("Broadcasting local-only values");
c2941a
         send_attrd_message(NULL, sync);
c2941a
+        free_xml(sync);
c2941a
     }
c2941a
-    free_xml(sync);
c2941a
 }
c2941a
 
c2941a
 /*!
c2941a
-- 
c2941a
1.8.3.1
c2941a
c2941a
c2941a
From ab90ffb785ea018556f216b8f540f8c3429a3947 Mon Sep 17 00:00:00 2001
c2941a
From: Ken Gaillot <kgaillot@redhat.com>
c2941a
Date: Fri, 11 Jun 2021 15:04:20 -0500
c2941a
Subject: [PATCH 10/11] Refactor: pacemaker-attrd: simplify attribute XML
c2941a
 creation function
c2941a
c2941a
... and rename for clarity
c2941a
---
c2941a
 daemons/attrd/attrd_commands.c | 48 ++++++++++++++++++++++++------------------
c2941a
 1 file changed, 27 insertions(+), 21 deletions(-)
c2941a
c2941a
diff --git a/daemons/attrd/attrd_commands.c b/daemons/attrd/attrd_commands.c
c2941a
index 356defb..5b32a77 100644
c2941a
--- a/daemons/attrd/attrd_commands.c
c2941a
+++ b/daemons/attrd/attrd_commands.c
c2941a
@@ -125,25 +125,35 @@ cache_remote_node(const char *node_name)
c2941a
     CRM_ASSERT(crm_remote_peer_get(node_name) != NULL);
c2941a
 }
c2941a
 
c2941a
+/*!
c2941a
+ * \internal
c2941a
+ * \brief Create an XML representation of an attribute for use in peer messages
c2941a
+ *
c2941a
+ * \param[in] parent       Create attribute XML as child element of this element
c2941a
+ * \param[in] a            Attribute to represent
c2941a
+ * \param[in] v            Attribute value to represent
c2941a
+ * \param[in] force_write  If true, value should be written even if unchanged
c2941a
+ *
c2941a
+ * \return XML representation of attribute
c2941a
+ */
c2941a
 static xmlNode *
c2941a
-build_attribute_xml(
c2941a
-    xmlNode *parent, const char *name, const char *set, const char *uuid, unsigned int timeout_ms, const char *user,
c2941a
-    gboolean is_private, const char *peer, uint32_t peerid, const char *value, gboolean is_force_write)
c2941a
+add_attribute_value_xml(xmlNode *parent, attribute_t *a, attribute_value_t *v,
c2941a
+                        bool force_write)
c2941a
 {
c2941a
     xmlNode *xml = create_xml_node(parent, __func__);
c2941a
 
c2941a
-    crm_xml_add(xml, PCMK__XA_ATTR_NAME, name);
c2941a
-    crm_xml_add(xml, PCMK__XA_ATTR_SET, set);
c2941a
-    crm_xml_add(xml, PCMK__XA_ATTR_UUID, uuid);
c2941a
-    crm_xml_add(xml, PCMK__XA_ATTR_USER, user);
c2941a
-    crm_xml_add(xml, PCMK__XA_ATTR_NODE_NAME, peer);
c2941a
-    if (peerid > 0) {
c2941a
-        crm_xml_add_int(xml, PCMK__XA_ATTR_NODE_ID, peerid);
c2941a
+    crm_xml_add(xml, PCMK__XA_ATTR_NAME, a->id);
c2941a
+    crm_xml_add(xml, PCMK__XA_ATTR_SET, a->set);
c2941a
+    crm_xml_add(xml, PCMK__XA_ATTR_UUID, a->uuid);
c2941a
+    crm_xml_add(xml, PCMK__XA_ATTR_USER, a->user);
c2941a
+    crm_xml_add(xml, PCMK__XA_ATTR_NODE_NAME, v->nodename);
c2941a
+    if (v->nodeid > 0) {
c2941a
+        crm_xml_add_int(xml, PCMK__XA_ATTR_NODE_ID, v->nodeid);
c2941a
     }
c2941a
-    crm_xml_add(xml, PCMK__XA_ATTR_VALUE, value);
c2941a
-    crm_xml_add_int(xml, PCMK__XA_ATTR_DAMPENING, timeout_ms/1000);
c2941a
-    crm_xml_add_int(xml, PCMK__XA_ATTR_IS_PRIVATE, is_private);
c2941a
-    crm_xml_add_int(xml, PCMK__XA_ATTR_FORCE, is_force_write);
c2941a
+    crm_xml_add(xml, PCMK__XA_ATTR_VALUE, v->current);
c2941a
+    crm_xml_add_int(xml, PCMK__XA_ATTR_DAMPENING, a->timeout_ms / 1000);
c2941a
+    crm_xml_add_int(xml, PCMK__XA_ATTR_IS_PRIVATE, a->is_private);
c2941a
+    crm_xml_add_int(xml, PCMK__XA_ATTR_FORCE, force_write);
c2941a
 
c2941a
     return xml;
c2941a
 }
c2941a
@@ -695,8 +705,7 @@ attrd_peer_sync(crm_node_t *peer, xmlNode *xml)
c2941a
         g_hash_table_iter_init(&vIter, a->values);
c2941a
         while (g_hash_table_iter_next(&vIter, NULL, (gpointer *) & v)) {
c2941a
             crm_debug("Syncing %s[%s] = %s to %s", a->id, v->nodename, v->current, peer?peer->uname:"everyone");
c2941a
-            build_attribute_xml(sync, a->id, a->set, a->uuid, a->timeout_ms, a->user, a->is_private,
c2941a
-                                v->nodename, v->nodeid, v->current, FALSE);
c2941a
+            add_attribute_value_xml(sync, a, v, false);
c2941a
         }
c2941a
     }
c2941a
 
c2941a
@@ -788,8 +797,7 @@ broadcast_unseen_local_values(crm_node_t *peer, xmlNode *xml)
c2941a
                     sync = create_xml_node(NULL, __func__);
c2941a
                     crm_xml_add(sync, PCMK__XA_TASK, PCMK__ATTRD_CMD_SYNC_RESPONSE);
c2941a
                 }
c2941a
-                build_attribute_xml(sync, a->id, a->set, a->uuid, a->timeout_ms, a->user, a->is_private,
c2941a
-                            v->nodename, v->nodeid, v->current,  (a->timeout_ms && a->timer ? TRUE : FALSE));
c2941a
+                add_attribute_value_xml(sync, a, v, a->timeout_ms && a->timer);
c2941a
             }
c2941a
         }
c2941a
     }
c2941a
@@ -820,9 +828,7 @@ broadcast_local_value(attribute_t *a)
c2941a
     xmlNode *sync = create_xml_node(NULL, __func__);
c2941a
 
c2941a
     crm_xml_add(sync, PCMK__XA_TASK, PCMK__ATTRD_CMD_SYNC_RESPONSE);
c2941a
-    build_attribute_xml(sync, a->id, a->set, a->uuid, a->timeout_ms,
c2941a
-                        a->user, a->is_private, v->nodename, v->nodeid,
c2941a
-                        v->current, FALSE);
c2941a
+    add_attribute_value_xml(sync, a, v, false);
c2941a
     attrd_xml_add_writer(sync);
c2941a
     send_attrd_message(NULL, sync);
c2941a
     free_xml(sync);
c2941a
-- 
c2941a
1.8.3.1
c2941a
c2941a
c2941a
From 540d74130c5c8d9c626d6c50475e4dc4f64234e7 Mon Sep 17 00:00:00 2001
c2941a
From: Ken Gaillot <kgaillot@redhat.com>
c2941a
Date: Fri, 4 Jun 2021 16:34:26 -0500
c2941a
Subject: [PATCH 11/11] Fix: pacemaker-attrd: avoid repeated unfencing of
c2941a
 remote nodes
c2941a
c2941a
The attribute manager can't record a remote node's attributes to the CIB until
c2941a
it knows the node is remote. Normally, this is learned when the remote node
c2941a
starts, because the controller clears the CRM_OP_PROBED attribute and indicates
c2941a
that it is for a remote node.
c2941a
c2941a
However, if a cluster node is down when a remote node starts, and later comes
c2941a
up, it learns the remote node's existing attributes as part of the attribute
c2941a
sync. Previously, this did not include whether each value is for a cluster or
c2941a
remote node, so the newly joined attribute manager couldn't write out remote
c2941a
nodes' attributes until it learned that via some other event -- which might not
c2941a
happen before the node becomes DC, in which case its scheduler will not see any
c2941a
unfencing-related node attributes and may wrongly schedule unfencing.
c2941a
c2941a
The sync response handling already calls attrd_lookup_or_create_value(), which
c2941a
checks PCMK__XA_ATTR_IS_REMOTE, so all we need to do is add that to the sync
c2941a
response.
c2941a
---
c2941a
 daemons/attrd/attrd_commands.c | 6 +++++-
c2941a
 1 file changed, 5 insertions(+), 1 deletion(-)
c2941a
c2941a
diff --git a/daemons/attrd/attrd_commands.c b/daemons/attrd/attrd_commands.c
c2941a
index 5b32a77..0142383 100644
c2941a
--- a/daemons/attrd/attrd_commands.c
c2941a
+++ b/daemons/attrd/attrd_commands.c
c2941a
@@ -43,8 +43,9 @@
c2941a
  *     1       1.1.15   PCMK__ATTRD_CMD_UPDATE_BOTH,
c2941a
  *                      PCMK__ATTRD_CMD_UPDATE_DELAY
c2941a
  *     2       1.1.17   PCMK__ATTRD_CMD_CLEAR_FAILURE
c2941a
+ *     3       2.1.1    PCMK__ATTRD_CMD_SYNC_RESPONSE indicates remote nodes
c2941a
  */
c2941a
-#define ATTRD_PROTOCOL_VERSION "2"
c2941a
+#define ATTRD_PROTOCOL_VERSION "3"
c2941a
 
c2941a
 int last_cib_op_done = 0;
c2941a
 GHashTable *attributes = NULL;
c2941a
@@ -150,6 +151,9 @@ add_attribute_value_xml(xmlNode *parent, attribute_t *a, attribute_value_t *v,
c2941a
     if (v->nodeid > 0) {
c2941a
         crm_xml_add_int(xml, PCMK__XA_ATTR_NODE_ID, v->nodeid);
c2941a
     }
c2941a
+    if (v->is_remote != 0) {
c2941a
+        crm_xml_add_int(xml, PCMK__XA_ATTR_IS_REMOTE, 1);
c2941a
+    }
c2941a
     crm_xml_add(xml, PCMK__XA_ATTR_VALUE, v->current);
c2941a
     crm_xml_add_int(xml, PCMK__XA_ATTR_DAMPENING, a->timeout_ms / 1000);
c2941a
     crm_xml_add_int(xml, PCMK__XA_ATTR_IS_PRIVATE, a->is_private);
c2941a
-- 
c2941a
1.8.3.1
c2941a