Blob Blame History Raw
From 9e4addbcb67ea8e36ba853f1e401d8a6cb6a0aa3 Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Fri, 20 Dec 2019 11:34:06 -0600
Subject: [PATCH 1/8] Refactor: scheduler: reduce code duplication when
 displaying resources

Refactor native_output_string() to use GString, for readability and
maintainability. Refactor common_print() to use it, to reduce duplication and
ensure displays are consistent.

This makes a couple small changes in how things are shown:

* If pe_print_dev is enabled (a debugging flag not actually used by anything),
  the additional resource fields are shown with the resource flags rather than
  their own parenthesized list.

* The new output model is now consistent with the legacy print model in
  displaying resource flags with commas (not spaces) between them.
---
 include/crm/pengine/common.h |  24 +--
 lib/pengine/native.c         | 410 +++++++++++++++++--------------------------
 2 files changed, 168 insertions(+), 266 deletions(-)

diff --git a/include/crm/pengine/common.h b/include/crm/pengine/common.h
index e497f9c..48c2b66 100644
--- a/include/crm/pengine/common.h
+++ b/include/crm/pengine/common.h
@@ -1,22 +1,12 @@
-/* 
- * Copyright 2004-2018 the Pacemaker project contributors
+/*
+ * Copyright 2004-2019 the Pacemaker project contributors
  *
  * The version control history for this file may have further details.
- * 
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- * 
- * This software is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- * 
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ *
+ * This source code is licensed under the GNU Lesser General Public License
+ * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
  */
+
 #ifndef PE_COMMON__H
 #  define PE_COMMON__H
 
@@ -104,7 +94,7 @@ enum pe_print_options {
     pe_print_html           = 0x0002,
     pe_print_ncurses        = 0x0004,
     pe_print_printf         = 0x0008,
-    pe_print_dev            = 0x0010,
+    pe_print_dev            = 0x0010, // Debugging (@COMPAT probably not useful)
     pe_print_details        = 0x0020,
     pe_print_max_details    = 0x0040,
     pe_print_rsconly        = 0x0080,
diff --git a/lib/pengine/native.c b/lib/pengine/native.c
index fdb98e0..8fd98bc 100644
--- a/lib/pengine/native.c
+++ b/lib/pengine/native.c
@@ -1,5 +1,5 @@
 /*
- * Copyright 2004-2019 the Pacemaker project contributors
+ * Copyright 2004-2020 the Pacemaker project contributors
  *
  * The version control history for this file may have further details.
  *
@@ -490,165 +490,172 @@ native_print_xml(resource_t * rsc, const char *pre_text, long options, void *pri
     }
 }
 
-/* making this inline rather than a macro prevents a coverity "unreachable"
- * warning on the first usage
- */
-static inline const char *
-comma_if(int i)
+// Append a flag to resource description string's flags list
+static bool
+add_output_flag(GString *s, const char *flag_desc, bool have_flags)
 {
-    return i? ", " : "";
+    g_string_append(s, (have_flags? ", " : " ("));
+    g_string_append(s, flag_desc);
+    return true;
 }
 
-static char *
-flags_string(pe_resource_t *rsc, pe_node_t *node, long options,
-             const char *target_role)
+// Append a node name to resource description string's node list
+static bool
+add_output_node(GString *s, const char *node, bool have_nodes)
 {
-    char *flags[6] = { NULL, };
-    char *result = NULL;
-    int ndx = 0;
+    g_string_append(s, (have_nodes? " " : " [ "));
+    g_string_append(s, node);
+    return true;
+}
+
+/*!
+ * \internal
+ * \brief Create a string description of a resource
+ *
+ * \param[in] rsc          Resource to describe
+ * \param[in] name         Desired identifier for the resource
+ * \param[in] node         If not NULL, node that resource is "on"
+ * \param[in] options      Bitmask of pe_print_*
+ * \param[in] target_role  Resource's target role
+ * \param[in] show_nodes   Whether to display nodes when multiply active
+ *
+ * \return Newly allocated string description of resource
+ * \note Caller must free the result with g_free().
+ */
+static gchar *
+native_output_string(pe_resource_t *rsc, const char *name, pe_node_t *node,
+                     long options, const char *target_role, bool show_nodes)
+{
+    const char *class = crm_element_value(rsc->xml, XML_AGENT_ATTR_CLASS);
+    const char *provider = NULL;
+    const char *kind = crm_element_value(rsc->xml, XML_ATTR_TYPE);
+    char *retval = NULL;
+    GString *outstr = NULL;
+    bool have_flags = false;
+
+    CRM_CHECK(name != NULL, name = "unknown");
+    CRM_CHECK(kind != NULL, kind = "unknown");
+    CRM_CHECK(class != NULL, class = "unknown");
+
+    if (is_set(pcmk_get_ra_caps(class), pcmk_ra_cap_provider)) {
+        provider = crm_element_value(rsc->xml, XML_AGENT_ATTR_PROVIDER);
+    }
 
-    if (node && node->details->online == FALSE && node->details->unclean) {
-        flags[ndx++] = strdup("UNCLEAN");
+    if (is_set(options, pe_print_rsconly)
+        || pcmk__list_of_multiple(rsc->running_on)) {
+        node = NULL;
     }
 
+    // We need a string of at least this size
+    outstr = g_string_sized_new(strlen(name) + strlen(class) + strlen(kind)
+                                + (provider? (strlen(provider) + 2) : 0)
+                                + (node? strlen(node->details->uname) + 1 : 0)
+                                + 11);
+
+    // Resource name and agent
+    g_string_printf(outstr, "%s\t(%s%s%s:%s):\t", name, class,
+                    /* @COMPAT This should be a single ':' (see CLBZ#5395) but
+                     * to avoid breaking anything relying on it, we're keeping
+                     * it like this until the next minor version bump.
+                     */
+                    (provider? "::" : ""), (provider? provider : ""), kind);
+
+    // State on node
+    if (is_set(rsc->flags, pe_rsc_orphan)) {
+        g_string_append(outstr, " ORPHANED");
+    }
+    if (is_set(rsc->flags, pe_rsc_failed)) {
+        enum rsc_role_e role = native_displayable_role(rsc);
+
+        if (role > RSC_ROLE_SLAVE) {
+            g_string_append_printf(outstr, " FAILED %s", role2text(role));
+        } else {
+            g_string_append(outstr, " FAILED");
+        }
+    } else {
+        g_string_append(outstr, native_displayable_state(rsc, options));
+    }
+    if (node) {
+        g_string_append_printf(outstr, " %s", node->details->uname);
+    }
+
+    // Flags, as: (<flag> [...])
+    if (node && !(node->details->online) && node->details->unclean) {
+        have_flags = add_output_flag(outstr, "UNCLEAN", have_flags);
+    }
     if (is_set(options, pe_print_pending)) {
         const char *pending_task = native_pending_task(rsc);
 
         if (pending_task) {
-            flags[ndx++] = strdup(pending_task);
+            have_flags = add_output_flag(outstr, pending_task, have_flags);
         }
     }
-
     if (target_role) {
         enum rsc_role_e target_role_e = text2role(target_role);
 
-        /* Ignore target role Started, as it is the default anyways
-         * (and would also allow a Master to be Master).
-         * Show if target role limits our abilities. */
+        /* Only show target role if it limits our abilities (i.e. ignore
+         * Started, as it is the default anyways, and doesn't prevent the
+         * resource from becoming Master).
+         */
         if (target_role_e == RSC_ROLE_STOPPED) {
-            flags[ndx++] = strdup("disabled");
+            have_flags = add_output_flag(outstr, "disabled", have_flags);
 
         } else if (is_set(uber_parent(rsc)->flags, pe_rsc_promotable)
                    && target_role_e == RSC_ROLE_SLAVE) {
-            flags[ndx++] = crm_strdup_printf("target-role:%s", target_role);
+            have_flags = add_output_flag(outstr, "target-role:", have_flags);
+            g_string_append(outstr, target_role);
         }
     }
-
     if (is_set(rsc->flags, pe_rsc_block)) {
-        flags[ndx++] = strdup("blocked");
-
+        have_flags = add_output_flag(outstr, "blocked", have_flags);
     } else if (is_not_set(rsc->flags, pe_rsc_managed)) {
-        flags[ndx++] = strdup("unmanaged");
+        have_flags = add_output_flag(outstr, "unmanaged", have_flags);
     }
-
     if (is_set(rsc->flags, pe_rsc_failure_ignored)) {
-        flags[ndx++] = strdup("failure ignored");
+        have_flags = add_output_flag(outstr, "failure ignored", have_flags);
     }
-
-    if (ndx > 0) {
-        char *total = g_strjoinv(" ", flags);
-
-        result = crm_strdup_printf(" (%s)", total);
-        g_free(total);
-    }
-
-    while (--ndx >= 0) {
-        free(flags[ndx]);
-    }
-    return result;
-}
-
-static char *
-native_output_string(resource_t *rsc, const char *name, node_t *node, long options,
-                     const char *target_role) {
-    const char *desc = NULL;
-    const char *class = crm_element_value(rsc->xml, XML_AGENT_ATTR_CLASS);
-    const char *kind = crm_element_value(rsc->xml, XML_ATTR_TYPE);
-    enum rsc_role_e role = native_displayable_role(rsc);
-
-    char *retval = NULL;
-
-    char *unames = NULL;
-    char *provider = NULL;
-    const char *orphan = NULL;
-    char *role_s = NULL;
-    char *node_s = NULL;
-    char *print_dev_s = NULL;
-    char *flags_s = NULL;
-
-    CRM_ASSERT(kind != NULL);
-
-    if (is_set(pcmk_get_ra_caps(class), pcmk_ra_cap_provider)) {
-        provider = crm_strdup_printf("::%s", crm_element_value(rsc->xml, XML_AGENT_ATTR_PROVIDER));
+    if (is_set(options, pe_print_dev)) {
+        if (is_set(options, pe_rsc_provisional)) {
+            have_flags = add_output_flag(outstr, "provisional", have_flags);
+        }
+        if (is_not_set(options, pe_rsc_runnable)) {
+            have_flags = add_output_flag(outstr, "non-startable", have_flags);
+        }
+        have_flags = add_output_flag(outstr, "variant:", have_flags);
+        g_string_append_printf(outstr, "%s priority:%f",
+                                       crm_element_name(rsc->xml),
+                                       (double) (rsc->priority));
     }
-
-    if (is_set(rsc->flags, pe_rsc_orphan)) {
-        orphan = " ORPHANED";
+    if (have_flags) {
+        g_string_append(outstr, ")");
     }
 
-    if (role > RSC_ROLE_SLAVE && is_set(rsc->flags, pe_rsc_failed)) {
-        role_s = crm_strdup_printf(" FAILED %s", role2text(role));
-    } else if (is_set(rsc->flags, pe_rsc_failed)) {
-        role_s = crm_strdup_printf(" FAILED");
-    } else {
-        role_s = crm_strdup_printf(" %s", native_displayable_state(rsc, options));
-    }
+    // User-supplied description
+    if (is_set(options, pe_print_rsconly)
+        || pcmk__list_of_multiple(rsc->running_on)) {
+        const char *desc = crm_element_value(rsc->xml, XML_ATTR_DESC);
 
-    if (node) {
-        node_s = crm_strdup_printf(" %s", node->details->uname);
+        if (desc) {
+            g_string_append_printf(outstr, " %s", desc);
+        }
     }
 
-    if (is_set(options, pe_print_rsconly) || g_list_length(rsc->running_on) > 1) {
-        desc = crm_element_value(rsc->xml, XML_ATTR_DESC);
-    }
+    if (show_nodes && is_not_set(options, pe_print_rsconly)
+        && pcmk__list_of_multiple(rsc->running_on)) {
+        bool have_nodes = false;
 
-    if (is_not_set(options, pe_print_rsconly) && g_list_length(rsc->running_on) > 1) {
-        GListPtr gIter = rsc->running_on;
-        gchar **arr = calloc(g_list_length(rsc->running_on)+1, sizeof(gchar *));
-        int i = 0;
-        char *total = NULL;
+        for (GList *iter = rsc->running_on; iter != NULL; iter = iter->next) {
+            pe_node_t *n = (pe_node_t *) iter->data;
 
-        for (; gIter != NULL; gIter = gIter->next) {
-            node_t *n = (node_t *) gIter->data;
-            arr[i] = (gchar *) strdup(n->details->uname);
-            i++;
+            have_nodes = add_output_node(outstr, n->details->uname, have_nodes);
+        }
+        if (have_nodes) {
+            g_string_append(outstr, " ]");
         }
-
-        total = g_strjoinv(" ", arr);
-        unames = crm_strdup_printf(" [ %s ]", total);
-
-        g_free(total);
-        g_strfreev(arr);
     }
 
-    if (is_set(options, pe_print_dev)) {
-        print_dev_s = crm_strdup_printf(" (%s%svariant=%s, priority=%f)",
-                                        is_set(rsc->flags, pe_rsc_provisional) ? "provisional, " : "",
-                                        is_set(rsc->flags, pe_rsc_runnable) ? "" : "non-startable, ",
-                                        crm_element_name(rsc->xml), (double)rsc->priority);
-    }
-
-    flags_s = flags_string(rsc, node, options, target_role);
-
-    retval = crm_strdup_printf("%s\t(%s%s:%s):\t%s%s%s%s%s%s%s%s",
-                               name, class,
-                               provider ? provider : "",
-                               kind,
-                               orphan ? orphan : "",
-                               role_s,
-                               node_s ? node_s : "",
-                               print_dev_s ? print_dev_s : "",
-                               flags_s ? flags_s : "",
-                               desc ? " " : "", desc ? desc : "",
-                               unames ? unames : "");
-
-    free(provider);
-    free(role_s);
-    free(node_s);
-    free(unames);
-    free(print_dev_s);
-    free(flags_s);
-
+    retval = outstr->str;
+    g_string_free(outstr, FALSE);
     return retval;
 }
 
@@ -656,7 +663,6 @@ void
 pe__common_output_html(pcmk__output_t *out, resource_t * rsc,
                        const char *name, node_t *node, long options)
 {
-    char *s = NULL;
     const char *kind = crm_element_value(rsc->xml, XML_ATTR_TYPE);
     const char *target_role = NULL;
 
@@ -675,10 +681,6 @@ pe__common_output_html(pcmk__output_t *out, resource_t * rsc,
         target_role = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_TARGET_ROLE);
     }
 
-    if ((options & pe_print_rsconly) || g_list_length(rsc->running_on) > 1) {
-        node = NULL;
-    }
-
     if (is_not_set(rsc->flags, pe_rsc_managed)) {
         cl = "rsc-managed";
 
@@ -698,10 +700,14 @@ pe__common_output_html(pcmk__output_t *out, resource_t * rsc,
         cl = "rsc-ok";
     }
 
-    s = native_output_string(rsc, name, node, options, target_role);
-    list_node = pcmk__output_create_html_node(out, "li", NULL, NULL, NULL);
-    pcmk_create_html_node(list_node, "span", NULL, cl, s);
-    free(s);
+    {
+        gchar *s = native_output_string(rsc, name, node, options, target_role,
+                                        true);
+
+        list_node = pcmk__output_create_html_node(out, "li", NULL, NULL, NULL);
+        pcmk_create_html_node(list_node, "span", NULL, cl, s);
+        g_free(s);
+    }
 
     if (is_set(options, pe_print_details)) {
         GHashTableIter iter;
@@ -744,7 +750,6 @@ void
 pe__common_output_text(pcmk__output_t *out, resource_t * rsc,
                        const char *name, node_t *node, long options)
 {
-    char *s = NULL;
     const char *target_role = NULL;
 
     CRM_ASSERT(rsc->variant == pe_native);
@@ -758,13 +763,13 @@ pe__common_output_text(pcmk__output_t *out, resource_t * rsc,
         target_role = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_TARGET_ROLE);
     }
 
-    if (is_set(options, pe_print_rsconly) || g_list_length(rsc->running_on) > 1) {
-        node = NULL;
-    }
+    {
+        gchar *s = native_output_string(rsc, name, node, options, target_role,
+                                        true);
 
-    s = native_output_string(rsc, name, node, options, target_role);
-    out->list_item(out, NULL, "%s", s);
-    free(s);
+        out->list_item(out, NULL, "%s", s);
+        g_free(s);
+    }
 
     if (is_set(options, pe_print_details)) {
         GHashTableIter iter;
@@ -806,22 +811,14 @@ pe__common_output_text(pcmk__output_t *out, resource_t * rsc,
 void
 common_print(resource_t * rsc, const char *pre_text, const char *name, node_t *node, long options, void *print_data)
 {
-    const char *desc = NULL;
-    const char *class = crm_element_value(rsc->xml, XML_AGENT_ATTR_CLASS);
-    const char *kind = crm_element_value(rsc->xml, XML_ATTR_TYPE);
     const char *target_role = NULL;
-    enum rsc_role_e role = native_displayable_role(rsc);
-
-    int offset = 0;
-    int flagOffset = 0;
-    char buffer[LINE_MAX];
-    char flagBuffer[LINE_MAX];
 
     CRM_ASSERT(rsc->variant == pe_native);
-    CRM_ASSERT(kind != NULL);
 
     if (rsc->meta) {
-        const char *is_internal = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_INTERNAL_RSC);
+        const char *is_internal = g_hash_table_lookup(rsc->meta,
+                                                      XML_RSC_ATTR_INTERNAL_RSC);
+
         if (crm_is_true(is_internal) && is_not_set(options, pe_print_implicit)) {
             crm_trace("skipping print of internal resource %s", rsc->id);
             return;
@@ -829,17 +826,13 @@ common_print(resource_t * rsc, const char *pre_text, const char *name, node_t *n
         target_role = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_TARGET_ROLE);
     }
 
-    if (pre_text == NULL && (options & pe_print_printf)) {
-        pre_text = " ";
-    }
-
     if (options & pe_print_xml) {
         native_print_xml(rsc, pre_text, options, print_data);
         return;
     }
 
-    if ((options & pe_print_rsconly) || g_list_length(rsc->running_on) > 1) {
-        node = NULL;
+    if ((pre_text == NULL) && (options & pe_print_printf)) {
+        pre_text = " ";
     }
 
     if (options & pe_print_html) {
@@ -849,10 +842,10 @@ common_print(resource_t * rsc, const char *pre_text, const char *name, node_t *n
         } else if (is_set(rsc->flags, pe_rsc_failed)) {
             status_print("<font color=\"red\">");
 
-        } else if (rsc->variant == pe_native && (rsc->running_on == NULL)) {
+        } else if (rsc->running_on == NULL) {
             status_print("<font color=\"red\">");
 
-        } else if (g_list_length(rsc->running_on) > 1) {
+        } else if (pcmk__list_of_multiple(rsc->running_on)) {
             status_print("<font color=\"orange\">");
 
         } else if (is_set(rsc->flags, pe_rsc_failure_ignored)) {
@@ -863,106 +856,29 @@ common_print(resource_t * rsc, const char *pre_text, const char *name, node_t *n
         }
     }
 
-    if(pre_text) {
-        offset += snprintf(buffer + offset, LINE_MAX - offset, "%s", pre_text);
-    }
-    offset += snprintf(buffer + offset, LINE_MAX - offset, "%s", name);
-    offset += snprintf(buffer + offset, LINE_MAX - offset, "\t(%s", class);
-    if (is_set(pcmk_get_ra_caps(class), pcmk_ra_cap_provider)) {
-        const char *prov = crm_element_value(rsc->xml, XML_AGENT_ATTR_PROVIDER);
-        offset += snprintf(buffer + offset, LINE_MAX - offset, "::%s", prov);
-    }
-    offset += snprintf(buffer + offset, LINE_MAX - offset, ":%s):\t", kind);
-    if(is_set(rsc->flags, pe_rsc_orphan)) {
-        offset += snprintf(buffer + offset, LINE_MAX - offset, " ORPHANED ");
-    }
-    if(role > RSC_ROLE_SLAVE && is_set(rsc->flags, pe_rsc_failed)) {
-        offset += snprintf(buffer + offset, LINE_MAX - offset, "FAILED %s", role2text(role));
-    } else if(is_set(rsc->flags, pe_rsc_failed)) {
-        offset += snprintf(buffer + offset, LINE_MAX - offset, "FAILED");
-    } else {
-        const char *rsc_state = native_displayable_state(rsc, options);
-
-        offset += snprintf(buffer + offset, LINE_MAX - offset, "%s", rsc_state);
-    }
-
-    if(node) {
-        offset += snprintf(buffer + offset, LINE_MAX - offset, " %s", node->details->uname);
-
-        if (node->details->online == FALSE && node->details->unclean) {
-            flagOffset += snprintf(flagBuffer + flagOffset, LINE_MAX - flagOffset,
-                                   "%sUNCLEAN", comma_if(flagOffset));
-        }
-    }
-
-    if (options & pe_print_pending) {
-        const char *pending_task = native_pending_task(rsc);
-
-        if (pending_task) {
-            flagOffset += snprintf(flagBuffer + flagOffset, LINE_MAX - flagOffset,
-                                   "%s%s", comma_if(flagOffset), pending_task);
-        }
-    }
-
-    if (target_role) {
-        enum rsc_role_e target_role_e = text2role(target_role);
-
-        /* Ignore target role Started, as it is the default anyways
-         * (and would also allow a Master to be Master).
-         * Show if target role limits our abilities. */
-        if (target_role_e == RSC_ROLE_STOPPED) {
-            flagOffset += snprintf(flagBuffer + flagOffset, LINE_MAX - flagOffset,
-                                   "%sdisabled", comma_if(flagOffset));
-
-        } else if (is_set(uber_parent(rsc)->flags, pe_rsc_promotable)
-                   && target_role_e == RSC_ROLE_SLAVE) {
-            flagOffset += snprintf(flagBuffer + flagOffset, LINE_MAX - flagOffset,
-                                   "%starget-role:%s", comma_if(flagOffset), target_role);
-        }
-    }
-
-    if (is_set(rsc->flags, pe_rsc_block)) {
-        flagOffset += snprintf(flagBuffer + flagOffset, LINE_MAX - flagOffset,
-                               "%sblocked", comma_if(flagOffset));
-
-    } else if (is_not_set(rsc->flags, pe_rsc_managed)) {
-        flagOffset += snprintf(flagBuffer + flagOffset, LINE_MAX - flagOffset,
-                               "%sunmanaged", comma_if(flagOffset));
-    }
-
-    if(is_set(rsc->flags, pe_rsc_failure_ignored)) {
-        flagOffset += snprintf(flagBuffer + flagOffset, LINE_MAX - flagOffset,
-                               "%sfailure ignored", comma_if(flagOffset));
-    }
-
-    if ((options & pe_print_rsconly) || g_list_length(rsc->running_on) > 1) {
-        desc = crm_element_value(rsc->xml, XML_ATTR_DESC);
-    }
-
-    CRM_LOG_ASSERT(offset > 0);
-    if(flagOffset > 0) {
-        status_print("%s (%s)%s%s", buffer, flagBuffer, desc?" ":"", desc?desc:"");
-    } else {
-        status_print("%s%s%s", buffer, desc?" ":"", desc?desc:"");
+    {
+        gchar *resource_s = native_output_string(rsc, name, node, options,
+                                                 target_role, false);
+        status_print("%s%s", (pre_text? pre_text : ""), resource_s);
+        g_free(resource_s);
     }
 
 #if CURSES_ENABLED
-    if ((options & pe_print_rsconly) || g_list_length(rsc->running_on) > 1) {
-        /* Done */
-
-    } else if (options & pe_print_ncurses) {
+    if (is_set(options, pe_print_ncurses)
+        && is_not_set(options, pe_print_rsconly)
+        && !pcmk__list_of_multiple(rsc->running_on)) {
         /* coverity[negative_returns] False positive */
         move(-1, 0);
     }
 #endif
 
-    if (options & pe_print_html) {
+    if (is_set(options, pe_print_html)) {
         status_print(" </font> ");
     }
 
-    if ((options & pe_print_rsconly)) {
+    if (is_not_set(options, pe_print_rsconly)
+        && pcmk__list_of_multiple(rsc->running_on)) {
 
-    } else if (g_list_length(rsc->running_on) > 1) {
         GListPtr gIter = rsc->running_on;
         int counter = 0;
 
@@ -1025,10 +941,6 @@ common_print(resource_t * rsc, const char *pre_text, const char *name, node_t *n
         GHashTableIter iter;
         node_t *n = NULL;
 
-        status_print("%s\t(%s%svariant=%s, priority=%f)", pre_text,
-                     is_set(rsc->flags, pe_rsc_provisional) ? "provisional, " : "",
-                     is_set(rsc->flags, pe_rsc_runnable) ? "" : "non-startable, ",
-                     crm_element_name(rsc->xml), (double)rsc->priority);
         status_print("%s\tAllowed Nodes", pre_text);
         g_hash_table_iter_init(&iter, rsc->allowed_nodes);
         while (g_hash_table_iter_next(&iter, NULL, (void **)&n)) {
-- 
1.8.3.1


From 41e911be8ea9151b3f0758c2c22c0e69b8b78d93 Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Thu, 19 Dec 2019 17:18:41 -0600
Subject: [PATCH 2/8] Log: scheduler: drop redundant trace messages

We logged "applying placement constraints" three times.
---
 lib/pacemaker/pcmk_sched_allocate.c | 17 ++++-------------
 1 file changed, 4 insertions(+), 13 deletions(-)

diff --git a/lib/pacemaker/pcmk_sched_allocate.c b/lib/pacemaker/pcmk_sched_allocate.c
index ca43c71..dde8b69 100644
--- a/lib/pacemaker/pcmk_sched_allocate.c
+++ b/lib/pacemaker/pcmk_sched_allocate.c
@@ -623,21 +623,15 @@ check_actions(pe_working_set_t * data_set)
     }
 }
 
-static gboolean
+static void
 apply_placement_constraints(pe_working_set_t * data_set)
 {
-    GListPtr gIter = NULL;
-
-    crm_trace("Applying constraints...");
-
-    for (gIter = data_set->placement_constraints; gIter != NULL; gIter = gIter->next) {
+    for (GList *gIter = data_set->placement_constraints;
+         gIter != NULL; gIter = gIter->next) {
         pe__location_t *cons = gIter->data;
 
         cons->rsc_lh->cmds->rsc_location(cons->rsc_lh, cons);
     }
-
-    return TRUE;
-
 }
 
 static gboolean
@@ -994,10 +988,7 @@ stage2(pe_working_set_t * data_set)
 {
     GListPtr gIter = NULL;
 
-    crm_trace("Applying placement constraints");
-
-    gIter = data_set->nodes;
-    for (; gIter != NULL; gIter = gIter->next) {
+    for (gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) {
         node_t *node = (node_t *) gIter->data;
 
         if (node == NULL) {
-- 
1.8.3.1


From 7fe136e19b5018d609beb8bad4e34234739572c9 Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Sat, 7 Dec 2019 12:13:11 -0600
Subject: [PATCH 3/8] Refactor: libcrmcommon: convenience functions for list
 length comparisons

... for efficiency and readability
---
 include/crm/common/internal.h | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/include/crm/common/internal.h b/include/crm/common/internal.h
index da2c7d7..484c836 100644
--- a/include/crm/common/internal.h
+++ b/include/crm/common/internal.h
@@ -126,6 +126,20 @@ crm_getpid_s()
     return crm_strdup_printf("%lu", (unsigned long) getpid());
 }
 
+// More efficient than g_list_length(list) == 1
+static inline bool
+pcmk__list_of_1(GList *list)
+{
+    return list && (list->next == NULL);
+}
+
+// More efficient than g_list_length(list) > 1
+static inline bool
+pcmk__list_of_multiple(GList *list)
+{
+    return list && (list->next != NULL);
+}
+
 /* convenience functions for failure-related node attributes */
 
 #define CRM_FAIL_COUNT_PREFIX   "fail-count"
-- 
1.8.3.1


From 9ff4f6bca540576f0a3333c959e8014ed168353f Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Mon, 16 Dec 2019 14:13:30 -0600
Subject: [PATCH 4/8] Refactor: libcrmcommon: add convenience macros for
 plurals

I've avoided making s_if_plural() an official API due to its hackiness, but
it really is the best solution for now. Promote it to pcmk__plural_s(), along
with a companion macro pcmk__plural_alt() for more complicated plurals.
---
 include/crm/common/internal.h | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)

diff --git a/include/crm/common/internal.h b/include/crm/common/internal.h
index 484c836..ee560c9 100644
--- a/include/crm/common/internal.h
+++ b/include/crm/common/internal.h
@@ -107,6 +107,29 @@ bool crm_compress_string(const char *data, int length, int max, char **result,
                          unsigned int *result_len);
 gint crm_alpha_sort(gconstpointer a, gconstpointer b);
 
+/* Correctly displaying singular or plural is complicated; consider "1 node has"
+ * vs. "2 nodes have". A flexible solution is to pluralize entire strings, e.g.
+ *
+ * if (a == 1) {
+ *     crm_info("singular message"):
+ * } else {
+ *     crm_info("plural message");
+ * }
+ *
+ * though even that's not sufficient for all languages besides English (if we
+ * ever desire to do translations of output and log messages). But the following
+ * convenience macros are "good enough" and more concise for many cases.
+ */
+
+/* Example:
+ * crm_info("Found %d %s", nentries,
+ *          pcmk__plural_alt(nentries, "entry", "entries"));
+ */
+#define pcmk__plural_alt(i, s1, s2) (((i) == 1)? (s1) : (s2))
+
+// Example: crm_info("Found %d node%s", nnodes, pcmk__plural_s(nnodes));
+#define pcmk__plural_s(i) pcmk__plural_alt(i, "", "s")
+
 static inline char *
 crm_concat(const char *prefix, const char *suffix, char join)
 {
-- 
1.8.3.1


From 0378db5030400202e59b2bae0dabd65d00a3e9c8 Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Thu, 12 Dec 2019 20:50:50 -0600
Subject: [PATCH 5/8] Log: controller: improve join messages

---
 daemons/controld/controld_fsa.c     |  81 ++++----
 daemons/controld/controld_join_dc.c | 383 +++++++++++++++++++++---------------
 2 files changed, 268 insertions(+), 196 deletions(-)

diff --git a/daemons/controld/controld_fsa.c b/daemons/controld/controld_fsa.c
index 6760224..b985fa9 100644
--- a/daemons/controld/controld_fsa.c
+++ b/daemons/controld/controld_fsa.c
@@ -1,5 +1,5 @@
 /*
- * Copyright 2004-2019 the Pacemaker project contributors
+ * Copyright 2004-2020 the Pacemaker project contributors
  *
  * The version control history for this file may have further details.
  *
@@ -460,12 +460,53 @@ log_fsa_input(fsa_data_t * stored_msg)
     }
 }
 
+static void
+check_join_counts(fsa_data_t *msg_data)
+{
+    int count;
+    guint npeers;
+
+    count = crmd_join_phase_count(crm_join_finalized);
+    if (count > 0) {
+        crm_err("%d cluster node%s failed to confirm join",
+                count, pcmk__plural_s(count));
+        crmd_join_phase_log(LOG_NOTICE);
+        return;
+    }
+
+    npeers = crm_active_peers();
+    count = crmd_join_phase_count(crm_join_confirmed);
+    if (count == npeers) {
+        if (npeers == 1) {
+            crm_debug("Sole active cluster node is fully joined");
+        } else {
+            crm_debug("All %d active cluster nodes are fully joined", count);
+        }
+
+    } else if (count > npeers) {
+        crm_err("New election needed because more nodes confirmed join "
+                "than are in membership (%d > %u)", count, npeers);
+        register_fsa_input(C_FSA_INTERNAL, I_ELECTION, NULL);
+
+    } else if (saved_ccm_membership_id != crm_peer_seq) {
+        crm_info("New join needed because membership changed (%llu -> %llu)",
+                 saved_ccm_membership_id, crm_peer_seq);
+        register_fsa_input_before(C_FSA_INTERNAL, I_NODE_JOIN, NULL);
+
+    } else {
+        crm_warn("Only %d of %u active cluster nodes fully joined "
+                 "(%d did not respond to offer)",
+                 count, npeers, crmd_join_phase_count(crm_join_welcomed));
+    }
+}
+
 long long
 do_state_transition(long long actions,
                     enum crmd_fsa_state cur_state,
                     enum crmd_fsa_state next_state, fsa_data_t * msg_data)
 {
     int level = LOG_INFO;
+    int count = 0;
     long long tmp = actions;
     gboolean clear_recovery_bit = TRUE;
 
@@ -563,13 +604,14 @@ do_state_transition(long long actions,
                 crm_warn("Progressed to state %s after %s",
                          fsa_state2string(next_state), fsa_cause2string(cause));
             }
-            if (crmd_join_phase_count(crm_join_welcomed) > 0) {
-                crm_warn("%u cluster nodes failed to respond"
-                         " to the join offer.", crmd_join_phase_count(crm_join_welcomed));
+            count = crmd_join_phase_count(crm_join_welcomed);
+            if (count > 0) {
+                crm_warn("%d cluster node%s failed to respond to join offer",
+                         count, pcmk__plural_s(count));
                 crmd_join_phase_log(LOG_NOTICE);
 
             } else {
-                crm_debug("All %d cluster nodes responded to the join offer.",
+                crm_debug("All cluster nodes (%d) responded to join offer",
                           crmd_join_phase_count(crm_join_integrated));
             }
             break;
@@ -581,34 +623,7 @@ do_state_transition(long long actions,
                 crm_info("Progressed to state %s after %s",
                          fsa_state2string(next_state), fsa_cause2string(cause));
             }
-
-            if (crmd_join_phase_count(crm_join_finalized) > 0) {
-                crm_err("%u cluster nodes failed to confirm their join.",
-                        crmd_join_phase_count(crm_join_finalized));
-                crmd_join_phase_log(LOG_NOTICE);
-
-            } else if (crmd_join_phase_count(crm_join_confirmed)
-                       == crm_active_peers()) {
-                crm_debug("All %u cluster nodes are"
-                          " eligible to run resources.", crm_active_peers());
-
-            } else if (crmd_join_phase_count(crm_join_confirmed) > crm_active_peers()) {
-                crm_err("We have more confirmed nodes than our membership does: %d vs. %d",
-                        crmd_join_phase_count(crm_join_confirmed), crm_active_peers());
-                register_fsa_input(C_FSA_INTERNAL, I_ELECTION, NULL);
-
-            } else if (saved_ccm_membership_id != crm_peer_seq) {
-                crm_info("Membership changed: %llu -> %llu - join restart",
-                         saved_ccm_membership_id, crm_peer_seq);
-                register_fsa_input_before(C_FSA_INTERNAL, I_NODE_JOIN, NULL);
-
-            } else {
-                crm_warn("Only %u of %u cluster "
-                         "nodes are eligible to run resources - continue %d",
-                         crmd_join_phase_count(crm_join_confirmed),
-                         crm_active_peers(), crmd_join_phase_count(crm_join_welcomed));
-            }
-/* 			initialize_join(FALSE); */
+            check_join_counts(msg_data);
             break;
 
         case S_STOPPING:
diff --git a/daemons/controld/controld_join_dc.c b/daemons/controld/controld_join_dc.c
index 988aaa6..54324b2 100644
--- a/daemons/controld/controld_join_dc.c
+++ b/daemons/controld/controld_join_dc.c
@@ -26,7 +26,11 @@ void finalize_join_for(gpointer key, gpointer value, gpointer user_data);
 void finalize_sync_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data);
 gboolean check_join_state(enum crmd_fsa_state cur_state, const char *source);
 
+/* Numeric counter used to identify join rounds (an unsigned int would be
+ * appropriate, except we get and set it in XML as int)
+ */
 static int current_join_id = 0;
+
 unsigned long long saved_ccm_membership_id = 0;
 
 void
@@ -34,12 +38,7 @@ crm_update_peer_join(const char *source, crm_node_t * node, enum crm_join_phase
 {
     enum crm_join_phase last = 0;
 
-    if(node == NULL) {
-        crm_err("Could not update join because node not specified"
-                CRM_XS " join-%u source=%s phase=%s",
-                current_join_id, source, crm_join_phase_str(phase));
-        return;
-    }
+    CRM_CHECK(node != NULL, return);
 
     /* Remote nodes do not participate in joins */
     if (is_set(node->flags, crm_remote_node)) {
@@ -49,21 +48,23 @@ crm_update_peer_join(const char *source, crm_node_t * node, enum crm_join_phase
     last = node->join;
 
     if(phase == last) {
-        crm_trace("%s: Node %s[%u] - join-%u phase still %s",
-                  source, node->uname, node->id, current_join_id,
-                  crm_join_phase_str(last));
+        crm_trace("Node %s join-%d phase is still %s "
+                  CRM_XS " nodeid=%u source=%s",
+                  node->uname, current_join_id, crm_join_phase_str(last),
+                  node->id, source);
 
     } else if ((phase <= crm_join_none) || (phase == (last + 1))) {
         node->join = phase;
-        crm_info("%s: Node %s[%u] - join-%u phase %s -> %s",
-                 source, node->uname, node->id, current_join_id,
-                 crm_join_phase_str(last), crm_join_phase_str(phase));
+        crm_trace("Node %s join-%d phase is now %s (was %s) "
+                  CRM_XS " nodeid=%u source=%s",
+                 node->uname, current_join_id, crm_join_phase_str(phase),
+                 crm_join_phase_str(last), node->id, source);
 
     } else {
-        crm_err("Could not update join for node %s because phase transition invalid "
-                CRM_XS " join-%u source=%s node_id=%u last=%s new=%s",
-                node->uname, current_join_id, source, node->id,
-                crm_join_phase_str(last), crm_join_phase_str(phase));
+        crm_warn("Rejecting join-%d phase update for node %s because "
+                 "can't go from %s to %s " CRM_XS " nodeid=%u source=%s",
+                 current_join_id, node->uname, crm_join_phase_str(last),
+                 crm_join_phase_str(phase), node->id, source);
     }
 }
 
@@ -73,9 +74,7 @@ initialize_join(gboolean before)
     GHashTableIter iter;
     crm_node_t *peer = NULL;
 
-    /* clear out/reset a bunch of stuff */
-    crm_debug("join-%d: Initializing join data (flag=%s)",
-              current_join_id, before ? "true" : "false");
+    crm_debug("Starting new join round join-%d", current_join_id);
 
     g_hash_table_iter_init(&iter, crm_peer_cache);
     while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &peer)) {
@@ -128,7 +127,9 @@ join_make_offer(gpointer key, gpointer value, gpointer user_data)
 
     CRM_ASSERT(member != NULL);
     if (crm_is_peer_active(member) == FALSE) {
-        crm_info("Not making an offer to %s: not active (%s)", member->uname, member->state);
+        crm_info("Not making join-%d offer to inactive node %s",
+                 current_join_id,
+                 (member->uname? member->uname : "with unknown name"));
         if(member->expected == NULL && safe_str_eq(member->state, CRM_NODE_LOST)) {
             /* You would think this unsafe, but in fact this plus an
              * active resource is what causes it to be fenced.
@@ -145,17 +146,21 @@ join_make_offer(gpointer key, gpointer value, gpointer user_data)
     }
 
     if (member->uname == NULL) {
-        crm_info("No recipient for welcome message.(Node uuid:%s)", member->uuid);
+        crm_info("Not making join-%d offer to node uuid %s with unknown name",
+                 current_join_id, member->uuid);
         return;
     }
 
     if (saved_ccm_membership_id != crm_peer_seq) {
         saved_ccm_membership_id = crm_peer_seq;
-        crm_info("Making join offers based on membership %llu", crm_peer_seq);
+        crm_info("Making join-%d offers based on membership event %llu",
+                 current_join_id, crm_peer_seq);
     }
 
     if(user_data && member->join > crm_join_none) {
-        crm_info("Skipping %s: already known %d", member->uname, member->join);
+        crm_info("Not making join-%d offer to already known node %s (%s)",
+                 current_join_id, member->uname,
+                 crm_join_phase_str(member->join));
         return;
     }
 
@@ -166,14 +171,11 @@ join_make_offer(gpointer key, gpointer value, gpointer user_data)
     // Advertise our feature set so the joining node can bail if not compatible
     crm_xml_add(offer, XML_ATTR_CRM_VERSION, CRM_FEATURE_SET);
 
-    /* send the welcome */
-    crm_info("join-%d: Sending offer to %s", current_join_id, member->uname);
-
+    crm_info("Sending join-%d offer to %s", current_join_id, member->uname);
     send_cluster_message(member, crm_msg_crmd, offer, TRUE);
     free_xml(offer);
 
     crm_update_peer_join(__FUNCTION__, member, crm_join_welcomed);
-    /* crm_update_peer_expected(__FUNCTION__, member, CRMD_JOINSTATE_PENDING); */
 }
 
 /*	 A_DC_JOIN_OFFER_ALL	*/
@@ -183,6 +185,8 @@ do_dc_join_offer_all(long long action,
                      enum crmd_fsa_state cur_state,
                      enum crmd_fsa_input current_input, fsa_data_t * msg_data)
 {
+    int count;
+
     /* Reset everyone's status back to down or in_ccm in the CIB.
      * Any nodes that are active in the CIB but not in the cluster membership
      * will be seen as offline by the scheduler anyway.
@@ -197,9 +201,11 @@ do_dc_join_offer_all(long long action,
     }
     g_hash_table_foreach(crm_peer_cache, join_make_offer, NULL);
 
+    count = crmd_join_phase_count(crm_join_welcomed);
+    crm_info("Waiting on join-%d requests from %d outstanding node%s",
+             current_join_id, count, pcmk__plural_s(count));
+
     // Don't waste time by invoking the scheduler yet
-    crm_info("join-%d: Waiting on %d outstanding join acks",
-             current_join_id, crmd_join_phase_count(crm_join_welcomed));
 }
 
 /*	 A_DC_JOIN_OFFER_ONE	*/
@@ -211,50 +217,40 @@ do_dc_join_offer_one(long long action,
 {
     crm_node_t *member;
     ha_msg_input_t *welcome = NULL;
-
-    const char *op = NULL;
+    int count;
     const char *join_to = NULL;
 
-    if (msg_data->data) {
-        welcome = fsa_typed_data(fsa_dt_ha_msg);
-
-    } else {
-        crm_info("An unknown node joined - (re-)offer to any unconfirmed nodes");
+    if (msg_data->data == NULL) {
+        crm_info("Making join-%d offers to any unconfirmed nodes "
+                 "because an unknown node joined", current_join_id);
         g_hash_table_foreach(crm_peer_cache, join_make_offer, &member);
         check_join_state(cur_state, __FUNCTION__);
         return;
     }
 
+    welcome = fsa_typed_data(fsa_dt_ha_msg);
     if (welcome == NULL) {
-        crm_err("Attempt to send welcome message without a message to reply to!");
+        // fsa_typed_data() already logged an error
         return;
     }
 
     join_to = crm_element_value(welcome->msg, F_CRM_HOST_FROM);
     if (join_to == NULL) {
-        crm_err("Attempt to send welcome message without a host to reply to!");
+        crm_err("Can't make join-%d offer to unknown node", current_join_id);
         return;
     }
-
     member = crm_get_peer(0, join_to);
-    op = crm_element_value(welcome->msg, F_CRM_TASK);
-    if (join_to != NULL && (cur_state == S_INTEGRATION || cur_state == S_FINALIZE_JOIN)) {
-        /* note: it _is_ possible that a node will have been
-         *  sick or starting up when the original offer was made.
-         *  however, it will either re-announce itself in due course
-         *  _or_ we can re-store the original offer on the client.
-         */
-        crm_trace("(Re-)offering membership to %s...", join_to);
-    }
 
-    crm_info("join-%d: Processing %s request from %s in state %s",
-             current_join_id, op, join_to, fsa_state2string(cur_state));
+    /* It is possible that a node will have been sick or starting up when the
+     * original offer was made. However, it will either re-announce itself in
+     * due course, or we can re-store the original offer on the client.
+     */
 
     crm_update_peer_join(__FUNCTION__, member, crm_join_none);
     join_make_offer(NULL, member, NULL);
 
-    /* always offer to the DC (ourselves)
-     * this ensures the correct value for max_generation_from
+    /* If the offer isn't to the local node, make an offer to the local node as
+     * well, to ensure the correct value for max_generation_from.
      */
     if (strcmp(join_to, fsa_our_uname) != 0) {
         member = crm_get_peer(0, fsa_our_uname);
@@ -266,9 +262,11 @@ do_dc_join_offer_one(long long action,
      */
     abort_transition(INFINITY, tg_restart, "Node join", NULL);
 
+    count = crmd_join_phase_count(crm_join_welcomed);
+    crm_info("Waiting on join-%d requests from %d outstanding node%s",
+             current_join_id, count, pcmk__plural_s(count));
+
     // Don't waste time by invoking the scheduler yet
-    crm_debug("Waiting on %d outstanding join acks for join-%d",
-              crmd_join_phase_count(crm_join_welcomed), current_join_id);
 }
 
 static int
@@ -301,22 +299,31 @@ do_dc_join_filter_offer(long long action,
 
     int cmp = 0;
     int join_id = -1;
+    int count = 0;
     gboolean ack_nack_bool = TRUE;
-    const char *ack_nack = CRMD_JOINSTATE_MEMBER;
     ha_msg_input_t *join_ack = fsa_typed_data(fsa_dt_ha_msg);
 
     const char *join_from = crm_element_value(join_ack->msg, F_CRM_HOST_FROM);
     const char *ref = crm_element_value(join_ack->msg, F_CRM_REFERENCE);
     const char *join_version = crm_element_value(join_ack->msg,
                                                  XML_ATTR_CRM_VERSION);
+    crm_node_t *join_node = NULL;
 
-    crm_node_t *join_node = crm_get_peer(0, join_from);
-
-    crm_debug("Processing req from %s", join_from);
+    if (join_from == NULL) {
+        crm_err("Ignoring invalid join request without node name");
+        return;
+    }
+    join_node = crm_get_peer(0, join_from);
 
-    generation = join_ack->xml;
     crm_element_value_int(join_ack->msg, F_CRM_JOIN_ID, &join_id);
+    if (join_id != current_join_id) {
+        crm_debug("Ignoring join-%d request from %s because we are on join-%d",
+                  join_id, join_from, current_join_id);
+        check_join_state(cur_state, __FUNCTION__);
+        return;
+    }
 
+    generation = join_ack->xml;
     if (max_generation_xml != NULL && generation != NULL) {
         int lpc = 0;
 
@@ -331,68 +338,71 @@ do_dc_join_filter_offer(long long action,
         }
     }
 
-    if (join_id != current_join_id) {
-        crm_debug("Invalid response from %s: join-%d vs. join-%d",
-                  join_from, join_id, current_join_id);
-        check_join_state(cur_state, __FUNCTION__);
-        return;
+    if (ref == NULL) {
+        ref = "none"; // for logging only
+    }
 
-    } else if (join_node == NULL || crm_is_peer_active(join_node) == FALSE) {
-        crm_err("Node %s is not a member", join_from);
+    if (crm_is_peer_active(join_node) == FALSE) {
+        crm_err("Rejecting join-%d request from inactive node %s "
+                CRM_XS " ref=%s", join_id, join_from, ref);
         ack_nack_bool = FALSE;
 
     } else if (generation == NULL) {
-        crm_err("Generation was NULL");
+        crm_err("Rejecting invalid join-%d request from node %s "
+                "missing CIB generation " CRM_XS " ref=%s",
+                join_id, join_from, ref);
         ack_nack_bool = FALSE;
 
     } else if ((join_version == NULL)
                || !feature_set_compatible(CRM_FEATURE_SET, join_version)) {
-        crm_err("Node %s feature set (%s) is incompatible with ours (%s)",
-                join_from, (join_version? join_version : "pre-3.1.0"),
-                CRM_FEATURE_SET);
+        crm_err("Rejecting join-%d request from node %s because feature set %s"
+                " is incompatible with ours (%s) " CRM_XS " ref=%s",
+                join_id, join_from, (join_version? join_version : "pre-3.1.0"),
+                CRM_FEATURE_SET, ref);
         ack_nack_bool = FALSE;
 
     } else if (max_generation_xml == NULL) {
+        crm_debug("Accepting join-%d request from %s "
+                  "(with first CIB generation) " CRM_XS " ref=%s",
+                  join_id, join_from, ref);
         max_generation_xml = copy_xml(generation);
         max_generation_from = strdup(join_from);
 
     } else if (cmp < 0 || (cmp == 0 && safe_str_eq(join_from, fsa_our_uname))) {
-        crm_debug("%s has a better generation number than"
-                  " the current max %s", join_from, max_generation_from);
-        if (max_generation_xml) {
-            crm_log_xml_debug(max_generation_xml, "Max generation");
-        }
-        crm_log_xml_debug(generation, "Their generation");
+        crm_debug("Accepting join-%d request from %s (with better "
+                  "CIB generation than current best from %s) " CRM_XS " ref=%s",
+                  join_id, join_from, max_generation_from, ref);
+        crm_log_xml_debug(max_generation_xml, "Old max generation");
+        crm_log_xml_debug(generation, "New max generation");
 
         free(max_generation_from);
         free_xml(max_generation_xml);
 
         max_generation_from = strdup(join_from);
         max_generation_xml = copy_xml(join_ack->xml);
+
+    } else {
+        crm_debug("Accepting join-%d request from %s " CRM_XS " ref=%s",
+                  join_id, join_from, ref);
     }
 
     if (ack_nack_bool == FALSE) {
-        /* NACK this client */
-        ack_nack = CRMD_JOINSTATE_NACK;
         crm_update_peer_join(__FUNCTION__, join_node, crm_join_nack);
-        crm_err("Rejecting cluster join request from %s " CRM_XS
-                " NACK join-%d ref=%s", join_from, join_id, ref);
-
+        crm_update_peer_expected(__FUNCTION__, join_node, CRMD_JOINSTATE_NACK);
     } else {
-        crm_debug("join-%d: Welcoming node %s (ref %s)", join_id, join_from, ref);
         crm_update_peer_join(__FUNCTION__, join_node, crm_join_integrated);
+        crm_update_peer_expected(__FUNCTION__, join_node, CRMD_JOINSTATE_MEMBER);
     }
 
-    crm_update_peer_expected(__FUNCTION__, join_node, ack_nack);
-
-    crm_debug("%u nodes have been integrated into join-%d",
-              crmd_join_phase_count(crm_join_integrated), join_id);
-
+    count = crmd_join_phase_count(crm_join_integrated);
+    crm_debug("%d node%s currently integrated in join-%d",
+              count, pcmk__plural_s(count), join_id);
 
     if (check_join_state(cur_state, __FUNCTION__) == FALSE) {
         // Don't waste time by invoking the scheduler yet
-        crm_debug("join-%d: Still waiting on %d outstanding offers",
-                  join_id, crmd_join_phase_count(crm_join_welcomed));
+        count = crmd_join_phase_count(crm_join_welcomed);
+        crm_debug("Waiting on join-%d requests from %d outstanding node%s",
+                  join_id, count, pcmk__plural_s(count));
     }
 }
 
@@ -405,21 +415,24 @@ do_dc_join_finalize(long long action,
 {
     char *sync_from = NULL;
     int rc = pcmk_ok;
+    int count_welcomed = crmd_join_phase_count(crm_join_welcomed);
+    int count_integrated = crmd_join_phase_count(crm_join_integrated);
 
     /* This we can do straight away and avoid clients timing us out
      *  while we compute the latest CIB
      */
-    crm_debug("Finalizing join-%d for %d clients",
-              current_join_id, crmd_join_phase_count(crm_join_integrated));
-
-    crmd_join_phase_log(LOG_INFO);
-    if (crmd_join_phase_count(crm_join_welcomed) != 0) {
-        crm_info("Waiting for %d more nodes", crmd_join_phase_count(crm_join_welcomed));
+    if (count_welcomed != 0) {
+        crm_debug("Waiting on join-%d requests from %d outstanding node%s "
+                  "before finalizing join", current_join_id, count_welcomed,
+                  pcmk__plural_s(count_welcomed));
+        crmd_join_phase_log(LOG_DEBUG);
         /* crmd_fsa_stall(FALSE); Needed? */
         return;
 
-    } else if (crmd_join_phase_count(crm_join_integrated) == 0) {
-        /* Nothing to do */
+    } else if (count_integrated == 0) {
+        crm_debug("Finalization not needed for join-%d at the current time",
+                  current_join_id);
+        crmd_join_phase_log(LOG_DEBUG);
         check_join_state(fsa_state, __FUNCTION__);
         return;
     }
@@ -430,8 +443,9 @@ do_dc_join_finalize(long long action,
     }
 
     if (is_set(fsa_input_register, R_IN_TRANSITION)) {
-        crm_warn("Delaying response to cluster join offer while transition in progress "
-                 CRM_XS " join-%d", current_join_id);
+        crm_warn("Delaying join-%d finalization while transition in progress",
+                 current_join_id);
+        crmd_join_phase_log(LOG_DEBUG);
         crmd_fsa_stall(FALSE);
         return;
     }
@@ -440,18 +454,20 @@ do_dc_join_finalize(long long action,
         /* ask for the agreed best CIB */
         sync_from = strdup(max_generation_from);
         set_bit(fsa_input_register, R_CIB_ASKED);
-        crm_notice("Syncing the Cluster Information Base from %s to rest of cluster "
-                   CRM_XS " join-%d", sync_from, current_join_id);
-        crm_log_xml_notice(max_generation_xml, "Requested version");
+        crm_notice("Finalizing join-%d for %d node%s (sync'ing CIB from %s)",
+                   current_join_id, count_integrated,
+                   pcmk__plural_s(count_integrated), sync_from);
+        crm_log_xml_notice(max_generation_xml, "Requested CIB version");
 
     } else {
         /* Send _our_ CIB out to everyone */
         sync_from = strdup(fsa_our_uname);
-        crm_info("join-%d: Syncing our CIB to the rest of the cluster",
-                 current_join_id);
-        crm_log_xml_debug(max_generation_xml, "Requested version");
+        crm_debug("Finalizing join-%d for %d node%s (sync'ing from local CIB)",
+                  current_join_id, count_integrated,
+                  pcmk__plural_s(count_integrated));
+        crm_log_xml_debug(max_generation_xml, "Requested CIB version");
     }
-
+    crmd_join_phase_log(LOG_DEBUG);
 
     rc = fsa_cib_conn->cmds->sync_from(fsa_cib_conn, sync_from, NULL, cib_quorum_override);
     fsa_register_cib_callback(rc, FALSE, sync_from, finalize_sync_callback);
@@ -463,26 +479,33 @@ finalize_sync_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, voi
     CRM_LOG_ASSERT(-EPERM != rc);
     clear_bit(fsa_input_register, R_CIB_ASKED);
     if (rc != pcmk_ok) {
-        do_crm_log((rc == -pcmk_err_old_data ? LOG_WARNING : LOG_ERR),
-                   "Sync from %s failed: %s", (char *)user_data, pcmk_strerror(rc));
+        do_crm_log(((rc == -pcmk_err_old_data)? LOG_WARNING : LOG_ERR),
+                   "Could not sync CIB from %s in join-%d: %s",
+                   (char *) user_data, current_join_id, pcmk_strerror(rc));
 
         /* restart the whole join process */
         register_fsa_error_adv(C_FSA_INTERNAL, I_ELECTION_DC, NULL, NULL, __FUNCTION__);
 
-    } else if (AM_I_DC && fsa_state == S_FINALIZE_JOIN) {
+    } else if (!AM_I_DC) {
+        crm_debug("Sync'ed CIB for join-%d but no longer DC", current_join_id);
+
+    } else if (fsa_state != S_FINALIZE_JOIN) {
+        crm_debug("Sync'ed CIB for join-%d but no longer in S_FINALIZE_JOIN (%s)",
+                  current_join_id, fsa_state2string(fsa_state));
+
+    } else {
         set_bit(fsa_input_register, R_HAVE_CIB);
         clear_bit(fsa_input_register, R_CIB_ASKED);
 
         /* make sure dc_uuid is re-set to us */
         if (check_join_state(fsa_state, __FUNCTION__) == FALSE) {
-            crm_debug("Notifying %d clients of join-%d results",
-                      crmd_join_phase_count(crm_join_integrated), current_join_id);
+            int count_integrated = crmd_join_phase_count(crm_join_integrated);
+
+            crm_debug("Notifying %d node%s of join-%d results",
+                      count_integrated, pcmk__plural_s(count_integrated),
+                      current_join_id);
             g_hash_table_foreach(crm_peer_cache, finalize_join_for, NULL);
         }
-
-    } else {
-        crm_debug("No longer the DC in S_FINALIZE_JOIN: %s in %s",
-                  AM_I_DC ? "DC" : "controller", fsa_state2string(fsa_state));
     }
 }
 
@@ -492,11 +515,14 @@ join_update_complete_callback(xmlNode * msg, int call_id, int rc, xmlNode * outp
     fsa_data_t *msg_data = NULL;
 
     if (rc == pcmk_ok) {
-        crm_debug("Join update %d complete", call_id);
+        crm_debug("join-%d node history update (via CIB call %d) complete",
+                  current_join_id, call_id);
         check_join_state(fsa_state, __FUNCTION__);
 
     } else {
-        crm_err("Join update %d failed", call_id);
+        crm_err("join-%d node history update (via CIB call %d) failed: %s "
+                "(next transition may determine resource status incorrectly)",
+                current_join_id, call_id, pcmk_strerror(rc));
         crm_log_xml_debug(msg, "failed");
         register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
     }
@@ -515,61 +541,75 @@ do_dc_join_ack(long long action,
 
     const char *op = crm_element_value(join_ack->msg, F_CRM_TASK);
     const char *join_from = crm_element_value(join_ack->msg, F_CRM_HOST_FROM);
-    crm_node_t *peer = crm_get_peer(0, join_from);
+    crm_node_t *peer = NULL;
 
-    if (safe_str_neq(op, CRM_OP_JOIN_CONFIRM) || peer == NULL) {
-        crm_debug("Ignoring op=%s message from %s", op, join_from);
+    // Sanity checks
+    if (join_from == NULL) {
+        crm_warn("Ignoring message received without node identification");
+        return;
+    }
+    if (op == NULL) {
+        crm_warn("Ignoring message received from %s without task", join_from);
         return;
     }
 
-    crm_trace("Processing ack from %s", join_from);
-    crm_element_value_int(join_ack->msg, F_CRM_JOIN_ID, &join_id);
+    if (strcmp(op, CRM_OP_JOIN_CONFIRM)) {
+        crm_debug("Ignoring '%s' message from %s while waiting for '%s'",
+                  op, join_from, CRM_OP_JOIN_CONFIRM);
+        return;
+    }
 
+    if (crm_element_value_int(join_ack->msg, F_CRM_JOIN_ID, &join_id) != 0) {
+        crm_warn("Ignoring join confirmation from %s without valid join ID",
+                 join_from);
+        return;
+    }
+
+    peer = crm_get_peer(0, join_from);
     if (peer->join != crm_join_finalized) {
-        crm_info("Join not in progress: ignoring join-%d from %s (phase = %d)",
-                 join_id, join_from, peer->join);
+        crm_info("Ignoring out-of-sequence join-%d confirmation from %s "
+                 "(currently %s not %s)",
+                 join_id, join_from, crm_join_phase_str(peer->join),
+                 crm_join_phase_str(crm_join_finalized));
         return;
+    }
 
-    } else if (join_id != current_join_id) {
-        crm_err("Invalid response from %s: join-%d vs. join-%d",
-                join_from, join_id, current_join_id);
+    if (join_id != current_join_id) {
+        crm_err("Rejecting join-%d confirmation from %s "
+                "because currently on join-%d",
+                join_id, join_from, current_join_id);
         crm_update_peer_join(__FUNCTION__, peer, crm_join_nack);
         return;
     }
 
     crm_update_peer_join(__FUNCTION__, peer, crm_join_confirmed);
 
-    crm_info("join-%d: Updating node state to %s for %s",
-             join_id, CRMD_JOINSTATE_MEMBER, join_from);
-
-    /* update CIB with the current LRM status from the node
-     * We don't need to notify the TE of these updates, a transition will
-     *   be started in due time
+    /* Update CIB with node's current executor state. A new transition will be
+     * triggered later, when the CIB notifies us of the change.
      */
     erase_status_tag(join_from, XML_CIB_TAG_LRM, cib_scope_local);
-
     if (safe_str_eq(join_from, fsa_our_uname)) {
         xmlNode *now_dc_lrmd_state = do_lrm_query(TRUE, fsa_our_uname);
 
         if (now_dc_lrmd_state != NULL) {
-            crm_debug("Local executor state updated from query");
             fsa_cib_update(XML_CIB_TAG_STATUS, now_dc_lrmd_state,
                 cib_scope_local | cib_quorum_override | cib_can_create, call_id, NULL);
             free_xml(now_dc_lrmd_state);
+            crm_debug("Updating local node history for join-%d "
+                      "from query result (via CIB call %d)", join_id, call_id);
         } else {
-            crm_warn("Local executor state updated from join acknowledgement because query failed");
             fsa_cib_update(XML_CIB_TAG_STATUS, join_ack->xml,
                 cib_scope_local | cib_quorum_override | cib_can_create, call_id, NULL);
+            crm_warn("Updating local node history from join-%d confirmation "
+                     "because query failed (via CIB call %d)", join_id, call_id);
         }
     } else {
-        crm_debug("Executor state for %s updated from join acknowledgement",
-                  join_from);
         fsa_cib_update(XML_CIB_TAG_STATUS, join_ack->xml,
            cib_scope_local | cib_quorum_override | cib_can_create, call_id, NULL);
+        crm_debug("Updating node history for %s from join-%d confirmation "
+                  "(via CIB call %d)", join_from, join_id, call_id);
     }
-
     fsa_register_cib_callback(call_id, FALSE, NULL, join_update_complete_callback);
-    crm_debug("join-%d: Registered callback for CIB status update %d", join_id, call_id);
 }
 
 void
@@ -581,17 +621,16 @@ finalize_join_for(gpointer key, gpointer value, gpointer user_data)
     const char *join_to = join_node->uname;
 
     if(join_node->join != crm_join_integrated) {
-        crm_trace("Skipping %s in state %d", join_to, join_node->join);
+        crm_trace("Not updating non-integrated node %s (%s) for join-%d",
+                  join_to, crm_join_phase_str(join_node->join),
+                  current_join_id);
         return;
     }
 
-    /* make sure a node entry exists for the new node */
-    crm_trace("Creating node entry for %s", join_to);
-
+    crm_trace("Updating node state for %s", join_to);
     tmp1 = create_xml_node(NULL, XML_CIB_TAG_NODE);
     set_uuid(tmp1, XML_ATTR_UUID, join_node);
     crm_xml_add(tmp1, XML_ATTR_UNAME, join_to);
-
     fsa_cib_anon_update(XML_CIB_TAG_NODES, tmp1);
     free_xml(tmp1);
 
@@ -610,11 +649,10 @@ finalize_join_for(gpointer key, gpointer value, gpointer user_data)
         return;
     }
 
-    /* send the ack/nack to the node */
-    acknak = create_dc_message(CRM_OP_JOIN_ACKNAK, join_to);
-
-    crm_debug("join-%d: ACK'ing join request from %s",
+    // Acknowledge node's join request
+    crm_debug("Acknowledging join-%d request from %s",
               current_join_id, join_to);
+    acknak = create_dc_message(CRM_OP_JOIN_ACKNAK, join_to);
     crm_xml_add(acknak, CRM_OP_JOIN_ACKNAK, XML_BOOLEAN_TRUE);
     crm_update_peer_join(__FUNCTION__, join_node, crm_join_finalized);
     crm_update_peer_expected(__FUNCTION__, join_node, CRMD_JOINSTATE_MEMBER);
@@ -629,11 +667,11 @@ check_join_state(enum crmd_fsa_state cur_state, const char *source)
 {
     static unsigned long long highest_seq = 0;
 
-    crm_debug("Invoked by %s in state: %s", source, fsa_state2string(cur_state));
-
     if (saved_ccm_membership_id != crm_peer_seq) {
-        crm_debug("%s: Membership changed since join started: %llu -> %llu (%llu)",
-                  source, saved_ccm_membership_id, crm_peer_seq, highest_seq);
+        crm_debug("join-%d: Membership changed from %llu to %llu "
+                  CRM_XS " highest=%llu state=%s for=%s",
+                  current_join_id, saved_ccm_membership_id, crm_peer_seq, highest_seq,
+                  fsa_state2string(cur_state), source);
         if(highest_seq < crm_peer_seq) {
             /* Don't spam the FSA with duplicates */
             highest_seq = crm_peer_seq;
@@ -642,34 +680,53 @@ check_join_state(enum crmd_fsa_state cur_state, const char *source)
 
     } else if (cur_state == S_INTEGRATION) {
         if (crmd_join_phase_count(crm_join_welcomed) == 0) {
-            crm_debug("join-%d: Integration of %d peers complete: %s",
-                      current_join_id, crmd_join_phase_count(crm_join_integrated), source);
+            int count = crmd_join_phase_count(crm_join_integrated);
+
+            crm_debug("join-%d: Integration of %d peer%s complete "
+                      CRM_XS " state=%s for=%s",
+                      current_join_id, count, pcmk__plural_s(count),
+                      fsa_state2string(cur_state), source);
             register_fsa_input_before(C_FSA_INTERNAL, I_INTEGRATED, NULL);
             return TRUE;
         }
 
     } else if (cur_state == S_FINALIZE_JOIN) {
         if (is_set(fsa_input_register, R_HAVE_CIB) == FALSE) {
-            crm_debug("join-%d: Delaying I_FINALIZED until we have the CIB", current_join_id);
+            crm_debug("join-%d: Delaying finalization until we have CIB "
+                      CRM_XS " state=%s for=%s",
+                      current_join_id, fsa_state2string(cur_state), source);
             return TRUE;
 
         } else if (crmd_join_phase_count(crm_join_welcomed) != 0) {
-            crm_debug("join-%d: Still waiting on %d welcomed nodes",
-                      current_join_id, crmd_join_phase_count(crm_join_welcomed));
+            int count = crmd_join_phase_count(crm_join_welcomed);
+
+            crm_debug("join-%d: Still waiting on %d welcomed node%s "
+                      CRM_XS " state=%s for=%s",
+                      current_join_id, count, pcmk__plural_s(count),
+                      fsa_state2string(cur_state), source);
             crmd_join_phase_log(LOG_DEBUG);
 
         } else if (crmd_join_phase_count(crm_join_integrated) != 0) {
-            crm_debug("join-%d: Still waiting on %d integrated nodes",
-                      current_join_id, crmd_join_phase_count(crm_join_integrated));
+            int count = crmd_join_phase_count(crm_join_integrated);
+
+            crm_debug("join-%d: Still waiting on %d integrated node%s "
+                      CRM_XS " state=%s for=%s",
+                      current_join_id, count, pcmk__plural_s(count),
+                      fsa_state2string(cur_state), source);
             crmd_join_phase_log(LOG_DEBUG);
 
         } else if (crmd_join_phase_count(crm_join_finalized) != 0) {
-            crm_debug("join-%d: Still waiting on %d finalized nodes",
-                      current_join_id, crmd_join_phase_count(crm_join_finalized));
+            int count = crmd_join_phase_count(crm_join_finalized);
+
+            crm_debug("join-%d: Still waiting on %d finalized node%s "
+                      CRM_XS " state=%s for=%s",
+                      current_join_id, count, pcmk__plural_s(count),
+                      fsa_state2string(cur_state), source);
             crmd_join_phase_log(LOG_DEBUG);
 
         } else {
-            crm_debug("join-%d complete: %s", current_join_id, source);
+            crm_debug("join-%d: Complete " CRM_XS " state=%s for=%s",
+                      current_join_id, fsa_state2string(cur_state), source);
             register_fsa_input_later(C_FSA_INTERNAL, I_FINALIZED, NULL);
             return TRUE;
         }
-- 
1.8.3.1


From 034b27734d05e8aeddb586f2daaede8314f9516f Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Fri, 13 Dec 2019 10:39:34 -0600
Subject: [PATCH 6/8] Log: controller: improve CIB status deletion messages

---
 daemons/controld/controld_utils.c | 25 +++++++++++++++++--------
 1 file changed, 17 insertions(+), 8 deletions(-)

diff --git a/daemons/controld/controld_utils.c b/daemons/controld/controld_utils.c
index 3acd488..bb8ace9 100644
--- a/daemons/controld/controld_utils.c
+++ b/daemons/controld/controld_utils.c
@@ -751,14 +751,18 @@ update_dc(xmlNode * msg)
     return TRUE;
 }
 
-#define STATUS_PATH_MAX 512
 static void
 erase_xpath_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data)
 {
     char *xpath = user_data;
 
-    do_crm_log_unlikely(rc == 0 ? LOG_DEBUG : LOG_NOTICE,
-                        "Deletion of \"%s\": %s (rc=%d)", xpath, pcmk_strerror(rc), rc);
+    if (rc == 0) {
+        crm_debug("Deletion of '%s' from CIB (via CIB call %d) succeeded",
+                  xpath, call_id);
+    } else {
+        crm_warn("Deletion of '%s' from CIB (via CIB call %d) failed: %s "
+                 CRM_XS " rc=%d", xpath, call_id, pcmk_strerror(rc), rc);
+    }
 }
 
 #define XPATH_STATUS_TAG "//node_state[@uname='%s']/%s"
@@ -766,14 +770,19 @@ erase_xpath_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void
 void
 erase_status_tag(const char *uname, const char *tag, int options)
 {
-    if (fsa_cib_conn && uname) {
+    CRM_CHECK(uname != NULL, return);
+
+    if (fsa_cib_conn == NULL) {
+        crm_warn("Unable to delete CIB '%s' section for node %s: "
+                 "no CIB connection", tag, uname);
+    } else {
         int call_id;
         char *xpath = crm_strdup_printf(XPATH_STATUS_TAG, uname, tag);
 
-        crm_info("Deleting %s status entries for %s " CRM_XS " xpath=%s",
-                 tag, uname, xpath);
-        call_id = fsa_cib_conn->cmds->remove(fsa_cib_conn, xpath, NULL,
-                                             cib_quorum_override | cib_xpath | options);
+        options |= cib_quorum_override|cib_xpath;
+        call_id = fsa_cib_conn->cmds->remove(fsa_cib_conn, xpath, NULL, options);
+        crm_info("Deleting CIB '%s' section for node %s (via CIB call %d) "
+                 CRM_XS " xpath=%s", tag, uname, call_id, xpath);
         fsa_register_cib_callback(call_id, FALSE, xpath, erase_xpath_callback);
         // CIB library handles freeing xpath
     }
-- 
1.8.3.1


From 73510818bc9905dcc130893198590b10c0067425 Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Fri, 13 Dec 2019 10:36:56 -0600
Subject: [PATCH 7/8] Refactor: controller: move erase_status_tag() to
 controld_based.c

---
 daemons/controld/controld_based.c | 38 ++++++++++++++++++++++++++++++++++++++
 daemons/controld/controld_utils.c | 37 -------------------------------------
 2 files changed, 38 insertions(+), 37 deletions(-)

diff --git a/daemons/controld/controld_based.c b/daemons/controld/controld_based.c
index e6a4612..1db5650 100644
--- a/daemons/controld/controld_based.c
+++ b/daemons/controld/controld_based.c
@@ -168,3 +168,41 @@ controld_action_is_recordable(const char *action)
     }
     return TRUE;
 }
+
+static void
+erase_xpath_callback(xmlNode *msg, int call_id, int rc, xmlNode *output,
+                     void *user_data)
+{
+    char *xpath = user_data;
+
+    if (rc == 0) {
+        crm_debug("Deletion of '%s' from CIB (via CIB call %d) succeeded",
+                  xpath, call_id);
+    } else {
+        crm_warn("Deletion of '%s' from CIB (via CIB call %d) failed: %s "
+                 CRM_XS " rc=%d", xpath, call_id, pcmk_strerror(rc), rc);
+    }
+}
+
+#define XPATH_STATUS_TAG "//node_state[@uname='%s']/%s"
+
+void
+erase_status_tag(const char *uname, const char *tag, int options)
+{
+    CRM_CHECK(uname != NULL, return);
+
+    if (fsa_cib_conn == NULL) {
+        crm_warn("Unable to delete CIB '%s' section for node %s: "
+                 "no CIB connection", tag, uname);
+    } else {
+        int call_id;
+        char *xpath = crm_strdup_printf(XPATH_STATUS_TAG, uname, tag);
+
+        options |= cib_quorum_override|cib_xpath;
+        call_id = fsa_cib_conn->cmds->remove(fsa_cib_conn, xpath, NULL, options);
+        crm_info("Deleting CIB '%s' section for node %s (via CIB call %d) "
+                 CRM_XS " xpath=%s", tag, uname, call_id, xpath);
+        fsa_register_cib_callback(call_id, FALSE, xpath, erase_xpath_callback);
+        // CIB library handles freeing xpath
+    }
+}
diff --git a/daemons/controld/controld_utils.c b/daemons/controld/controld_utils.c
index bb8ace9..4ed6aeb 100644
--- a/daemons/controld/controld_utils.c
+++ b/daemons/controld/controld_utils.c
@@ -751,43 +751,6 @@ update_dc(xmlNode * msg)
     return TRUE;
 }
 
-static void
-erase_xpath_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data)
-{
-    char *xpath = user_data;
-
-    if (rc == 0) {
-        crm_debug("Deletion of '%s' from CIB (via CIB call %d) succeeded",
-                  xpath, call_id);
-    } else {
-        crm_warn("Deletion of '%s' from CIB (via CIB call %d) failed: %s "
-                 CRM_XS " rc=%d", xpath, call_id, pcmk_strerror(rc), rc);
-    }
-}
-
-#define XPATH_STATUS_TAG "//node_state[@uname='%s']/%s"
-
-void
-erase_status_tag(const char *uname, const char *tag, int options)
-{
-    CRM_CHECK(uname != NULL, return);
-
-    if (fsa_cib_conn == NULL) {
-        crm_warn("Unable to delete CIB '%s' section for node %s: "
-                 "no CIB connection", tag, uname);
-    } else {
-        int call_id;
-        char *xpath = crm_strdup_printf(XPATH_STATUS_TAG, uname, tag);
-
-        options |= cib_quorum_override|cib_xpath;
-        call_id = fsa_cib_conn->cmds->remove(fsa_cib_conn, xpath, NULL, options);
-        crm_info("Deleting CIB '%s' section for node %s (via CIB call %d) "
-                 CRM_XS " xpath=%s", tag, uname, call_id, xpath);
-        fsa_register_cib_callback(call_id, FALSE, xpath, erase_xpath_callback);
-        // CIB library handles freeing xpath
-    }
-}
-
 void crmd_peer_down(crm_node_t *peer, bool full) 
 {
     if(full && peer->state == NULL) {
-- 
1.8.3.1


From c4cc759e733db894957d039f65572cc21704224f Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Fri, 13 Dec 2019 11:16:25 -0600
Subject: [PATCH 8/8] Refactor: controller: improve efficiency when deleting
 node state

Rename erase_status_xpath() to controld_delete_node_state() to follow current
naming practice.

Instead of passing it a node_state subsection name, pass a new enum value
indicating what to erase (resource history, transient node attributes, or
both). This allows us to improve the log messages further, as well as improving
efficiency when both need to be cleared.
---
 daemons/controld/controld_based.c     | 69 +++++++++++++++++++++++++++--------
 daemons/controld/controld_callbacks.c |  8 +++-
 daemons/controld/controld_execd.c     |  3 +-
 daemons/controld/controld_fencing.c   |  5 +--
 daemons/controld/controld_join_dc.c   |  3 +-
 daemons/controld/controld_remote_ra.c | 24 ++++++------
 daemons/controld/controld_utils.h     | 11 +++++-
 7 files changed, 87 insertions(+), 36 deletions(-)

diff --git a/daemons/controld/controld_based.c b/daemons/controld/controld_based.c
index 1db5650..008a02d 100644
--- a/daemons/controld/controld_based.c
+++ b/daemons/controld/controld_based.c
@@ -170,39 +170,76 @@ controld_action_is_recordable(const char *action)
 }
 
 static void
-erase_xpath_callback(xmlNode *msg, int call_id, int rc, xmlNode *output,
-                     void *user_data)
+cib_delete_callback(xmlNode *msg, int call_id, int rc, xmlNode *output,
+                    void *user_data)
 {
-    char *xpath = user_data;
+    char *desc = user_data;
 
     if (rc == 0) {
-        crm_debug("Deletion of '%s' from CIB (via CIB call %d) succeeded",
-                  xpath, call_id);
+        crm_debug("Deletion of %s (via CIB call %d) succeeded", desc, call_id);
     } else {
-        crm_warn("Deletion of '%s' from CIB (via CIB call %d) failed: %s "
-                 CRM_XS " rc=%d", xpath, call_id, pcmk_strerror(rc), rc);
+        crm_warn("Deletion of %s (via CIB call %d) failed: %s " CRM_XS " rc=%d",
+                 desc, call_id, pcmk_strerror(rc), rc);
     }
 }
 
-#define XPATH_STATUS_TAG "//node_state[@uname='%s']/%s"
+// Searches for various portions of node_state to delete
 
+// Match a particular node's node_state (takes node name 1x)
+#define XPATH_NODE_STATE        "//" XML_CIB_TAG_STATE "[@" XML_ATTR_UNAME "='%s']"
+
+// Node's lrm section (name 1x)
+#define XPATH_NODE_LRM          XPATH_NODE_STATE "/" XML_CIB_TAG_LRM
+
+// Node's transient_attributes section (name 1x)
+#define XPATH_NODE_ATTRS        XPATH_NODE_STATE "/" XML_TAG_TRANSIENT_NODEATTRS
+
+// Everything under node_state (name 1x)
+#define XPATH_NODE_ALL          XPATH_NODE_STATE "/*"
+
+/*!
+ * \internal
+ * \brief Delete subsection of a node's CIB node_state
+ *
+ * \param[in] uname    Desired node
+ * \param[in] section  Subsection of node_state to delete
+ * \param[in] options  CIB call options to use
+ */
 void
-erase_status_tag(const char *uname, const char *tag, int options)
+controld_delete_node_state(const char *uname, enum controld_section_e section,
+                           int options)
 {
+    char *xpath = NULL;
+    char *desc = NULL;
+
     CRM_CHECK(uname != NULL, return);
+    switch (section) {
+        case controld_section_lrm:
+            xpath = crm_strdup_printf(XPATH_NODE_LRM, uname);
+            desc = crm_strdup_printf("resource history for node %s", uname);
+            break;
+        case controld_section_attrs:
+            xpath = crm_strdup_printf(XPATH_NODE_ATTRS, uname);
+            desc = crm_strdup_printf("transient attributes for node %s", uname);
+            break;
+        case controld_section_all:
+            xpath = crm_strdup_printf(XPATH_NODE_ALL, uname);
+            desc = crm_strdup_printf("all state for node %s", uname);
+            break;
+    }
 
     if (fsa_cib_conn == NULL) {
-        crm_warn("Unable to delete CIB '%s' section for node %s: "
-                 "no CIB connection", tag, uname);
+        crm_warn("Unable to delete %s: no CIB connection", desc);
+        free(desc);
     } else {
         int call_id;
-        char *xpath = crm_strdup_printf(XPATH_STATUS_TAG, uname, tag);
 
         options |= cib_quorum_override|cib_xpath;
         call_id = fsa_cib_conn->cmds->remove(fsa_cib_conn, xpath, NULL, options);
-        crm_info("Deleting CIB '%s' section for node %s (via CIB call %d) "
-                 CRM_XS " xpath=%s", tag, uname, call_id, xpath);
-        fsa_register_cib_callback(call_id, FALSE, xpath, erase_xpath_callback);
-        // CIB library handles freeing xpath
+        crm_info("Deleting %s (via CIB call %d) " CRM_XS " xpath=%s",
+                 desc, call_id, xpath);
+        fsa_register_cib_callback(call_id, FALSE, desc, cib_delete_callback);
+        // CIB library handles freeing desc
     }
+    free(xpath);
 }
diff --git a/daemons/controld/controld_callbacks.c b/daemons/controld/controld_callbacks.c
index 5cbd392..f7e3db2 100644
--- a/daemons/controld/controld_callbacks.c
+++ b/daemons/controld/controld_callbacks.c
@@ -200,14 +200,18 @@ peer_update_callback(enum crm_status_type type, crm_node_t * node, const void *d
                  * transient attributes intact until it rejoins.
                  */
                 if (compare_version(fsa_our_dc_version, "3.0.9") > 0) {
-                    erase_status_tag(node->uname, XML_TAG_TRANSIENT_NODEATTRS, cib_scope_local);
+                    controld_delete_node_state(node->uname,
+                                               controld_section_attrs,
+                                               cib_scope_local);
                 }
 
             } else if(AM_I_DC) {
                 if (appeared) {
                     te_trigger_stonith_history_sync(FALSE);
                 } else {
-                    erase_status_tag(node->uname, XML_TAG_TRANSIENT_NODEATTRS, cib_scope_local);
+                    controld_delete_node_state(node->uname,
+                                               controld_section_attrs,
+                                               cib_scope_local);
                 }
             }
             break;
diff --git a/daemons/controld/controld_execd.c b/daemons/controld/controld_execd.c
index 46c1958..b7deeae 100644
--- a/daemons/controld/controld_execd.c
+++ b/daemons/controld/controld_execd.c
@@ -1411,7 +1411,8 @@ force_reprobe(lrm_state_t *lrm_state, const char *from_sys,
     }
 
     /* Now delete the copy in the CIB */
-    erase_status_tag(lrm_state->node_name, XML_CIB_TAG_LRM, cib_scope_local);
+    controld_delete_node_state(lrm_state->node_name, controld_section_lrm,
+                               cib_scope_local);
 
     /* Finally, _delete_ the value in pacemaker-attrd -- setting it to FALSE
      * would result in the scheduler sending us back here again
diff --git a/daemons/controld/controld_fencing.c b/daemons/controld/controld_fencing.c
index d9b1e1e..9897cf3 100644
--- a/daemons/controld/controld_fencing.c
+++ b/daemons/controld/controld_fencing.c
@@ -229,9 +229,8 @@ send_stonith_update(crm_action_t *action, const char *target, const char *uuid)
     /* Make sure it sticks */
     /* fsa_cib_conn->cmds->bump_epoch(fsa_cib_conn, cib_quorum_override|cib_scope_local);    */
 
-    erase_status_tag(peer->uname, XML_CIB_TAG_LRM, cib_scope_local);
-    erase_status_tag(peer->uname, XML_TAG_TRANSIENT_NODEATTRS, cib_scope_local);
-
+    controld_delete_node_state(peer->uname, controld_section_all,
+                               cib_scope_local);
     free_xml(node_state);
     return;
 }
diff --git a/daemons/controld/controld_join_dc.c b/daemons/controld/controld_join_dc.c
index 54324b2..ac6b430 100644
--- a/daemons/controld/controld_join_dc.c
+++ b/daemons/controld/controld_join_dc.c
@@ -587,7 +587,8 @@ do_dc_join_ack(long long action,
     /* Update CIB with node's current executor state. A new transition will be
      * triggered later, when the CIB notifies us of the change.
      */
-    erase_status_tag(join_from, XML_CIB_TAG_LRM, cib_scope_local);
+    controld_delete_node_state(join_from, controld_section_lrm,
+                               cib_scope_local);
     if (safe_str_eq(join_from, fsa_our_uname)) {
         xmlNode *now_dc_lrmd_state = do_lrm_query(TRUE, fsa_our_uname);
 
diff --git a/daemons/controld/controld_remote_ra.c b/daemons/controld/controld_remote_ra.c
index 4fbae45..2d3dfa7 100644
--- a/daemons/controld/controld_remote_ra.c
+++ b/daemons/controld/controld_remote_ra.c
@@ -181,13 +181,13 @@ remote_node_up(const char *node_name)
     CRM_CHECK(node_name != NULL, return);
     crm_info("Announcing pacemaker_remote node %s", node_name);
 
-    /* Clear node's operation history. The node's transient attributes should
-     * and normally will be cleared when the node leaves, but since remote node
-     * state has a number of corner cases, clear them here as well, to be sure.
+    /* Clear node's entire state (resource history and transient attributes).
+     * The transient attributes should and normally will be cleared when the
+     * node leaves, but since remote node state has a number of corner cases,
+     * clear them here as well, to be sure.
      */
     call_opt = crmd_cib_smart_opt();
-    erase_status_tag(node_name, XML_CIB_TAG_LRM, call_opt);
-    erase_status_tag(node_name, XML_TAG_TRANSIENT_NODEATTRS, call_opt);
+    controld_delete_node_state(node_name, controld_section_all, call_opt);
 
     /* Clear node's probed attribute */
     update_attrd(node_name, CRM_OP_PROBED, NULL, NULL, TRUE);
@@ -252,15 +252,15 @@ remote_node_down(const char *node_name, const enum down_opts opts)
     /* Purge node from attrd's memory */
     update_attrd_remote_node_removed(node_name, NULL);
 
-    /* Purge node's transient attributes */
-    erase_status_tag(node_name, XML_TAG_TRANSIENT_NODEATTRS, call_opt);
-
-    /* Normally, the LRM operation history should be kept until the node comes
-     * back up. However, after a successful fence, we want to clear it, so we
-     * don't think resources are still running on the node.
+    /* Normally, only node attributes should be erased, and the resource history
+     * should be kept until the node comes back up. However, after a successful
+     * fence, we want to clear the history as well, so we don't think resources
+     * are still running on the node.
      */
     if (opts == DOWN_ERASE_LRM) {
-        erase_status_tag(node_name, XML_CIB_TAG_LRM, call_opt);
+        controld_delete_node_state(node_name, controld_section_all, call_opt);
+    } else {
+        controld_delete_node_state(node_name, controld_section_attrs, call_opt);
     }
 
     /* Ensure node is in the remote peer cache with lost state */
diff --git a/daemons/controld/controld_utils.h b/daemons/controld/controld_utils.h
index cf04f13..f902361 100644
--- a/daemons/controld/controld_utils.h
+++ b/daemons/controld/controld_utils.h
@@ -70,7 +70,6 @@ xmlNode *create_node_state_update(crm_node_t *node, int flags,
                                   xmlNode *parent, const char *source);
 void populate_cib_nodes(enum node_update_flags flags, const char *source);
 void crm_update_quorum(gboolean quorum, gboolean force_update);
-void erase_status_tag(const char *uname, const char *tag, int options);
 void controld_close_attrd_ipc(void);
 void update_attrd(const char *host, const char *name, const char *value, const char *user_name, gboolean is_remote_node);
 void update_attrd_remote_node_removed(const char *host, const char *user_name);
@@ -87,6 +86,16 @@ unsigned int cib_op_timeout(void);
 bool feature_set_compatible(const char *dc_version, const char *join_version);
 bool controld_action_is_recordable(const char *action);
 
+// Subsections of node_state
+enum controld_section_e {
+    controld_section_lrm,
+    controld_section_attrs,
+    controld_section_all,
+};
+
+void controld_delete_node_state(const char *uname,
+                                enum controld_section_e section, int options);
+
 const char *get_node_id(xmlNode *lrm_rsc_op);
 
 /* Convenience macro for registering a CIB callback
-- 
1.8.3.1