Blame SOURCES/002-status-deletion.patch

ed4e54
From 9e4addbcb67ea8e36ba853f1e401d8a6cb6a0aa3 Mon Sep 17 00:00:00 2001
ed4e54
From: Ken Gaillot <kgaillot@redhat.com>
ed4e54
Date: Fri, 20 Dec 2019 11:34:06 -0600
ed4e54
Subject: [PATCH 1/8] Refactor: scheduler: reduce code duplication when
ed4e54
 displaying resources
ed4e54
ed4e54
Refactor native_output_string() to use GString, for readability and
ed4e54
maintainability. Refactor common_print() to use it, to reduce duplication and
ed4e54
ensure displays are consistent.
ed4e54
ed4e54
This makes a couple small changes in how things are shown:
ed4e54
ed4e54
* If pe_print_dev is enabled (a debugging flag not actually used by anything),
ed4e54
  the additional resource fields are shown with the resource flags rather than
ed4e54
  their own parenthesized list.
ed4e54
ed4e54
* The new output model is now consistent with the legacy print model in
ed4e54
  displaying resource flags with commas (not spaces) between them.
ed4e54
---
ed4e54
 include/crm/pengine/common.h |  24 +--
ed4e54
 lib/pengine/native.c         | 410 +++++++++++++++++--------------------------
ed4e54
 2 files changed, 168 insertions(+), 266 deletions(-)
ed4e54
ed4e54
diff --git a/include/crm/pengine/common.h b/include/crm/pengine/common.h
ed4e54
index e497f9c..48c2b66 100644
ed4e54
--- a/include/crm/pengine/common.h
ed4e54
+++ b/include/crm/pengine/common.h
ed4e54
@@ -1,22 +1,12 @@
ed4e54
-/* 
ed4e54
- * Copyright 2004-2018 the Pacemaker project contributors
ed4e54
+/*
ed4e54
+ * Copyright 2004-2019 the Pacemaker project contributors
ed4e54
  *
ed4e54
  * The version control history for this file may have further details.
ed4e54
- * 
ed4e54
- * This program is free software; you can redistribute it and/or
ed4e54
- * modify it under the terms of the GNU Lesser General Public
ed4e54
- * License as published by the Free Software Foundation; either
ed4e54
- * version 2 of the License, or (at your option) any later version.
ed4e54
- * 
ed4e54
- * This software is distributed in the hope that it will be useful,
ed4e54
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
ed4e54
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
ed4e54
- * General Public License for more details.
ed4e54
- * 
ed4e54
- * You should have received a copy of the GNU Lesser General Public
ed4e54
- * License along with this library; if not, write to the Free Software
ed4e54
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
ed4e54
+ *
ed4e54
+ * This source code is licensed under the GNU Lesser General Public License
ed4e54
+ * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
ed4e54
  */
ed4e54
+
ed4e54
 #ifndef PE_COMMON__H
ed4e54
 #  define PE_COMMON__H
ed4e54
 
ed4e54
@@ -104,7 +94,7 @@ enum pe_print_options {
ed4e54
     pe_print_html           = 0x0002,
ed4e54
     pe_print_ncurses        = 0x0004,
ed4e54
     pe_print_printf         = 0x0008,
ed4e54
-    pe_print_dev            = 0x0010,
ed4e54
+    pe_print_dev            = 0x0010, // Debugging (@COMPAT probably not useful)
ed4e54
     pe_print_details        = 0x0020,
ed4e54
     pe_print_max_details    = 0x0040,
ed4e54
     pe_print_rsconly        = 0x0080,
ed4e54
diff --git a/lib/pengine/native.c b/lib/pengine/native.c
ed4e54
index fdb98e0..8fd98bc 100644
ed4e54
--- a/lib/pengine/native.c
ed4e54
+++ b/lib/pengine/native.c
ed4e54
@@ -1,5 +1,5 @@
ed4e54
 /*
ed4e54
- * Copyright 2004-2019 the Pacemaker project contributors
ed4e54
+ * Copyright 2004-2020 the Pacemaker project contributors
ed4e54
  *
ed4e54
  * The version control history for this file may have further details.
ed4e54
  *
ed4e54
@@ -490,165 +490,172 @@ native_print_xml(resource_t * rsc, const char *pre_text, long options, void *pri
ed4e54
     }
ed4e54
 }
ed4e54
 
ed4e54
-/* making this inline rather than a macro prevents a coverity "unreachable"
ed4e54
- * warning on the first usage
ed4e54
- */
ed4e54
-static inline const char *
ed4e54
-comma_if(int i)
ed4e54
+// Append a flag to resource description string's flags list
ed4e54
+static bool
ed4e54
+add_output_flag(GString *s, const char *flag_desc, bool have_flags)
ed4e54
 {
ed4e54
-    return i? ", " : "";
ed4e54
+    g_string_append(s, (have_flags? ", " : " ("));
ed4e54
+    g_string_append(s, flag_desc);
ed4e54
+    return true;
ed4e54
 }
ed4e54
 
ed4e54
-static char *
ed4e54
-flags_string(pe_resource_t *rsc, pe_node_t *node, long options,
ed4e54
-             const char *target_role)
ed4e54
+// Append a node name to resource description string's node list
ed4e54
+static bool
ed4e54
+add_output_node(GString *s, const char *node, bool have_nodes)
ed4e54
 {
ed4e54
-    char *flags[6] = { NULL, };
ed4e54
-    char *result = NULL;
ed4e54
-    int ndx = 0;
ed4e54
+    g_string_append(s, (have_nodes? " " : " [ "));
ed4e54
+    g_string_append(s, node);
ed4e54
+    return true;
ed4e54
+}
ed4e54
+
ed4e54
+/*!
ed4e54
+ * \internal
ed4e54
+ * \brief Create a string description of a resource
ed4e54
+ *
ed4e54
+ * \param[in] rsc          Resource to describe
ed4e54
+ * \param[in] name         Desired identifier for the resource
ed4e54
+ * \param[in] node         If not NULL, node that resource is "on"
ed4e54
+ * \param[in] options      Bitmask of pe_print_*
ed4e54
+ * \param[in] target_role  Resource's target role
ed4e54
+ * \param[in] show_nodes   Whether to display nodes when multiply active
ed4e54
+ *
ed4e54
+ * \return Newly allocated string description of resource
ed4e54
+ * \note Caller must free the result with g_free().
ed4e54
+ */
ed4e54
+static gchar *
ed4e54
+native_output_string(pe_resource_t *rsc, const char *name, pe_node_t *node,
ed4e54
+                     long options, const char *target_role, bool show_nodes)
ed4e54
+{
ed4e54
+    const char *class = crm_element_value(rsc->xml, XML_AGENT_ATTR_CLASS);
ed4e54
+    const char *provider = NULL;
ed4e54
+    const char *kind = crm_element_value(rsc->xml, XML_ATTR_TYPE);
ed4e54
+    char *retval = NULL;
ed4e54
+    GString *outstr = NULL;
ed4e54
+    bool have_flags = false;
ed4e54
+
ed4e54
+    CRM_CHECK(name != NULL, name = "unknown");
ed4e54
+    CRM_CHECK(kind != NULL, kind = "unknown");
ed4e54
+    CRM_CHECK(class != NULL, class = "unknown");
ed4e54
+
ed4e54
+    if (is_set(pcmk_get_ra_caps(class), pcmk_ra_cap_provider)) {
ed4e54
+        provider = crm_element_value(rsc->xml, XML_AGENT_ATTR_PROVIDER);
ed4e54
+    }
ed4e54
 
ed4e54
-    if (node && node->details->online == FALSE && node->details->unclean) {
ed4e54
-        flags[ndx++] = strdup("UNCLEAN");
ed4e54
+    if (is_set(options, pe_print_rsconly)
ed4e54
+        || pcmk__list_of_multiple(rsc->running_on)) {
ed4e54
+        node = NULL;
ed4e54
     }
ed4e54
 
ed4e54
+    // We need a string of at least this size
ed4e54
+    outstr = g_string_sized_new(strlen(name) + strlen(class) + strlen(kind)
ed4e54
+                                + (provider? (strlen(provider) + 2) : 0)
ed4e54
+                                + (node? strlen(node->details->uname) + 1 : 0)
ed4e54
+                                + 11);
ed4e54
+
ed4e54
+    // Resource name and agent
ed4e54
+    g_string_printf(outstr, "%s\t(%s%s%s:%s):\t", name, class,
ed4e54
+                    /* @COMPAT This should be a single ':' (see CLBZ#5395) but
ed4e54
+                     * to avoid breaking anything relying on it, we're keeping
ed4e54
+                     * it like this until the next minor version bump.
ed4e54
+                     */
ed4e54
+                    (provider? "::" : ""), (provider? provider : ""), kind);
ed4e54
+
ed4e54
+    // State on node
ed4e54
+    if (is_set(rsc->flags, pe_rsc_orphan)) {
ed4e54
+        g_string_append(outstr, " ORPHANED");
ed4e54
+    }
ed4e54
+    if (is_set(rsc->flags, pe_rsc_failed)) {
ed4e54
+        enum rsc_role_e role = native_displayable_role(rsc);
ed4e54
+
ed4e54
+        if (role > RSC_ROLE_SLAVE) {
ed4e54
+            g_string_append_printf(outstr, " FAILED %s", role2text(role));
ed4e54
+        } else {
ed4e54
+            g_string_append(outstr, " FAILED");
ed4e54
+        }
ed4e54
+    } else {
ed4e54
+        g_string_append(outstr, native_displayable_state(rsc, options));
ed4e54
+    }
ed4e54
+    if (node) {
ed4e54
+        g_string_append_printf(outstr, " %s", node->details->uname);
ed4e54
+    }
ed4e54
+
ed4e54
+    // Flags, as: (<flag> [...])
ed4e54
+    if (node && !(node->details->online) && node->details->unclean) {
ed4e54
+        have_flags = add_output_flag(outstr, "UNCLEAN", have_flags);
ed4e54
+    }
ed4e54
     if (is_set(options, pe_print_pending)) {
ed4e54
         const char *pending_task = native_pending_task(rsc);
ed4e54
 
ed4e54
         if (pending_task) {
ed4e54
-            flags[ndx++] = strdup(pending_task);
ed4e54
+            have_flags = add_output_flag(outstr, pending_task, have_flags);
ed4e54
         }
ed4e54
     }
ed4e54
-
ed4e54
     if (target_role) {
ed4e54
         enum rsc_role_e target_role_e = text2role(target_role);
ed4e54
 
ed4e54
-        /* Ignore target role Started, as it is the default anyways
ed4e54
-         * (and would also allow a Master to be Master).
ed4e54
-         * Show if target role limits our abilities. */
ed4e54
+        /* Only show target role if it limits our abilities (i.e. ignore
ed4e54
+         * Started, as it is the default anyways, and doesn't prevent the
ed4e54
+         * resource from becoming Master).
ed4e54
+         */
ed4e54
         if (target_role_e == RSC_ROLE_STOPPED) {
ed4e54
-            flags[ndx++] = strdup("disabled");
ed4e54
+            have_flags = add_output_flag(outstr, "disabled", have_flags);
ed4e54
 
ed4e54
         } else if (is_set(uber_parent(rsc)->flags, pe_rsc_promotable)
ed4e54
                    && target_role_e == RSC_ROLE_SLAVE) {
ed4e54
-            flags[ndx++] = crm_strdup_printf("target-role:%s", target_role);
ed4e54
+            have_flags = add_output_flag(outstr, "target-role:", have_flags);
ed4e54
+            g_string_append(outstr, target_role);
ed4e54
         }
ed4e54
     }
ed4e54
-
ed4e54
     if (is_set(rsc->flags, pe_rsc_block)) {
ed4e54
-        flags[ndx++] = strdup("blocked");
ed4e54
-
ed4e54
+        have_flags = add_output_flag(outstr, "blocked", have_flags);
ed4e54
     } else if (is_not_set(rsc->flags, pe_rsc_managed)) {
ed4e54
-        flags[ndx++] = strdup("unmanaged");
ed4e54
+        have_flags = add_output_flag(outstr, "unmanaged", have_flags);
ed4e54
     }
ed4e54
-
ed4e54
     if (is_set(rsc->flags, pe_rsc_failure_ignored)) {
ed4e54
-        flags[ndx++] = strdup("failure ignored");
ed4e54
+        have_flags = add_output_flag(outstr, "failure ignored", have_flags);
ed4e54
     }
ed4e54
-
ed4e54
-    if (ndx > 0) {
ed4e54
-        char *total = g_strjoinv(" ", flags);
ed4e54
-
ed4e54
-        result = crm_strdup_printf(" (%s)", total);
ed4e54
-        g_free(total);
ed4e54
-    }
ed4e54
-
ed4e54
-    while (--ndx >= 0) {
ed4e54
-        free(flags[ndx]);
ed4e54
-    }
ed4e54
-    return result;
ed4e54
-}
ed4e54
-
ed4e54
-static char *
ed4e54
-native_output_string(resource_t *rsc, const char *name, node_t *node, long options,
ed4e54
-                     const char *target_role) {
ed4e54
-    const char *desc = NULL;
ed4e54
-    const char *class = crm_element_value(rsc->xml, XML_AGENT_ATTR_CLASS);
ed4e54
-    const char *kind = crm_element_value(rsc->xml, XML_ATTR_TYPE);
ed4e54
-    enum rsc_role_e role = native_displayable_role(rsc);
ed4e54
-
ed4e54
-    char *retval = NULL;
ed4e54
-
ed4e54
-    char *unames = NULL;
ed4e54
-    char *provider = NULL;
ed4e54
-    const char *orphan = NULL;
ed4e54
-    char *role_s = NULL;
ed4e54
-    char *node_s = NULL;
ed4e54
-    char *print_dev_s = NULL;
ed4e54
-    char *flags_s = NULL;
ed4e54
-
ed4e54
-    CRM_ASSERT(kind != NULL);
ed4e54
-
ed4e54
-    if (is_set(pcmk_get_ra_caps(class), pcmk_ra_cap_provider)) {
ed4e54
-        provider = crm_strdup_printf("::%s", crm_element_value(rsc->xml, XML_AGENT_ATTR_PROVIDER));
ed4e54
+    if (is_set(options, pe_print_dev)) {
ed4e54
+        if (is_set(options, pe_rsc_provisional)) {
ed4e54
+            have_flags = add_output_flag(outstr, "provisional", have_flags);
ed4e54
+        }
ed4e54
+        if (is_not_set(options, pe_rsc_runnable)) {
ed4e54
+            have_flags = add_output_flag(outstr, "non-startable", have_flags);
ed4e54
+        }
ed4e54
+        have_flags = add_output_flag(outstr, "variant:", have_flags);
ed4e54
+        g_string_append_printf(outstr, "%s priority:%f",
ed4e54
+                                       crm_element_name(rsc->xml),
ed4e54
+                                       (double) (rsc->priority));
ed4e54
     }
ed4e54
-
ed4e54
-    if (is_set(rsc->flags, pe_rsc_orphan)) {
ed4e54
-        orphan = " ORPHANED";
ed4e54
+    if (have_flags) {
ed4e54
+        g_string_append(outstr, ")");
ed4e54
     }
ed4e54
 
ed4e54
-    if (role > RSC_ROLE_SLAVE && is_set(rsc->flags, pe_rsc_failed)) {
ed4e54
-        role_s = crm_strdup_printf(" FAILED %s", role2text(role));
ed4e54
-    } else if (is_set(rsc->flags, pe_rsc_failed)) {
ed4e54
-        role_s = crm_strdup_printf(" FAILED");
ed4e54
-    } else {
ed4e54
-        role_s = crm_strdup_printf(" %s", native_displayable_state(rsc, options));
ed4e54
-    }
ed4e54
+    // User-supplied description
ed4e54
+    if (is_set(options, pe_print_rsconly)
ed4e54
+        || pcmk__list_of_multiple(rsc->running_on)) {
ed4e54
+        const char *desc = crm_element_value(rsc->xml, XML_ATTR_DESC);
ed4e54
 
ed4e54
-    if (node) {
ed4e54
-        node_s = crm_strdup_printf(" %s", node->details->uname);
ed4e54
+        if (desc) {
ed4e54
+            g_string_append_printf(outstr, " %s", desc);
ed4e54
+        }
ed4e54
     }
ed4e54
 
ed4e54
-    if (is_set(options, pe_print_rsconly) || g_list_length(rsc->running_on) > 1) {
ed4e54
-        desc = crm_element_value(rsc->xml, XML_ATTR_DESC);
ed4e54
-    }
ed4e54
+    if (show_nodes && is_not_set(options, pe_print_rsconly)
ed4e54
+        && pcmk__list_of_multiple(rsc->running_on)) {
ed4e54
+        bool have_nodes = false;
ed4e54
 
ed4e54
-    if (is_not_set(options, pe_print_rsconly) && g_list_length(rsc->running_on) > 1) {
ed4e54
-        GListPtr gIter = rsc->running_on;
ed4e54
-        gchar **arr = calloc(g_list_length(rsc->running_on)+1, sizeof(gchar *));
ed4e54
-        int i = 0;
ed4e54
-        char *total = NULL;
ed4e54
+        for (GList *iter = rsc->running_on; iter != NULL; iter = iter->next) {
ed4e54
+            pe_node_t *n = (pe_node_t *) iter->data;
ed4e54
 
ed4e54
-        for (; gIter != NULL; gIter = gIter->next) {
ed4e54
-            node_t *n = (node_t *) gIter->data;
ed4e54
-            arr[i] = (gchar *) strdup(n->details->uname);
ed4e54
-            i++;
ed4e54
+            have_nodes = add_output_node(outstr, n->details->uname, have_nodes);
ed4e54
+        }
ed4e54
+        if (have_nodes) {
ed4e54
+            g_string_append(outstr, " ]");
ed4e54
         }
ed4e54
-
ed4e54
-        total = g_strjoinv(" ", arr);
ed4e54
-        unames = crm_strdup_printf(" [ %s ]", total);
ed4e54
-
ed4e54
-        g_free(total);
ed4e54
-        g_strfreev(arr);
ed4e54
     }
ed4e54
 
ed4e54
-    if (is_set(options, pe_print_dev)) {
ed4e54
-        print_dev_s = crm_strdup_printf(" (%s%svariant=%s, priority=%f)",
ed4e54
-                                        is_set(rsc->flags, pe_rsc_provisional) ? "provisional, " : "",
ed4e54
-                                        is_set(rsc->flags, pe_rsc_runnable) ? "" : "non-startable, ",
ed4e54
-                                        crm_element_name(rsc->xml), (double)rsc->priority);
ed4e54
-    }
ed4e54
-
ed4e54
-    flags_s = flags_string(rsc, node, options, target_role);
ed4e54
-
ed4e54
-    retval = crm_strdup_printf("%s\t(%s%s:%s):\t%s%s%s%s%s%s%s%s",
ed4e54
-                               name, class,
ed4e54
-                               provider ? provider : "",
ed4e54
-                               kind,
ed4e54
-                               orphan ? orphan : "",
ed4e54
-                               role_s,
ed4e54
-                               node_s ? node_s : "",
ed4e54
-                               print_dev_s ? print_dev_s : "",
ed4e54
-                               flags_s ? flags_s : "",
ed4e54
-                               desc ? " " : "", desc ? desc : "",
ed4e54
-                               unames ? unames : "");
ed4e54
-
ed4e54
-    free(provider);
ed4e54
-    free(role_s);
ed4e54
-    free(node_s);
ed4e54
-    free(unames);
ed4e54
-    free(print_dev_s);
ed4e54
-    free(flags_s);
ed4e54
-
ed4e54
+    retval = outstr->str;
ed4e54
+    g_string_free(outstr, FALSE);
ed4e54
     return retval;
ed4e54
 }
ed4e54
 
ed4e54
@@ -656,7 +663,6 @@ void
ed4e54
 pe__common_output_html(pcmk__output_t *out, resource_t * rsc,
ed4e54
                        const char *name, node_t *node, long options)
ed4e54
 {
ed4e54
-    char *s = NULL;
ed4e54
     const char *kind = crm_element_value(rsc->xml, XML_ATTR_TYPE);
ed4e54
     const char *target_role = NULL;
ed4e54
 
ed4e54
@@ -675,10 +681,6 @@ pe__common_output_html(pcmk__output_t *out, resource_t * rsc,
ed4e54
         target_role = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_TARGET_ROLE);
ed4e54
     }
ed4e54
 
ed4e54
-    if ((options & pe_print_rsconly) || g_list_length(rsc->running_on) > 1) {
ed4e54
-        node = NULL;
ed4e54
-    }
ed4e54
-
ed4e54
     if (is_not_set(rsc->flags, pe_rsc_managed)) {
ed4e54
         cl = "rsc-managed";
ed4e54
 
ed4e54
@@ -698,10 +700,14 @@ pe__common_output_html(pcmk__output_t *out, resource_t * rsc,
ed4e54
         cl = "rsc-ok";
ed4e54
     }
ed4e54
 
ed4e54
-    s = native_output_string(rsc, name, node, options, target_role);
ed4e54
-    list_node = pcmk__output_create_html_node(out, "li", NULL, NULL, NULL);
ed4e54
-    pcmk_create_html_node(list_node, "span", NULL, cl, s);
ed4e54
-    free(s);
ed4e54
+    {
ed4e54
+        gchar *s = native_output_string(rsc, name, node, options, target_role,
ed4e54
+                                        true);
ed4e54
+
ed4e54
+        list_node = pcmk__output_create_html_node(out, "li", NULL, NULL, NULL);
ed4e54
+        pcmk_create_html_node(list_node, "span", NULL, cl, s);
ed4e54
+        g_free(s);
ed4e54
+    }
ed4e54
 
ed4e54
     if (is_set(options, pe_print_details)) {
ed4e54
         GHashTableIter iter;
ed4e54
@@ -744,7 +750,6 @@ void
ed4e54
 pe__common_output_text(pcmk__output_t *out, resource_t * rsc,
ed4e54
                        const char *name, node_t *node, long options)
ed4e54
 {
ed4e54
-    char *s = NULL;
ed4e54
     const char *target_role = NULL;
ed4e54
 
ed4e54
     CRM_ASSERT(rsc->variant == pe_native);
ed4e54
@@ -758,13 +763,13 @@ pe__common_output_text(pcmk__output_t *out, resource_t * rsc,
ed4e54
         target_role = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_TARGET_ROLE);
ed4e54
     }
ed4e54
 
ed4e54
-    if (is_set(options, pe_print_rsconly) || g_list_length(rsc->running_on) > 1) {
ed4e54
-        node = NULL;
ed4e54
-    }
ed4e54
+    {
ed4e54
+        gchar *s = native_output_string(rsc, name, node, options, target_role,
ed4e54
+                                        true);
ed4e54
 
ed4e54
-    s = native_output_string(rsc, name, node, options, target_role);
ed4e54
-    out->list_item(out, NULL, "%s", s);
ed4e54
-    free(s);
ed4e54
+        out->list_item(out, NULL, "%s", s);
ed4e54
+        g_free(s);
ed4e54
+    }
ed4e54
 
ed4e54
     if (is_set(options, pe_print_details)) {
ed4e54
         GHashTableIter iter;
ed4e54
@@ -806,22 +811,14 @@ pe__common_output_text(pcmk__output_t *out, resource_t * rsc,
ed4e54
 void
ed4e54
 common_print(resource_t * rsc, const char *pre_text, const char *name, node_t *node, long options, void *print_data)
ed4e54
 {
ed4e54
-    const char *desc = NULL;
ed4e54
-    const char *class = crm_element_value(rsc->xml, XML_AGENT_ATTR_CLASS);
ed4e54
-    const char *kind = crm_element_value(rsc->xml, XML_ATTR_TYPE);
ed4e54
     const char *target_role = NULL;
ed4e54
-    enum rsc_role_e role = native_displayable_role(rsc);
ed4e54
-
ed4e54
-    int offset = 0;
ed4e54
-    int flagOffset = 0;
ed4e54
-    char buffer[LINE_MAX];
ed4e54
-    char flagBuffer[LINE_MAX];
ed4e54
 
ed4e54
     CRM_ASSERT(rsc->variant == pe_native);
ed4e54
-    CRM_ASSERT(kind != NULL);
ed4e54
 
ed4e54
     if (rsc->meta) {
ed4e54
-        const char *is_internal = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_INTERNAL_RSC);
ed4e54
+        const char *is_internal = g_hash_table_lookup(rsc->meta,
ed4e54
+                                                      XML_RSC_ATTR_INTERNAL_RSC);
ed4e54
+
ed4e54
         if (crm_is_true(is_internal) && is_not_set(options, pe_print_implicit)) {
ed4e54
             crm_trace("skipping print of internal resource %s", rsc->id);
ed4e54
             return;
ed4e54
@@ -829,17 +826,13 @@ common_print(resource_t * rsc, const char *pre_text, const char *name, node_t *n
ed4e54
         target_role = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_TARGET_ROLE);
ed4e54
     }
ed4e54
 
ed4e54
-    if (pre_text == NULL && (options & pe_print_printf)) {
ed4e54
-        pre_text = " ";
ed4e54
-    }
ed4e54
-
ed4e54
     if (options & pe_print_xml) {
ed4e54
         native_print_xml(rsc, pre_text, options, print_data);
ed4e54
         return;
ed4e54
     }
ed4e54
 
ed4e54
-    if ((options & pe_print_rsconly) || g_list_length(rsc->running_on) > 1) {
ed4e54
-        node = NULL;
ed4e54
+    if ((pre_text == NULL) && (options & pe_print_printf)) {
ed4e54
+        pre_text = " ";
ed4e54
     }
ed4e54
 
ed4e54
     if (options & pe_print_html) {
ed4e54
@@ -849,10 +842,10 @@ common_print(resource_t * rsc, const char *pre_text, const char *name, node_t *n
ed4e54
         } else if (is_set(rsc->flags, pe_rsc_failed)) {
ed4e54
             status_print("<font color=\"red\">");
ed4e54
 
ed4e54
-        } else if (rsc->variant == pe_native && (rsc->running_on == NULL)) {
ed4e54
+        } else if (rsc->running_on == NULL) {
ed4e54
             status_print("<font color=\"red\">");
ed4e54
 
ed4e54
-        } else if (g_list_length(rsc->running_on) > 1) {
ed4e54
+        } else if (pcmk__list_of_multiple(rsc->running_on)) {
ed4e54
             status_print("<font color=\"orange\">");
ed4e54
 
ed4e54
         } else if (is_set(rsc->flags, pe_rsc_failure_ignored)) {
ed4e54
@@ -863,106 +856,29 @@ common_print(resource_t * rsc, const char *pre_text, const char *name, node_t *n
ed4e54
         }
ed4e54
     }
ed4e54
 
ed4e54
-    if(pre_text) {
ed4e54
-        offset += snprintf(buffer + offset, LINE_MAX - offset, "%s", pre_text);
ed4e54
-    }
ed4e54
-    offset += snprintf(buffer + offset, LINE_MAX - offset, "%s", name);
ed4e54
-    offset += snprintf(buffer + offset, LINE_MAX - offset, "\t(%s", class);
ed4e54
-    if (is_set(pcmk_get_ra_caps(class), pcmk_ra_cap_provider)) {
ed4e54
-        const char *prov = crm_element_value(rsc->xml, XML_AGENT_ATTR_PROVIDER);
ed4e54
-        offset += snprintf(buffer + offset, LINE_MAX - offset, "::%s", prov);
ed4e54
-    }
ed4e54
-    offset += snprintf(buffer + offset, LINE_MAX - offset, ":%s):\t", kind);
ed4e54
-    if(is_set(rsc->flags, pe_rsc_orphan)) {
ed4e54
-        offset += snprintf(buffer + offset, LINE_MAX - offset, " ORPHANED ");
ed4e54
-    }
ed4e54
-    if(role > RSC_ROLE_SLAVE && is_set(rsc->flags, pe_rsc_failed)) {
ed4e54
-        offset += snprintf(buffer + offset, LINE_MAX - offset, "FAILED %s", role2text(role));
ed4e54
-    } else if(is_set(rsc->flags, pe_rsc_failed)) {
ed4e54
-        offset += snprintf(buffer + offset, LINE_MAX - offset, "FAILED");
ed4e54
-    } else {
ed4e54
-        const char *rsc_state = native_displayable_state(rsc, options);
ed4e54
-
ed4e54
-        offset += snprintf(buffer + offset, LINE_MAX - offset, "%s", rsc_state);
ed4e54
-    }
ed4e54
-
ed4e54
-    if(node) {
ed4e54
-        offset += snprintf(buffer + offset, LINE_MAX - offset, " %s", node->details->uname);
ed4e54
-
ed4e54
-        if (node->details->online == FALSE && node->details->unclean) {
ed4e54
-            flagOffset += snprintf(flagBuffer + flagOffset, LINE_MAX - flagOffset,
ed4e54
-                                   "%sUNCLEAN", comma_if(flagOffset));
ed4e54
-        }
ed4e54
-    }
ed4e54
-
ed4e54
-    if (options & pe_print_pending) {
ed4e54
-        const char *pending_task = native_pending_task(rsc);
ed4e54
-
ed4e54
-        if (pending_task) {
ed4e54
-            flagOffset += snprintf(flagBuffer + flagOffset, LINE_MAX - flagOffset,
ed4e54
-                                   "%s%s", comma_if(flagOffset), pending_task);
ed4e54
-        }
ed4e54
-    }
ed4e54
-
ed4e54
-    if (target_role) {
ed4e54
-        enum rsc_role_e target_role_e = text2role(target_role);
ed4e54
-
ed4e54
-        /* Ignore target role Started, as it is the default anyways
ed4e54
-         * (and would also allow a Master to be Master).
ed4e54
-         * Show if target role limits our abilities. */
ed4e54
-        if (target_role_e == RSC_ROLE_STOPPED) {
ed4e54
-            flagOffset += snprintf(flagBuffer + flagOffset, LINE_MAX - flagOffset,
ed4e54
-                                   "%sdisabled", comma_if(flagOffset));
ed4e54
-
ed4e54
-        } else if (is_set(uber_parent(rsc)->flags, pe_rsc_promotable)
ed4e54
-                   && target_role_e == RSC_ROLE_SLAVE) {
ed4e54
-            flagOffset += snprintf(flagBuffer + flagOffset, LINE_MAX - flagOffset,
ed4e54
-                                   "%starget-role:%s", comma_if(flagOffset), target_role);
ed4e54
-        }
ed4e54
-    }
ed4e54
-
ed4e54
-    if (is_set(rsc->flags, pe_rsc_block)) {
ed4e54
-        flagOffset += snprintf(flagBuffer + flagOffset, LINE_MAX - flagOffset,
ed4e54
-                               "%sblocked", comma_if(flagOffset));
ed4e54
-
ed4e54
-    } else if (is_not_set(rsc->flags, pe_rsc_managed)) {
ed4e54
-        flagOffset += snprintf(flagBuffer + flagOffset, LINE_MAX - flagOffset,
ed4e54
-                               "%sunmanaged", comma_if(flagOffset));
ed4e54
-    }
ed4e54
-
ed4e54
-    if(is_set(rsc->flags, pe_rsc_failure_ignored)) {
ed4e54
-        flagOffset += snprintf(flagBuffer + flagOffset, LINE_MAX - flagOffset,
ed4e54
-                               "%sfailure ignored", comma_if(flagOffset));
ed4e54
-    }
ed4e54
-
ed4e54
-    if ((options & pe_print_rsconly) || g_list_length(rsc->running_on) > 1) {
ed4e54
-        desc = crm_element_value(rsc->xml, XML_ATTR_DESC);
ed4e54
-    }
ed4e54
-
ed4e54
-    CRM_LOG_ASSERT(offset > 0);
ed4e54
-    if(flagOffset > 0) {
ed4e54
-        status_print("%s (%s)%s%s", buffer, flagBuffer, desc?" ":"", desc?desc:"");
ed4e54
-    } else {
ed4e54
-        status_print("%s%s%s", buffer, desc?" ":"", desc?desc:"");
ed4e54
+    {
ed4e54
+        gchar *resource_s = native_output_string(rsc, name, node, options,
ed4e54
+                                                 target_role, false);
ed4e54
+        status_print("%s%s", (pre_text? pre_text : ""), resource_s);
ed4e54
+        g_free(resource_s);
ed4e54
     }
ed4e54
 
ed4e54
 #if CURSES_ENABLED
ed4e54
-    if ((options & pe_print_rsconly) || g_list_length(rsc->running_on) > 1) {
ed4e54
-        /* Done */
ed4e54
-
ed4e54
-    } else if (options & pe_print_ncurses) {
ed4e54
+    if (is_set(options, pe_print_ncurses)
ed4e54
+        && is_not_set(options, pe_print_rsconly)
ed4e54
+        && !pcmk__list_of_multiple(rsc->running_on)) {
ed4e54
         /* coverity[negative_returns] False positive */
ed4e54
         move(-1, 0);
ed4e54
     }
ed4e54
 #endif
ed4e54
 
ed4e54
-    if (options & pe_print_html) {
ed4e54
+    if (is_set(options, pe_print_html)) {
ed4e54
         status_print(" </font> ");
ed4e54
     }
ed4e54
 
ed4e54
-    if ((options & pe_print_rsconly)) {
ed4e54
+    if (is_not_set(options, pe_print_rsconly)
ed4e54
+        && pcmk__list_of_multiple(rsc->running_on)) {
ed4e54
 
ed4e54
-    } else if (g_list_length(rsc->running_on) > 1) {
ed4e54
         GListPtr gIter = rsc->running_on;
ed4e54
         int counter = 0;
ed4e54
 
ed4e54
@@ -1025,10 +941,6 @@ common_print(resource_t * rsc, const char *pre_text, const char *name, node_t *n
ed4e54
         GHashTableIter iter;
ed4e54
         node_t *n = NULL;
ed4e54
 
ed4e54
-        status_print("%s\t(%s%svariant=%s, priority=%f)", pre_text,
ed4e54
-                     is_set(rsc->flags, pe_rsc_provisional) ? "provisional, " : "",
ed4e54
-                     is_set(rsc->flags, pe_rsc_runnable) ? "" : "non-startable, ",
ed4e54
-                     crm_element_name(rsc->xml), (double)rsc->priority);
ed4e54
         status_print("%s\tAllowed Nodes", pre_text);
ed4e54
         g_hash_table_iter_init(&iter, rsc->allowed_nodes);
ed4e54
         while (g_hash_table_iter_next(&iter, NULL, (void **)&n)) {
ed4e54
-- 
ed4e54
1.8.3.1
ed4e54
ed4e54
ed4e54
From 41e911be8ea9151b3f0758c2c22c0e69b8b78d93 Mon Sep 17 00:00:00 2001
ed4e54
From: Ken Gaillot <kgaillot@redhat.com>
ed4e54
Date: Thu, 19 Dec 2019 17:18:41 -0600
ed4e54
Subject: [PATCH 2/8] Log: scheduler: drop redundant trace messages
ed4e54
ed4e54
We logged "applying placement constraints" three times.
ed4e54
---
ed4e54
 lib/pacemaker/pcmk_sched_allocate.c | 17 ++++-------------
ed4e54
 1 file changed, 4 insertions(+), 13 deletions(-)
ed4e54
ed4e54
diff --git a/lib/pacemaker/pcmk_sched_allocate.c b/lib/pacemaker/pcmk_sched_allocate.c
ed4e54
index ca43c71..dde8b69 100644
ed4e54
--- a/lib/pacemaker/pcmk_sched_allocate.c
ed4e54
+++ b/lib/pacemaker/pcmk_sched_allocate.c
ed4e54
@@ -623,21 +623,15 @@ check_actions(pe_working_set_t * data_set)
ed4e54
     }
ed4e54
 }
ed4e54
 
ed4e54
-static gboolean
ed4e54
+static void
ed4e54
 apply_placement_constraints(pe_working_set_t * data_set)
ed4e54
 {
ed4e54
-    GListPtr gIter = NULL;
ed4e54
-
ed4e54
-    crm_trace("Applying constraints...");
ed4e54
-
ed4e54
-    for (gIter = data_set->placement_constraints; gIter != NULL; gIter = gIter->next) {
ed4e54
+    for (GList *gIter = data_set->placement_constraints;
ed4e54
+         gIter != NULL; gIter = gIter->next) {
ed4e54
         pe__location_t *cons = gIter->data;
ed4e54
 
ed4e54
         cons->rsc_lh->cmds->rsc_location(cons->rsc_lh, cons);
ed4e54
     }
ed4e54
-
ed4e54
-    return TRUE;
ed4e54
-
ed4e54
 }
ed4e54
 
ed4e54
 static gboolean
ed4e54
@@ -994,10 +988,7 @@ stage2(pe_working_set_t * data_set)
ed4e54
 {
ed4e54
     GListPtr gIter = NULL;
ed4e54
 
ed4e54
-    crm_trace("Applying placement constraints");
ed4e54
-
ed4e54
-    gIter = data_set->nodes;
ed4e54
-    for (; gIter != NULL; gIter = gIter->next) {
ed4e54
+    for (gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) {
ed4e54
         node_t *node = (node_t *) gIter->data;
ed4e54
 
ed4e54
         if (node == NULL) {
ed4e54
-- 
ed4e54
1.8.3.1
ed4e54
ed4e54
ed4e54
From 7fe136e19b5018d609beb8bad4e34234739572c9 Mon Sep 17 00:00:00 2001
ed4e54
From: Ken Gaillot <kgaillot@redhat.com>
ed4e54
Date: Sat, 7 Dec 2019 12:13:11 -0600
ed4e54
Subject: [PATCH 3/8] Refactor: libcrmcommon: convenience functions for list
ed4e54
 length comparisons
ed4e54
ed4e54
... for efficiency and readability
ed4e54
---
ed4e54
 include/crm/common/internal.h | 14 ++++++++++++++
ed4e54
 1 file changed, 14 insertions(+)
ed4e54
ed4e54
diff --git a/include/crm/common/internal.h b/include/crm/common/internal.h
ed4e54
index da2c7d7..484c836 100644
ed4e54
--- a/include/crm/common/internal.h
ed4e54
+++ b/include/crm/common/internal.h
ed4e54
@@ -126,6 +126,20 @@ crm_getpid_s()
ed4e54
     return crm_strdup_printf("%lu", (unsigned long) getpid());
ed4e54
 }
ed4e54
 
ed4e54
+// More efficient than g_list_length(list) == 1
ed4e54
+static inline bool
ed4e54
+pcmk__list_of_1(GList *list)
ed4e54
+{
ed4e54
+    return list && (list->next == NULL);
ed4e54
+}
ed4e54
+
ed4e54
+// More efficient than g_list_length(list) > 1
ed4e54
+static inline bool
ed4e54
+pcmk__list_of_multiple(GList *list)
ed4e54
+{
ed4e54
+    return list && (list->next != NULL);
ed4e54
+}
ed4e54
+
ed4e54
 /* convenience functions for failure-related node attributes */
ed4e54
 
ed4e54
 #define CRM_FAIL_COUNT_PREFIX   "fail-count"
ed4e54
-- 
ed4e54
1.8.3.1
ed4e54
ed4e54
ed4e54
From 9ff4f6bca540576f0a3333c959e8014ed168353f Mon Sep 17 00:00:00 2001
ed4e54
From: Ken Gaillot <kgaillot@redhat.com>
ed4e54
Date: Mon, 16 Dec 2019 14:13:30 -0600
ed4e54
Subject: [PATCH 4/8] Refactor: libcrmcommon: add convenience macros for
ed4e54
 plurals
ed4e54
ed4e54
I've avoided making s_if_plural() an official API due to its hackiness, but
ed4e54
it really is the best solution for now. Promote it to pcmk__plural_s(), along
ed4e54
with a companion macro pcmk__plural_alt() for more complicated plurals.
ed4e54
---
ed4e54
 include/crm/common/internal.h | 23 +++++++++++++++++++++++
ed4e54
 1 file changed, 23 insertions(+)
ed4e54
ed4e54
diff --git a/include/crm/common/internal.h b/include/crm/common/internal.h
ed4e54
index 484c836..ee560c9 100644
ed4e54
--- a/include/crm/common/internal.h
ed4e54
+++ b/include/crm/common/internal.h
ed4e54
@@ -107,6 +107,29 @@ bool crm_compress_string(const char *data, int length, int max, char **result,
ed4e54
                          unsigned int *result_len);
ed4e54
 gint crm_alpha_sort(gconstpointer a, gconstpointer b);
ed4e54
 
ed4e54
+/* Correctly displaying singular or plural is complicated; consider "1 node has"
ed4e54
+ * vs. "2 nodes have". A flexible solution is to pluralize entire strings, e.g.
ed4e54
+ *
ed4e54
+ * if (a == 1) {
ed4e54
+ *     crm_info("singular message"):
ed4e54
+ * } else {
ed4e54
+ *     crm_info("plural message");
ed4e54
+ * }
ed4e54
+ *
ed4e54
+ * though even that's not sufficient for all languages besides English (if we
ed4e54
+ * ever desire to do translations of output and log messages). But the following
ed4e54
+ * convenience macros are "good enough" and more concise for many cases.
ed4e54
+ */
ed4e54
+
ed4e54
+/* Example:
ed4e54
+ * crm_info("Found %d %s", nentries,
ed4e54
+ *          pcmk__plural_alt(nentries, "entry", "entries"));
ed4e54
+ */
ed4e54
+#define pcmk__plural_alt(i, s1, s2) (((i) == 1)? (s1) : (s2))
ed4e54
+
ed4e54
+// Example: crm_info("Found %d node%s", nnodes, pcmk__plural_s(nnodes));
ed4e54
+#define pcmk__plural_s(i) pcmk__plural_alt(i, "", "s")
ed4e54
+
ed4e54
 static inline char *
ed4e54
 crm_concat(const char *prefix, const char *suffix, char join)
ed4e54
 {
ed4e54
-- 
ed4e54
1.8.3.1
ed4e54
ed4e54
ed4e54
From 0378db5030400202e59b2bae0dabd65d00a3e9c8 Mon Sep 17 00:00:00 2001
ed4e54
From: Ken Gaillot <kgaillot@redhat.com>
ed4e54
Date: Thu, 12 Dec 2019 20:50:50 -0600
ed4e54
Subject: [PATCH 5/8] Log: controller: improve join messages
ed4e54
ed4e54
---
ed4e54
 daemons/controld/controld_fsa.c     |  81 ++++----
ed4e54
 daemons/controld/controld_join_dc.c | 383 +++++++++++++++++++++---------------
ed4e54
 2 files changed, 268 insertions(+), 196 deletions(-)
ed4e54
ed4e54
diff --git a/daemons/controld/controld_fsa.c b/daemons/controld/controld_fsa.c
ed4e54
index 6760224..b985fa9 100644
ed4e54
--- a/daemons/controld/controld_fsa.c
ed4e54
+++ b/daemons/controld/controld_fsa.c
ed4e54
@@ -1,5 +1,5 @@
ed4e54
 /*
ed4e54
- * Copyright 2004-2019 the Pacemaker project contributors
ed4e54
+ * Copyright 2004-2020 the Pacemaker project contributors
ed4e54
  *
ed4e54
  * The version control history for this file may have further details.
ed4e54
  *
ed4e54
@@ -460,12 +460,53 @@ log_fsa_input(fsa_data_t * stored_msg)
ed4e54
     }
ed4e54
 }
ed4e54
 
ed4e54
+static void
ed4e54
+check_join_counts(fsa_data_t *msg_data)
ed4e54
+{
ed4e54
+    int count;
ed4e54
+    guint npeers;
ed4e54
+
ed4e54
+    count = crmd_join_phase_count(crm_join_finalized);
ed4e54
+    if (count > 0) {
ed4e54
+        crm_err("%d cluster node%s failed to confirm join",
ed4e54
+                count, pcmk__plural_s(count));
ed4e54
+        crmd_join_phase_log(LOG_NOTICE);
ed4e54
+        return;
ed4e54
+    }
ed4e54
+
ed4e54
+    npeers = crm_active_peers();
ed4e54
+    count = crmd_join_phase_count(crm_join_confirmed);
ed4e54
+    if (count == npeers) {
ed4e54
+        if (npeers == 1) {
ed4e54
+            crm_debug("Sole active cluster node is fully joined");
ed4e54
+        } else {
ed4e54
+            crm_debug("All %d active cluster nodes are fully joined", count);
ed4e54
+        }
ed4e54
+
ed4e54
+    } else if (count > npeers) {
ed4e54
+        crm_err("New election needed because more nodes confirmed join "
ed4e54
+                "than are in membership (%d > %u)", count, npeers);
ed4e54
+        register_fsa_input(C_FSA_INTERNAL, I_ELECTION, NULL);
ed4e54
+
ed4e54
+    } else if (saved_ccm_membership_id != crm_peer_seq) {
ed4e54
+        crm_info("New join needed because membership changed (%llu -> %llu)",
ed4e54
+                 saved_ccm_membership_id, crm_peer_seq);
ed4e54
+        register_fsa_input_before(C_FSA_INTERNAL, I_NODE_JOIN, NULL);
ed4e54
+
ed4e54
+    } else {
ed4e54
+        crm_warn("Only %d of %u active cluster nodes fully joined "
ed4e54
+                 "(%d did not respond to offer)",
ed4e54
+                 count, npeers, crmd_join_phase_count(crm_join_welcomed));
ed4e54
+    }
ed4e54
+}
ed4e54
+
ed4e54
 long long
ed4e54
 do_state_transition(long long actions,
ed4e54
                     enum crmd_fsa_state cur_state,
ed4e54
                     enum crmd_fsa_state next_state, fsa_data_t * msg_data)
ed4e54
 {
ed4e54
     int level = LOG_INFO;
ed4e54
+    int count = 0;
ed4e54
     long long tmp = actions;
ed4e54
     gboolean clear_recovery_bit = TRUE;
ed4e54
 
ed4e54
@@ -563,13 +604,14 @@ do_state_transition(long long actions,
ed4e54
                 crm_warn("Progressed to state %s after %s",
ed4e54
                          fsa_state2string(next_state), fsa_cause2string(cause));
ed4e54
             }
ed4e54
-            if (crmd_join_phase_count(crm_join_welcomed) > 0) {
ed4e54
-                crm_warn("%u cluster nodes failed to respond"
ed4e54
-                         " to the join offer.", crmd_join_phase_count(crm_join_welcomed));
ed4e54
+            count = crmd_join_phase_count(crm_join_welcomed);
ed4e54
+            if (count > 0) {
ed4e54
+                crm_warn("%d cluster node%s failed to respond to join offer",
ed4e54
+                         count, pcmk__plural_s(count));
ed4e54
                 crmd_join_phase_log(LOG_NOTICE);
ed4e54
 
ed4e54
             } else {
ed4e54
-                crm_debug("All %d cluster nodes responded to the join offer.",
ed4e54
+                crm_debug("All cluster nodes (%d) responded to join offer",
ed4e54
                           crmd_join_phase_count(crm_join_integrated));
ed4e54
             }
ed4e54
             break;
ed4e54
@@ -581,34 +623,7 @@ do_state_transition(long long actions,
ed4e54
                 crm_info("Progressed to state %s after %s",
ed4e54
                          fsa_state2string(next_state), fsa_cause2string(cause));
ed4e54
             }
ed4e54
-
ed4e54
-            if (crmd_join_phase_count(crm_join_finalized) > 0) {
ed4e54
-                crm_err("%u cluster nodes failed to confirm their join.",
ed4e54
-                        crmd_join_phase_count(crm_join_finalized));
ed4e54
-                crmd_join_phase_log(LOG_NOTICE);
ed4e54
-
ed4e54
-            } else if (crmd_join_phase_count(crm_join_confirmed)
ed4e54
-                       == crm_active_peers()) {
ed4e54
-                crm_debug("All %u cluster nodes are"
ed4e54
-                          " eligible to run resources.", crm_active_peers());
ed4e54
-
ed4e54
-            } else if (crmd_join_phase_count(crm_join_confirmed) > crm_active_peers()) {
ed4e54
-                crm_err("We have more confirmed nodes than our membership does: %d vs. %d",
ed4e54
-                        crmd_join_phase_count(crm_join_confirmed), crm_active_peers());
ed4e54
-                register_fsa_input(C_FSA_INTERNAL, I_ELECTION, NULL);
ed4e54
-
ed4e54
-            } else if (saved_ccm_membership_id != crm_peer_seq) {
ed4e54
-                crm_info("Membership changed: %llu -> %llu - join restart",
ed4e54
-                         saved_ccm_membership_id, crm_peer_seq);
ed4e54
-                register_fsa_input_before(C_FSA_INTERNAL, I_NODE_JOIN, NULL);
ed4e54
-
ed4e54
-            } else {
ed4e54
-                crm_warn("Only %u of %u cluster "
ed4e54
-                         "nodes are eligible to run resources - continue %d",
ed4e54
-                         crmd_join_phase_count(crm_join_confirmed),
ed4e54
-                         crm_active_peers(), crmd_join_phase_count(crm_join_welcomed));
ed4e54
-            }
ed4e54
-/* 			initialize_join(FALSE); */
ed4e54
+            check_join_counts(msg_data);
ed4e54
             break;
ed4e54
 
ed4e54
         case S_STOPPING:
ed4e54
diff --git a/daemons/controld/controld_join_dc.c b/daemons/controld/controld_join_dc.c
ed4e54
index 988aaa6..54324b2 100644
ed4e54
--- a/daemons/controld/controld_join_dc.c
ed4e54
+++ b/daemons/controld/controld_join_dc.c
ed4e54
@@ -26,7 +26,11 @@ void finalize_join_for(gpointer key, gpointer value, gpointer user_data);
ed4e54
 void finalize_sync_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data);
ed4e54
 gboolean check_join_state(enum crmd_fsa_state cur_state, const char *source);
ed4e54
 
ed4e54
+/* Numeric counter used to identify join rounds (an unsigned int would be
ed4e54
+ * appropriate, except we get and set it in XML as int)
ed4e54
+ */
ed4e54
 static int current_join_id = 0;
ed4e54
+
ed4e54
 unsigned long long saved_ccm_membership_id = 0;
ed4e54
 
ed4e54
 void
ed4e54
@@ -34,12 +38,7 @@ crm_update_peer_join(const char *source, crm_node_t * node, enum crm_join_phase
ed4e54
 {
ed4e54
     enum crm_join_phase last = 0;
ed4e54
 
ed4e54
-    if(node == NULL) {
ed4e54
-        crm_err("Could not update join because node not specified"
ed4e54
-                CRM_XS " join-%u source=%s phase=%s",
ed4e54
-                current_join_id, source, crm_join_phase_str(phase));
ed4e54
-        return;
ed4e54
-    }
ed4e54
+    CRM_CHECK(node != NULL, return);
ed4e54
 
ed4e54
     /* Remote nodes do not participate in joins */
ed4e54
     if (is_set(node->flags, crm_remote_node)) {
ed4e54
@@ -49,21 +48,23 @@ crm_update_peer_join(const char *source, crm_node_t * node, enum crm_join_phase
ed4e54
     last = node->join;
ed4e54
 
ed4e54
     if(phase == last) {
ed4e54
-        crm_trace("%s: Node %s[%u] - join-%u phase still %s",
ed4e54
-                  source, node->uname, node->id, current_join_id,
ed4e54
-                  crm_join_phase_str(last));
ed4e54
+        crm_trace("Node %s join-%d phase is still %s "
ed4e54
+                  CRM_XS " nodeid=%u source=%s",
ed4e54
+                  node->uname, current_join_id, crm_join_phase_str(last),
ed4e54
+                  node->id, source);
ed4e54
 
ed4e54
     } else if ((phase <= crm_join_none) || (phase == (last + 1))) {
ed4e54
         node->join = phase;
ed4e54
-        crm_info("%s: Node %s[%u] - join-%u phase %s -> %s",
ed4e54
-                 source, node->uname, node->id, current_join_id,
ed4e54
-                 crm_join_phase_str(last), crm_join_phase_str(phase));
ed4e54
+        crm_trace("Node %s join-%d phase is now %s (was %s) "
ed4e54
+                  CRM_XS " nodeid=%u source=%s",
ed4e54
+                 node->uname, current_join_id, crm_join_phase_str(phase),
ed4e54
+                 crm_join_phase_str(last), node->id, source);
ed4e54
 
ed4e54
     } else {
ed4e54
-        crm_err("Could not update join for node %s because phase transition invalid "
ed4e54
-                CRM_XS " join-%u source=%s node_id=%u last=%s new=%s",
ed4e54
-                node->uname, current_join_id, source, node->id,
ed4e54
-                crm_join_phase_str(last), crm_join_phase_str(phase));
ed4e54
+        crm_warn("Rejecting join-%d phase update for node %s because "
ed4e54
+                 "can't go from %s to %s " CRM_XS " nodeid=%u source=%s",
ed4e54
+                 current_join_id, node->uname, crm_join_phase_str(last),
ed4e54
+                 crm_join_phase_str(phase), node->id, source);
ed4e54
     }
ed4e54
 }
ed4e54
 
ed4e54
@@ -73,9 +74,7 @@ initialize_join(gboolean before)
ed4e54
     GHashTableIter iter;
ed4e54
     crm_node_t *peer = NULL;
ed4e54
 
ed4e54
-    /* clear out/reset a bunch of stuff */
ed4e54
-    crm_debug("join-%d: Initializing join data (flag=%s)",
ed4e54
-              current_join_id, before ? "true" : "false");
ed4e54
+    crm_debug("Starting new join round join-%d", current_join_id);
ed4e54
 
ed4e54
     g_hash_table_iter_init(&iter, crm_peer_cache);
ed4e54
     while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &peer)) {
ed4e54
@@ -128,7 +127,9 @@ join_make_offer(gpointer key, gpointer value, gpointer user_data)
ed4e54
 
ed4e54
     CRM_ASSERT(member != NULL);
ed4e54
     if (crm_is_peer_active(member) == FALSE) {
ed4e54
-        crm_info("Not making an offer to %s: not active (%s)", member->uname, member->state);
ed4e54
+        crm_info("Not making join-%d offer to inactive node %s",
ed4e54
+                 current_join_id,
ed4e54
+                 (member->uname? member->uname : "with unknown name"));
ed4e54
         if(member->expected == NULL && safe_str_eq(member->state, CRM_NODE_LOST)) {
ed4e54
             /* You would think this unsafe, but in fact this plus an
ed4e54
              * active resource is what causes it to be fenced.
ed4e54
@@ -145,17 +146,21 @@ join_make_offer(gpointer key, gpointer value, gpointer user_data)
ed4e54
     }
ed4e54
 
ed4e54
     if (member->uname == NULL) {
ed4e54
-        crm_info("No recipient for welcome message.(Node uuid:%s)", member->uuid);
ed4e54
+        crm_info("Not making join-%d offer to node uuid %s with unknown name",
ed4e54
+                 current_join_id, member->uuid);
ed4e54
         return;
ed4e54
     }
ed4e54
 
ed4e54
     if (saved_ccm_membership_id != crm_peer_seq) {
ed4e54
         saved_ccm_membership_id = crm_peer_seq;
ed4e54
-        crm_info("Making join offers based on membership %llu", crm_peer_seq);
ed4e54
+        crm_info("Making join-%d offers based on membership event %llu",
ed4e54
+                 current_join_id, crm_peer_seq);
ed4e54
     }
ed4e54
 
ed4e54
     if(user_data && member->join > crm_join_none) {
ed4e54
-        crm_info("Skipping %s: already known %d", member->uname, member->join);
ed4e54
+        crm_info("Not making join-%d offer to already known node %s (%s)",
ed4e54
+                 current_join_id, member->uname,
ed4e54
+                 crm_join_phase_str(member->join));
ed4e54
         return;
ed4e54
     }
ed4e54
 
ed4e54
@@ -166,14 +171,11 @@ join_make_offer(gpointer key, gpointer value, gpointer user_data)
ed4e54
     // Advertise our feature set so the joining node can bail if not compatible
ed4e54
     crm_xml_add(offer, XML_ATTR_CRM_VERSION, CRM_FEATURE_SET);
ed4e54
 
ed4e54
-    /* send the welcome */
ed4e54
-    crm_info("join-%d: Sending offer to %s", current_join_id, member->uname);
ed4e54
-
ed4e54
+    crm_info("Sending join-%d offer to %s", current_join_id, member->uname);
ed4e54
     send_cluster_message(member, crm_msg_crmd, offer, TRUE);
ed4e54
     free_xml(offer);
ed4e54
 
ed4e54
     crm_update_peer_join(__FUNCTION__, member, crm_join_welcomed);
ed4e54
-    /* crm_update_peer_expected(__FUNCTION__, member, CRMD_JOINSTATE_PENDING); */
ed4e54
 }
ed4e54
 
ed4e54
 /*	 A_DC_JOIN_OFFER_ALL	*/
ed4e54
@@ -183,6 +185,8 @@ do_dc_join_offer_all(long long action,
ed4e54
                      enum crmd_fsa_state cur_state,
ed4e54
                      enum crmd_fsa_input current_input, fsa_data_t * msg_data)
ed4e54
 {
ed4e54
+    int count;
ed4e54
+
ed4e54
     /* Reset everyone's status back to down or in_ccm in the CIB.
ed4e54
      * Any nodes that are active in the CIB but not in the cluster membership
ed4e54
      * will be seen as offline by the scheduler anyway.
ed4e54
@@ -197,9 +201,11 @@ do_dc_join_offer_all(long long action,
ed4e54
     }
ed4e54
     g_hash_table_foreach(crm_peer_cache, join_make_offer, NULL);
ed4e54
 
ed4e54
+    count = crmd_join_phase_count(crm_join_welcomed);
ed4e54
+    crm_info("Waiting on join-%d requests from %d outstanding node%s",
ed4e54
+             current_join_id, count, pcmk__plural_s(count));
ed4e54
+
ed4e54
     // Don't waste time by invoking the scheduler yet
ed4e54
-    crm_info("join-%d: Waiting on %d outstanding join acks",
ed4e54
-             current_join_id, crmd_join_phase_count(crm_join_welcomed));
ed4e54
 }
ed4e54
 
ed4e54
 /*	 A_DC_JOIN_OFFER_ONE	*/
ed4e54
@@ -211,50 +217,40 @@ do_dc_join_offer_one(long long action,
ed4e54
 {
ed4e54
     crm_node_t *member;
ed4e54
     ha_msg_input_t *welcome = NULL;
ed4e54
-
ed4e54
-    const char *op = NULL;
ed4e54
+    int count;
ed4e54
     const char *join_to = NULL;
ed4e54
 
ed4e54
-    if (msg_data->data) {
ed4e54
-        welcome = fsa_typed_data(fsa_dt_ha_msg);
ed4e54
-
ed4e54
-    } else {
ed4e54
-        crm_info("An unknown node joined - (re-)offer to any unconfirmed nodes");
ed4e54
+    if (msg_data->data == NULL) {
ed4e54
+        crm_info("Making join-%d offers to any unconfirmed nodes "
ed4e54
+                 "because an unknown node joined", current_join_id);
ed4e54
         g_hash_table_foreach(crm_peer_cache, join_make_offer, &member);
ed4e54
         check_join_state(cur_state, __FUNCTION__);
ed4e54
         return;
ed4e54
     }
ed4e54
 
ed4e54
+    welcome = fsa_typed_data(fsa_dt_ha_msg);
ed4e54
     if (welcome == NULL) {
ed4e54
-        crm_err("Attempt to send welcome message without a message to reply to!");
ed4e54
+        // fsa_typed_data() already logged an error
ed4e54
         return;
ed4e54
     }
ed4e54
 
ed4e54
     join_to = crm_element_value(welcome->msg, F_CRM_HOST_FROM);
ed4e54
     if (join_to == NULL) {
ed4e54
-        crm_err("Attempt to send welcome message without a host to reply to!");
ed4e54
+        crm_err("Can't make join-%d offer to unknown node", current_join_id);
ed4e54
         return;
ed4e54
     }
ed4e54
-
ed4e54
     member = crm_get_peer(0, join_to);
ed4e54
-    op = crm_element_value(welcome->msg, F_CRM_TASK);
ed4e54
-    if (join_to != NULL && (cur_state == S_INTEGRATION || cur_state == S_FINALIZE_JOIN)) {
ed4e54
-        /* note: it _is_ possible that a node will have been
ed4e54
-         *  sick or starting up when the original offer was made.
ed4e54
-         *  however, it will either re-announce itself in due course
ed4e54
-         *  _or_ we can re-store the original offer on the client.
ed4e54
-         */
ed4e54
-        crm_trace("(Re-)offering membership to %s...", join_to);
ed4e54
-    }
ed4e54
 
ed4e54
-    crm_info("join-%d: Processing %s request from %s in state %s",
ed4e54
-             current_join_id, op, join_to, fsa_state2string(cur_state));
ed4e54
+    /* It is possible that a node will have been sick or starting up when the
ed4e54
+     * original offer was made. However, it will either re-announce itself in
ed4e54
+     * due course, or we can re-store the original offer on the client.
ed4e54
+     */
ed4e54
 
ed4e54
     crm_update_peer_join(__FUNCTION__, member, crm_join_none);
ed4e54
     join_make_offer(NULL, member, NULL);
ed4e54
 
ed4e54
-    /* always offer to the DC (ourselves)
ed4e54
-     * this ensures the correct value for max_generation_from
ed4e54
+    /* If the offer isn't to the local node, make an offer to the local node as
ed4e54
+     * well, to ensure the correct value for max_generation_from.
ed4e54
      */
ed4e54
     if (strcmp(join_to, fsa_our_uname) != 0) {
ed4e54
         member = crm_get_peer(0, fsa_our_uname);
ed4e54
@@ -266,9 +262,11 @@ do_dc_join_offer_one(long long action,
ed4e54
      */
ed4e54
     abort_transition(INFINITY, tg_restart, "Node join", NULL);
ed4e54
 
ed4e54
+    count = crmd_join_phase_count(crm_join_welcomed);
ed4e54
+    crm_info("Waiting on join-%d requests from %d outstanding node%s",
ed4e54
+             current_join_id, count, pcmk__plural_s(count));
ed4e54
+
ed4e54
     // Don't waste time by invoking the scheduler yet
ed4e54
-    crm_debug("Waiting on %d outstanding join acks for join-%d",
ed4e54
-              crmd_join_phase_count(crm_join_welcomed), current_join_id);
ed4e54
 }
ed4e54
 
ed4e54
 static int
ed4e54
@@ -301,22 +299,31 @@ do_dc_join_filter_offer(long long action,
ed4e54
 
ed4e54
     int cmp = 0;
ed4e54
     int join_id = -1;
ed4e54
+    int count = 0;
ed4e54
     gboolean ack_nack_bool = TRUE;
ed4e54
-    const char *ack_nack = CRMD_JOINSTATE_MEMBER;
ed4e54
     ha_msg_input_t *join_ack = fsa_typed_data(fsa_dt_ha_msg);
ed4e54
 
ed4e54
     const char *join_from = crm_element_value(join_ack->msg, F_CRM_HOST_FROM);
ed4e54
     const char *ref = crm_element_value(join_ack->msg, F_CRM_REFERENCE);
ed4e54
     const char *join_version = crm_element_value(join_ack->msg,
ed4e54
                                                  XML_ATTR_CRM_VERSION);
ed4e54
+    crm_node_t *join_node = NULL;
ed4e54
 
ed4e54
-    crm_node_t *join_node = crm_get_peer(0, join_from);
ed4e54
-
ed4e54
-    crm_debug("Processing req from %s", join_from);
ed4e54
+    if (join_from == NULL) {
ed4e54
+        crm_err("Ignoring invalid join request without node name");
ed4e54
+        return;
ed4e54
+    }
ed4e54
+    join_node = crm_get_peer(0, join_from);
ed4e54
 
ed4e54
-    generation = join_ack->xml;
ed4e54
     crm_element_value_int(join_ack->msg, F_CRM_JOIN_ID, &join_id);
ed4e54
+    if (join_id != current_join_id) {
ed4e54
+        crm_debug("Ignoring join-%d request from %s because we are on join-%d",
ed4e54
+                  join_id, join_from, current_join_id);
ed4e54
+        check_join_state(cur_state, __FUNCTION__);
ed4e54
+        return;
ed4e54
+    }
ed4e54
 
ed4e54
+    generation = join_ack->xml;
ed4e54
     if (max_generation_xml != NULL && generation != NULL) {
ed4e54
         int lpc = 0;
ed4e54
 
ed4e54
@@ -331,68 +338,71 @@ do_dc_join_filter_offer(long long action,
ed4e54
         }
ed4e54
     }
ed4e54
 
ed4e54
-    if (join_id != current_join_id) {
ed4e54
-        crm_debug("Invalid response from %s: join-%d vs. join-%d",
ed4e54
-                  join_from, join_id, current_join_id);
ed4e54
-        check_join_state(cur_state, __FUNCTION__);
ed4e54
-        return;
ed4e54
+    if (ref == NULL) {
ed4e54
+        ref = "none"; // for logging only
ed4e54
+    }
ed4e54
 
ed4e54
-    } else if (join_node == NULL || crm_is_peer_active(join_node) == FALSE) {
ed4e54
-        crm_err("Node %s is not a member", join_from);
ed4e54
+    if (crm_is_peer_active(join_node) == FALSE) {
ed4e54
+        crm_err("Rejecting join-%d request from inactive node %s "
ed4e54
+                CRM_XS " ref=%s", join_id, join_from, ref);
ed4e54
         ack_nack_bool = FALSE;
ed4e54
 
ed4e54
     } else if (generation == NULL) {
ed4e54
-        crm_err("Generation was NULL");
ed4e54
+        crm_err("Rejecting invalid join-%d request from node %s "
ed4e54
+                "missing CIB generation " CRM_XS " ref=%s",
ed4e54
+                join_id, join_from, ref);
ed4e54
         ack_nack_bool = FALSE;
ed4e54
 
ed4e54
     } else if ((join_version == NULL)
ed4e54
                || !feature_set_compatible(CRM_FEATURE_SET, join_version)) {
ed4e54
-        crm_err("Node %s feature set (%s) is incompatible with ours (%s)",
ed4e54
-                join_from, (join_version? join_version : "pre-3.1.0"),
ed4e54
-                CRM_FEATURE_SET);
ed4e54
+        crm_err("Rejecting join-%d request from node %s because feature set %s"
ed4e54
+                " is incompatible with ours (%s) " CRM_XS " ref=%s",
ed4e54
+                join_id, join_from, (join_version? join_version : "pre-3.1.0"),
ed4e54
+                CRM_FEATURE_SET, ref);
ed4e54
         ack_nack_bool = FALSE;
ed4e54
 
ed4e54
     } else if (max_generation_xml == NULL) {
ed4e54
+        crm_debug("Accepting join-%d request from %s "
ed4e54
+                  "(with first CIB generation) " CRM_XS " ref=%s",
ed4e54
+                  join_id, join_from, ref);
ed4e54
         max_generation_xml = copy_xml(generation);
ed4e54
         max_generation_from = strdup(join_from);
ed4e54
 
ed4e54
     } else if (cmp < 0 || (cmp == 0 && safe_str_eq(join_from, fsa_our_uname))) {
ed4e54
-        crm_debug("%s has a better generation number than"
ed4e54
-                  " the current max %s", join_from, max_generation_from);
ed4e54
-        if (max_generation_xml) {
ed4e54
-            crm_log_xml_debug(max_generation_xml, "Max generation");
ed4e54
-        }
ed4e54
-        crm_log_xml_debug(generation, "Their generation");
ed4e54
+        crm_debug("Accepting join-%d request from %s (with better "
ed4e54
+                  "CIB generation than current best from %s) " CRM_XS " ref=%s",
ed4e54
+                  join_id, join_from, max_generation_from, ref);
ed4e54
+        crm_log_xml_debug(max_generation_xml, "Old max generation");
ed4e54
+        crm_log_xml_debug(generation, "New max generation");
ed4e54
 
ed4e54
         free(max_generation_from);
ed4e54
         free_xml(max_generation_xml);
ed4e54
 
ed4e54
         max_generation_from = strdup(join_from);
ed4e54
         max_generation_xml = copy_xml(join_ack->xml);
ed4e54
+
ed4e54
+    } else {
ed4e54
+        crm_debug("Accepting join-%d request from %s " CRM_XS " ref=%s",
ed4e54
+                  join_id, join_from, ref);
ed4e54
     }
ed4e54
 
ed4e54
     if (ack_nack_bool == FALSE) {
ed4e54
-        /* NACK this client */
ed4e54
-        ack_nack = CRMD_JOINSTATE_NACK;
ed4e54
         crm_update_peer_join(__FUNCTION__, join_node, crm_join_nack);
ed4e54
-        crm_err("Rejecting cluster join request from %s " CRM_XS
ed4e54
-                " NACK join-%d ref=%s", join_from, join_id, ref);
ed4e54
-
ed4e54
+        crm_update_peer_expected(__FUNCTION__, join_node, CRMD_JOINSTATE_NACK);
ed4e54
     } else {
ed4e54
-        crm_debug("join-%d: Welcoming node %s (ref %s)", join_id, join_from, ref);
ed4e54
         crm_update_peer_join(__FUNCTION__, join_node, crm_join_integrated);
ed4e54
+        crm_update_peer_expected(__FUNCTION__, join_node, CRMD_JOINSTATE_MEMBER);
ed4e54
     }
ed4e54
 
ed4e54
-    crm_update_peer_expected(__FUNCTION__, join_node, ack_nack);
ed4e54
-
ed4e54
-    crm_debug("%u nodes have been integrated into join-%d",
ed4e54
-              crmd_join_phase_count(crm_join_integrated), join_id);
ed4e54
-
ed4e54
+    count = crmd_join_phase_count(crm_join_integrated);
ed4e54
+    crm_debug("%d node%s currently integrated in join-%d",
ed4e54
+              count, pcmk__plural_s(count), join_id);
ed4e54
 
ed4e54
     if (check_join_state(cur_state, __FUNCTION__) == FALSE) {
ed4e54
         // Don't waste time by invoking the scheduler yet
ed4e54
-        crm_debug("join-%d: Still waiting on %d outstanding offers",
ed4e54
-                  join_id, crmd_join_phase_count(crm_join_welcomed));
ed4e54
+        count = crmd_join_phase_count(crm_join_welcomed);
ed4e54
+        crm_debug("Waiting on join-%d requests from %d outstanding node%s",
ed4e54
+                  join_id, count, pcmk__plural_s(count));
ed4e54
     }
ed4e54
 }
ed4e54
 
ed4e54
@@ -405,21 +415,24 @@ do_dc_join_finalize(long long action,
ed4e54
 {
ed4e54
     char *sync_from = NULL;
ed4e54
     int rc = pcmk_ok;
ed4e54
+    int count_welcomed = crmd_join_phase_count(crm_join_welcomed);
ed4e54
+    int count_integrated = crmd_join_phase_count(crm_join_integrated);
ed4e54
 
ed4e54
     /* This we can do straight away and avoid clients timing us out
ed4e54
      *  while we compute the latest CIB
ed4e54
      */
ed4e54
-    crm_debug("Finalizing join-%d for %d clients",
ed4e54
-              current_join_id, crmd_join_phase_count(crm_join_integrated));
ed4e54
-
ed4e54
-    crmd_join_phase_log(LOG_INFO);
ed4e54
-    if (crmd_join_phase_count(crm_join_welcomed) != 0) {
ed4e54
-        crm_info("Waiting for %d more nodes", crmd_join_phase_count(crm_join_welcomed));
ed4e54
+    if (count_welcomed != 0) {
ed4e54
+        crm_debug("Waiting on join-%d requests from %d outstanding node%s "
ed4e54
+                  "before finalizing join", current_join_id, count_welcomed,
ed4e54
+                  pcmk__plural_s(count_welcomed));
ed4e54
+        crmd_join_phase_log(LOG_DEBUG);
ed4e54
         /* crmd_fsa_stall(FALSE); Needed? */
ed4e54
         return;
ed4e54
 
ed4e54
-    } else if (crmd_join_phase_count(crm_join_integrated) == 0) {
ed4e54
-        /* Nothing to do */
ed4e54
+    } else if (count_integrated == 0) {
ed4e54
+        crm_debug("Finalization not needed for join-%d at the current time",
ed4e54
+                  current_join_id);
ed4e54
+        crmd_join_phase_log(LOG_DEBUG);
ed4e54
         check_join_state(fsa_state, __FUNCTION__);
ed4e54
         return;
ed4e54
     }
ed4e54
@@ -430,8 +443,9 @@ do_dc_join_finalize(long long action,
ed4e54
     }
ed4e54
 
ed4e54
     if (is_set(fsa_input_register, R_IN_TRANSITION)) {
ed4e54
-        crm_warn("Delaying response to cluster join offer while transition in progress "
ed4e54
-                 CRM_XS " join-%d", current_join_id);
ed4e54
+        crm_warn("Delaying join-%d finalization while transition in progress",
ed4e54
+                 current_join_id);
ed4e54
+        crmd_join_phase_log(LOG_DEBUG);
ed4e54
         crmd_fsa_stall(FALSE);
ed4e54
         return;
ed4e54
     }
ed4e54
@@ -440,18 +454,20 @@ do_dc_join_finalize(long long action,
ed4e54
         /* ask for the agreed best CIB */
ed4e54
         sync_from = strdup(max_generation_from);
ed4e54
         set_bit(fsa_input_register, R_CIB_ASKED);
ed4e54
-        crm_notice("Syncing the Cluster Information Base from %s to rest of cluster "
ed4e54
-                   CRM_XS " join-%d", sync_from, current_join_id);
ed4e54
-        crm_log_xml_notice(max_generation_xml, "Requested version");
ed4e54
+        crm_notice("Finalizing join-%d for %d node%s (sync'ing CIB from %s)",
ed4e54
+                   current_join_id, count_integrated,
ed4e54
+                   pcmk__plural_s(count_integrated), sync_from);
ed4e54
+        crm_log_xml_notice(max_generation_xml, "Requested CIB version");
ed4e54
 
ed4e54
     } else {
ed4e54
         /* Send _our_ CIB out to everyone */
ed4e54
         sync_from = strdup(fsa_our_uname);
ed4e54
-        crm_info("join-%d: Syncing our CIB to the rest of the cluster",
ed4e54
-                 current_join_id);
ed4e54
-        crm_log_xml_debug(max_generation_xml, "Requested version");
ed4e54
+        crm_debug("Finalizing join-%d for %d node%s (sync'ing from local CIB)",
ed4e54
+                  current_join_id, count_integrated,
ed4e54
+                  pcmk__plural_s(count_integrated));
ed4e54
+        crm_log_xml_debug(max_generation_xml, "Requested CIB version");
ed4e54
     }
ed4e54
-
ed4e54
+    crmd_join_phase_log(LOG_DEBUG);
ed4e54
 
ed4e54
     rc = fsa_cib_conn->cmds->sync_from(fsa_cib_conn, sync_from, NULL, cib_quorum_override);
ed4e54
     fsa_register_cib_callback(rc, FALSE, sync_from, finalize_sync_callback);
ed4e54
@@ -463,26 +479,33 @@ finalize_sync_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, voi
ed4e54
     CRM_LOG_ASSERT(-EPERM != rc);
ed4e54
     clear_bit(fsa_input_register, R_CIB_ASKED);
ed4e54
     if (rc != pcmk_ok) {
ed4e54
-        do_crm_log((rc == -pcmk_err_old_data ? LOG_WARNING : LOG_ERR),
ed4e54
-                   "Sync from %s failed: %s", (char *)user_data, pcmk_strerror(rc));
ed4e54
+        do_crm_log(((rc == -pcmk_err_old_data)? LOG_WARNING : LOG_ERR),
ed4e54
+                   "Could not sync CIB from %s in join-%d: %s",
ed4e54
+                   (char *) user_data, current_join_id, pcmk_strerror(rc));
ed4e54
 
ed4e54
         /* restart the whole join process */
ed4e54
         register_fsa_error_adv(C_FSA_INTERNAL, I_ELECTION_DC, NULL, NULL, __FUNCTION__);
ed4e54
 
ed4e54
-    } else if (AM_I_DC && fsa_state == S_FINALIZE_JOIN) {
ed4e54
+    } else if (!AM_I_DC) {
ed4e54
+        crm_debug("Sync'ed CIB for join-%d but no longer DC", current_join_id);
ed4e54
+
ed4e54
+    } else if (fsa_state != S_FINALIZE_JOIN) {
ed4e54
+        crm_debug("Sync'ed CIB for join-%d but no longer in S_FINALIZE_JOIN (%s)",
ed4e54
+                  current_join_id, fsa_state2string(fsa_state));
ed4e54
+
ed4e54
+    } else {
ed4e54
         set_bit(fsa_input_register, R_HAVE_CIB);
ed4e54
         clear_bit(fsa_input_register, R_CIB_ASKED);
ed4e54
 
ed4e54
         /* make sure dc_uuid is re-set to us */
ed4e54
         if (check_join_state(fsa_state, __FUNCTION__) == FALSE) {
ed4e54
-            crm_debug("Notifying %d clients of join-%d results",
ed4e54
-                      crmd_join_phase_count(crm_join_integrated), current_join_id);
ed4e54
+            int count_integrated = crmd_join_phase_count(crm_join_integrated);
ed4e54
+
ed4e54
+            crm_debug("Notifying %d node%s of join-%d results",
ed4e54
+                      count_integrated, pcmk__plural_s(count_integrated),
ed4e54
+                      current_join_id);
ed4e54
             g_hash_table_foreach(crm_peer_cache, finalize_join_for, NULL);
ed4e54
         }
ed4e54
-
ed4e54
-    } else {
ed4e54
-        crm_debug("No longer the DC in S_FINALIZE_JOIN: %s in %s",
ed4e54
-                  AM_I_DC ? "DC" : "controller", fsa_state2string(fsa_state));
ed4e54
     }
ed4e54
 }
ed4e54
 
ed4e54
@@ -492,11 +515,14 @@ join_update_complete_callback(xmlNode * msg, int call_id, int rc, xmlNode * outp
ed4e54
     fsa_data_t *msg_data = NULL;
ed4e54
 
ed4e54
     if (rc == pcmk_ok) {
ed4e54
-        crm_debug("Join update %d complete", call_id);
ed4e54
+        crm_debug("join-%d node history update (via CIB call %d) complete",
ed4e54
+                  current_join_id, call_id);
ed4e54
         check_join_state(fsa_state, __FUNCTION__);
ed4e54
 
ed4e54
     } else {
ed4e54
-        crm_err("Join update %d failed", call_id);
ed4e54
+        crm_err("join-%d node history update (via CIB call %d) failed: %s "
ed4e54
+                "(next transition may determine resource status incorrectly)",
ed4e54
+                current_join_id, call_id, pcmk_strerror(rc));
ed4e54
         crm_log_xml_debug(msg, "failed");
ed4e54
         register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
ed4e54
     }
ed4e54
@@ -515,61 +541,75 @@ do_dc_join_ack(long long action,
ed4e54
 
ed4e54
     const char *op = crm_element_value(join_ack->msg, F_CRM_TASK);
ed4e54
     const char *join_from = crm_element_value(join_ack->msg, F_CRM_HOST_FROM);
ed4e54
-    crm_node_t *peer = crm_get_peer(0, join_from);
ed4e54
+    crm_node_t *peer = NULL;
ed4e54
 
ed4e54
-    if (safe_str_neq(op, CRM_OP_JOIN_CONFIRM) || peer == NULL) {
ed4e54
-        crm_debug("Ignoring op=%s message from %s", op, join_from);
ed4e54
+    // Sanity checks
ed4e54
+    if (join_from == NULL) {
ed4e54
+        crm_warn("Ignoring message received without node identification");
ed4e54
+        return;
ed4e54
+    }
ed4e54
+    if (op == NULL) {
ed4e54
+        crm_warn("Ignoring message received from %s without task", join_from);
ed4e54
         return;
ed4e54
     }
ed4e54
 
ed4e54
-    crm_trace("Processing ack from %s", join_from);
ed4e54
-    crm_element_value_int(join_ack->msg, F_CRM_JOIN_ID, &join_id);
ed4e54
+    if (strcmp(op, CRM_OP_JOIN_CONFIRM)) {
ed4e54
+        crm_debug("Ignoring '%s' message from %s while waiting for '%s'",
ed4e54
+                  op, join_from, CRM_OP_JOIN_CONFIRM);
ed4e54
+        return;
ed4e54
+    }
ed4e54
 
ed4e54
+    if (crm_element_value_int(join_ack->msg, F_CRM_JOIN_ID, &join_id) != 0) {
ed4e54
+        crm_warn("Ignoring join confirmation from %s without valid join ID",
ed4e54
+                 join_from);
ed4e54
+        return;
ed4e54
+    }
ed4e54
+
ed4e54
+    peer = crm_get_peer(0, join_from);
ed4e54
     if (peer->join != crm_join_finalized) {
ed4e54
-        crm_info("Join not in progress: ignoring join-%d from %s (phase = %d)",
ed4e54
-                 join_id, join_from, peer->join);
ed4e54
+        crm_info("Ignoring out-of-sequence join-%d confirmation from %s "
ed4e54
+                 "(currently %s not %s)",
ed4e54
+                 join_id, join_from, crm_join_phase_str(peer->join),
ed4e54
+                 crm_join_phase_str(crm_join_finalized));
ed4e54
         return;
ed4e54
+    }
ed4e54
 
ed4e54
-    } else if (join_id != current_join_id) {
ed4e54
-        crm_err("Invalid response from %s: join-%d vs. join-%d",
ed4e54
-                join_from, join_id, current_join_id);
ed4e54
+    if (join_id != current_join_id) {
ed4e54
+        crm_err("Rejecting join-%d confirmation from %s "
ed4e54
+                "because currently on join-%d",
ed4e54
+                join_id, join_from, current_join_id);
ed4e54
         crm_update_peer_join(__FUNCTION__, peer, crm_join_nack);
ed4e54
         return;
ed4e54
     }
ed4e54
 
ed4e54
     crm_update_peer_join(__FUNCTION__, peer, crm_join_confirmed);
ed4e54
 
ed4e54
-    crm_info("join-%d: Updating node state to %s for %s",
ed4e54
-             join_id, CRMD_JOINSTATE_MEMBER, join_from);
ed4e54
-
ed4e54
-    /* update CIB with the current LRM status from the node
ed4e54
-     * We don't need to notify the TE of these updates, a transition will
ed4e54
-     *   be started in due time
ed4e54
+    /* Update CIB with node's current executor state. A new transition will be
ed4e54
+     * triggered later, when the CIB notifies us of the change.
ed4e54
      */
ed4e54
     erase_status_tag(join_from, XML_CIB_TAG_LRM, cib_scope_local);
ed4e54
-
ed4e54
     if (safe_str_eq(join_from, fsa_our_uname)) {
ed4e54
         xmlNode *now_dc_lrmd_state = do_lrm_query(TRUE, fsa_our_uname);
ed4e54
 
ed4e54
         if (now_dc_lrmd_state != NULL) {
ed4e54
-            crm_debug("Local executor state updated from query");
ed4e54
             fsa_cib_update(XML_CIB_TAG_STATUS, now_dc_lrmd_state,
ed4e54
                 cib_scope_local | cib_quorum_override | cib_can_create, call_id, NULL);
ed4e54
             free_xml(now_dc_lrmd_state);
ed4e54
+            crm_debug("Updating local node history for join-%d "
ed4e54
+                      "from query result (via CIB call %d)", join_id, call_id);
ed4e54
         } else {
ed4e54
-            crm_warn("Local executor state updated from join acknowledgement because query failed");
ed4e54
             fsa_cib_update(XML_CIB_TAG_STATUS, join_ack->xml,
ed4e54
                 cib_scope_local | cib_quorum_override | cib_can_create, call_id, NULL);
ed4e54
+            crm_warn("Updating local node history from join-%d confirmation "
ed4e54
+                     "because query failed (via CIB call %d)", join_id, call_id);
ed4e54
         }
ed4e54
     } else {
ed4e54
-        crm_debug("Executor state for %s updated from join acknowledgement",
ed4e54
-                  join_from);
ed4e54
         fsa_cib_update(XML_CIB_TAG_STATUS, join_ack->xml,
ed4e54
            cib_scope_local | cib_quorum_override | cib_can_create, call_id, NULL);
ed4e54
+        crm_debug("Updating node history for %s from join-%d confirmation "
ed4e54
+                  "(via CIB call %d)", join_from, join_id, call_id);
ed4e54
     }
ed4e54
-
ed4e54
     fsa_register_cib_callback(call_id, FALSE, NULL, join_update_complete_callback);
ed4e54
-    crm_debug("join-%d: Registered callback for CIB status update %d", join_id, call_id);
ed4e54
 }
ed4e54
 
ed4e54
 void
ed4e54
@@ -581,17 +621,16 @@ finalize_join_for(gpointer key, gpointer value, gpointer user_data)
ed4e54
     const char *join_to = join_node->uname;
ed4e54
 
ed4e54
     if(join_node->join != crm_join_integrated) {
ed4e54
-        crm_trace("Skipping %s in state %d", join_to, join_node->join);
ed4e54
+        crm_trace("Not updating non-integrated node %s (%s) for join-%d",
ed4e54
+                  join_to, crm_join_phase_str(join_node->join),
ed4e54
+                  current_join_id);
ed4e54
         return;
ed4e54
     }
ed4e54
 
ed4e54
-    /* make sure a node entry exists for the new node */
ed4e54
-    crm_trace("Creating node entry for %s", join_to);
ed4e54
-
ed4e54
+    crm_trace("Updating node state for %s", join_to);
ed4e54
     tmp1 = create_xml_node(NULL, XML_CIB_TAG_NODE);
ed4e54
     set_uuid(tmp1, XML_ATTR_UUID, join_node);
ed4e54
     crm_xml_add(tmp1, XML_ATTR_UNAME, join_to);
ed4e54
-
ed4e54
     fsa_cib_anon_update(XML_CIB_TAG_NODES, tmp1);
ed4e54
     free_xml(tmp1);
ed4e54
 
ed4e54
@@ -610,11 +649,10 @@ finalize_join_for(gpointer key, gpointer value, gpointer user_data)
ed4e54
         return;
ed4e54
     }
ed4e54
 
ed4e54
-    /* send the ack/nack to the node */
ed4e54
-    acknak = create_dc_message(CRM_OP_JOIN_ACKNAK, join_to);
ed4e54
-
ed4e54
-    crm_debug("join-%d: ACK'ing join request from %s",
ed4e54
+    // Acknowledge node's join request
ed4e54
+    crm_debug("Acknowledging join-%d request from %s",
ed4e54
               current_join_id, join_to);
ed4e54
+    acknak = create_dc_message(CRM_OP_JOIN_ACKNAK, join_to);
ed4e54
     crm_xml_add(acknak, CRM_OP_JOIN_ACKNAK, XML_BOOLEAN_TRUE);
ed4e54
     crm_update_peer_join(__FUNCTION__, join_node, crm_join_finalized);
ed4e54
     crm_update_peer_expected(__FUNCTION__, join_node, CRMD_JOINSTATE_MEMBER);
ed4e54
@@ -629,11 +667,11 @@ check_join_state(enum crmd_fsa_state cur_state, const char *source)
ed4e54
 {
ed4e54
     static unsigned long long highest_seq = 0;
ed4e54
 
ed4e54
-    crm_debug("Invoked by %s in state: %s", source, fsa_state2string(cur_state));
ed4e54
-
ed4e54
     if (saved_ccm_membership_id != crm_peer_seq) {
ed4e54
-        crm_debug("%s: Membership changed since join started: %llu -> %llu (%llu)",
ed4e54
-                  source, saved_ccm_membership_id, crm_peer_seq, highest_seq);
ed4e54
+        crm_debug("join-%d: Membership changed from %llu to %llu "
ed4e54
+                  CRM_XS " highest=%llu state=%s for=%s",
ed4e54
+                  current_join_id, saved_ccm_membership_id, crm_peer_seq, highest_seq,
ed4e54
+                  fsa_state2string(cur_state), source);
ed4e54
         if(highest_seq < crm_peer_seq) {
ed4e54
             /* Don't spam the FSA with duplicates */
ed4e54
             highest_seq = crm_peer_seq;
ed4e54
@@ -642,34 +680,53 @@ check_join_state(enum crmd_fsa_state cur_state, const char *source)
ed4e54
 
ed4e54
     } else if (cur_state == S_INTEGRATION) {
ed4e54
         if (crmd_join_phase_count(crm_join_welcomed) == 0) {
ed4e54
-            crm_debug("join-%d: Integration of %d peers complete: %s",
ed4e54
-                      current_join_id, crmd_join_phase_count(crm_join_integrated), source);
ed4e54
+            int count = crmd_join_phase_count(crm_join_integrated);
ed4e54
+
ed4e54
+            crm_debug("join-%d: Integration of %d peer%s complete "
ed4e54
+                      CRM_XS " state=%s for=%s",
ed4e54
+                      current_join_id, count, pcmk__plural_s(count),
ed4e54
+                      fsa_state2string(cur_state), source);
ed4e54
             register_fsa_input_before(C_FSA_INTERNAL, I_INTEGRATED, NULL);
ed4e54
             return TRUE;
ed4e54
         }
ed4e54
 
ed4e54
     } else if (cur_state == S_FINALIZE_JOIN) {
ed4e54
         if (is_set(fsa_input_register, R_HAVE_CIB) == FALSE) {
ed4e54
-            crm_debug("join-%d: Delaying I_FINALIZED until we have the CIB", current_join_id);
ed4e54
+            crm_debug("join-%d: Delaying finalization until we have CIB "
ed4e54
+                      CRM_XS " state=%s for=%s",
ed4e54
+                      current_join_id, fsa_state2string(cur_state), source);
ed4e54
             return TRUE;
ed4e54
 
ed4e54
         } else if (crmd_join_phase_count(crm_join_welcomed) != 0) {
ed4e54
-            crm_debug("join-%d: Still waiting on %d welcomed nodes",
ed4e54
-                      current_join_id, crmd_join_phase_count(crm_join_welcomed));
ed4e54
+            int count = crmd_join_phase_count(crm_join_welcomed);
ed4e54
+
ed4e54
+            crm_debug("join-%d: Still waiting on %d welcomed node%s "
ed4e54
+                      CRM_XS " state=%s for=%s",
ed4e54
+                      current_join_id, count, pcmk__plural_s(count),
ed4e54
+                      fsa_state2string(cur_state), source);
ed4e54
             crmd_join_phase_log(LOG_DEBUG);
ed4e54
 
ed4e54
         } else if (crmd_join_phase_count(crm_join_integrated) != 0) {
ed4e54
-            crm_debug("join-%d: Still waiting on %d integrated nodes",
ed4e54
-                      current_join_id, crmd_join_phase_count(crm_join_integrated));
ed4e54
+            int count = crmd_join_phase_count(crm_join_integrated);
ed4e54
+
ed4e54
+            crm_debug("join-%d: Still waiting on %d integrated node%s "
ed4e54
+                      CRM_XS " state=%s for=%s",
ed4e54
+                      current_join_id, count, pcmk__plural_s(count),
ed4e54
+                      fsa_state2string(cur_state), source);
ed4e54
             crmd_join_phase_log(LOG_DEBUG);
ed4e54
 
ed4e54
         } else if (crmd_join_phase_count(crm_join_finalized) != 0) {
ed4e54
-            crm_debug("join-%d: Still waiting on %d finalized nodes",
ed4e54
-                      current_join_id, crmd_join_phase_count(crm_join_finalized));
ed4e54
+            int count = crmd_join_phase_count(crm_join_finalized);
ed4e54
+
ed4e54
+            crm_debug("join-%d: Still waiting on %d finalized node%s "
ed4e54
+                      CRM_XS " state=%s for=%s",
ed4e54
+                      current_join_id, count, pcmk__plural_s(count),
ed4e54
+                      fsa_state2string(cur_state), source);
ed4e54
             crmd_join_phase_log(LOG_DEBUG);
ed4e54
 
ed4e54
         } else {
ed4e54
-            crm_debug("join-%d complete: %s", current_join_id, source);
ed4e54
+            crm_debug("join-%d: Complete " CRM_XS " state=%s for=%s",
ed4e54
+                      current_join_id, fsa_state2string(cur_state), source);
ed4e54
             register_fsa_input_later(C_FSA_INTERNAL, I_FINALIZED, NULL);
ed4e54
             return TRUE;
ed4e54
         }
ed4e54
-- 
ed4e54
1.8.3.1
ed4e54
ed4e54
ed4e54
From 034b27734d05e8aeddb586f2daaede8314f9516f Mon Sep 17 00:00:00 2001
ed4e54
From: Ken Gaillot <kgaillot@redhat.com>
ed4e54
Date: Fri, 13 Dec 2019 10:39:34 -0600
ed4e54
Subject: [PATCH 6/8] Log: controller: improve CIB status deletion messages
ed4e54
ed4e54
---
ed4e54
 daemons/controld/controld_utils.c | 25 +++++++++++++++++--------
ed4e54
 1 file changed, 17 insertions(+), 8 deletions(-)
ed4e54
ed4e54
diff --git a/daemons/controld/controld_utils.c b/daemons/controld/controld_utils.c
ed4e54
index 3acd488..bb8ace9 100644
ed4e54
--- a/daemons/controld/controld_utils.c
ed4e54
+++ b/daemons/controld/controld_utils.c
ed4e54
@@ -751,14 +751,18 @@ update_dc(xmlNode * msg)
ed4e54
     return TRUE;
ed4e54
 }
ed4e54
 
ed4e54
-#define STATUS_PATH_MAX 512
ed4e54
 static void
ed4e54
 erase_xpath_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data)
ed4e54
 {
ed4e54
     char *xpath = user_data;
ed4e54
 
ed4e54
-    do_crm_log_unlikely(rc == 0 ? LOG_DEBUG : LOG_NOTICE,
ed4e54
-                        "Deletion of \"%s\": %s (rc=%d)", xpath, pcmk_strerror(rc), rc);
ed4e54
+    if (rc == 0) {
ed4e54
+        crm_debug("Deletion of '%s' from CIB (via CIB call %d) succeeded",
ed4e54
+                  xpath, call_id);
ed4e54
+    } else {
ed4e54
+        crm_warn("Deletion of '%s' from CIB (via CIB call %d) failed: %s "
ed4e54
+                 CRM_XS " rc=%d", xpath, call_id, pcmk_strerror(rc), rc);
ed4e54
+    }
ed4e54
 }
ed4e54
 
ed4e54
 #define XPATH_STATUS_TAG "//node_state[@uname='%s']/%s"
ed4e54
@@ -766,14 +770,19 @@ erase_xpath_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void
ed4e54
 void
ed4e54
 erase_status_tag(const char *uname, const char *tag, int options)
ed4e54
 {
ed4e54
-    if (fsa_cib_conn && uname) {
ed4e54
+    CRM_CHECK(uname != NULL, return);
ed4e54
+
ed4e54
+    if (fsa_cib_conn == NULL) {
ed4e54
+        crm_warn("Unable to delete CIB '%s' section for node %s: "
ed4e54
+                 "no CIB connection", tag, uname);
ed4e54
+    } else {
ed4e54
         int call_id;
ed4e54
         char *xpath = crm_strdup_printf(XPATH_STATUS_TAG, uname, tag);
ed4e54
 
ed4e54
-        crm_info("Deleting %s status entries for %s " CRM_XS " xpath=%s",
ed4e54
-                 tag, uname, xpath);
ed4e54
-        call_id = fsa_cib_conn->cmds->remove(fsa_cib_conn, xpath, NULL,
ed4e54
-                                             cib_quorum_override | cib_xpath | options);
ed4e54
+        options |= cib_quorum_override|cib_xpath;
ed4e54
+        call_id = fsa_cib_conn->cmds->remove(fsa_cib_conn, xpath, NULL, options);
ed4e54
+        crm_info("Deleting CIB '%s' section for node %s (via CIB call %d) "
ed4e54
+                 CRM_XS " xpath=%s", tag, uname, call_id, xpath);
ed4e54
         fsa_register_cib_callback(call_id, FALSE, xpath, erase_xpath_callback);
ed4e54
         // CIB library handles freeing xpath
ed4e54
     }
ed4e54
-- 
ed4e54
1.8.3.1
ed4e54
ed4e54
ed4e54
From 73510818bc9905dcc130893198590b10c0067425 Mon Sep 17 00:00:00 2001
ed4e54
From: Ken Gaillot <kgaillot@redhat.com>
ed4e54
Date: Fri, 13 Dec 2019 10:36:56 -0600
ed4e54
Subject: [PATCH 7/8] Refactor: controller: move erase_status_tag() to
ed4e54
 controld_based.c
ed4e54
ed4e54
---
ed4e54
 daemons/controld/controld_based.c | 38 ++++++++++++++++++++++++++++++++++++++
ed4e54
 daemons/controld/controld_utils.c | 37 -------------------------------------
ed4e54
 2 files changed, 38 insertions(+), 37 deletions(-)
ed4e54
ed4e54
diff --git a/daemons/controld/controld_based.c b/daemons/controld/controld_based.c
ed4e54
index e6a4612..1db5650 100644
ed4e54
--- a/daemons/controld/controld_based.c
ed4e54
+++ b/daemons/controld/controld_based.c
ed4e54
@@ -168,3 +168,41 @@ controld_action_is_recordable(const char *action)
ed4e54
     }
ed4e54
     return TRUE;
ed4e54
 }
ed4e54
+
ed4e54
+static void
ed4e54
+erase_xpath_callback(xmlNode *msg, int call_id, int rc, xmlNode *output,
ed4e54
+                     void *user_data)
ed4e54
+{
ed4e54
+    char *xpath = user_data;
ed4e54
+
ed4e54
+    if (rc == 0) {
ed4e54
+        crm_debug("Deletion of '%s' from CIB (via CIB call %d) succeeded",
ed4e54
+                  xpath, call_id);
ed4e54
+    } else {
ed4e54
+        crm_warn("Deletion of '%s' from CIB (via CIB call %d) failed: %s "
ed4e54
+                 CRM_XS " rc=%d", xpath, call_id, pcmk_strerror(rc), rc);
ed4e54
+    }
ed4e54
+}
ed4e54
+
ed4e54
+#define XPATH_STATUS_TAG "//node_state[@uname='%s']/%s"
ed4e54
+
ed4e54
+void
ed4e54
+erase_status_tag(const char *uname, const char *tag, int options)
ed4e54
+{
ed4e54
+    CRM_CHECK(uname != NULL, return);
ed4e54
+
ed4e54
+    if (fsa_cib_conn == NULL) {
ed4e54
+        crm_warn("Unable to delete CIB '%s' section for node %s: "
ed4e54
+                 "no CIB connection", tag, uname);
ed4e54
+    } else {
ed4e54
+        int call_id;
ed4e54
+        char *xpath = crm_strdup_printf(XPATH_STATUS_TAG, uname, tag);
ed4e54
+
ed4e54
+        options |= cib_quorum_override|cib_xpath;
ed4e54
+        call_id = fsa_cib_conn->cmds->remove(fsa_cib_conn, xpath, NULL, options);
ed4e54
+        crm_info("Deleting CIB '%s' section for node %s (via CIB call %d) "
ed4e54
+                 CRM_XS " xpath=%s", tag, uname, call_id, xpath);
ed4e54
+        fsa_register_cib_callback(call_id, FALSE, xpath, erase_xpath_callback);
ed4e54
+        // CIB library handles freeing xpath
ed4e54
+    }
ed4e54
+}
ed4e54
diff --git a/daemons/controld/controld_utils.c b/daemons/controld/controld_utils.c
ed4e54
index bb8ace9..4ed6aeb 100644
ed4e54
--- a/daemons/controld/controld_utils.c
ed4e54
+++ b/daemons/controld/controld_utils.c
ed4e54
@@ -751,43 +751,6 @@ update_dc(xmlNode * msg)
ed4e54
     return TRUE;
ed4e54
 }
ed4e54
 
ed4e54
-static void
ed4e54
-erase_xpath_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data)
ed4e54
-{
ed4e54
-    char *xpath = user_data;
ed4e54
-
ed4e54
-    if (rc == 0) {
ed4e54
-        crm_debug("Deletion of '%s' from CIB (via CIB call %d) succeeded",
ed4e54
-                  xpath, call_id);
ed4e54
-    } else {
ed4e54
-        crm_warn("Deletion of '%s' from CIB (via CIB call %d) failed: %s "
ed4e54
-                 CRM_XS " rc=%d", xpath, call_id, pcmk_strerror(rc), rc);
ed4e54
-    }
ed4e54
-}
ed4e54
-
ed4e54
-#define XPATH_STATUS_TAG "//node_state[@uname='%s']/%s"
ed4e54
-
ed4e54
-void
ed4e54
-erase_status_tag(const char *uname, const char *tag, int options)
ed4e54
-{
ed4e54
-    CRM_CHECK(uname != NULL, return);
ed4e54
-
ed4e54
-    if (fsa_cib_conn == NULL) {
ed4e54
-        crm_warn("Unable to delete CIB '%s' section for node %s: "
ed4e54
-                 "no CIB connection", tag, uname);
ed4e54
-    } else {
ed4e54
-        int call_id;
ed4e54
-        char *xpath = crm_strdup_printf(XPATH_STATUS_TAG, uname, tag);
ed4e54
-
ed4e54
-        options |= cib_quorum_override|cib_xpath;
ed4e54
-        call_id = fsa_cib_conn->cmds->remove(fsa_cib_conn, xpath, NULL, options);
ed4e54
-        crm_info("Deleting CIB '%s' section for node %s (via CIB call %d) "
ed4e54
-                 CRM_XS " xpath=%s", tag, uname, call_id, xpath);
ed4e54
-        fsa_register_cib_callback(call_id, FALSE, xpath, erase_xpath_callback);
ed4e54
-        // CIB library handles freeing xpath
ed4e54
-    }
ed4e54
-}
ed4e54
-
ed4e54
 void crmd_peer_down(crm_node_t *peer, bool full) 
ed4e54
 {
ed4e54
     if(full && peer->state == NULL) {
ed4e54
-- 
ed4e54
1.8.3.1
ed4e54
ed4e54
ed4e54
From c4cc759e733db894957d039f65572cc21704224f Mon Sep 17 00:00:00 2001
ed4e54
From: Ken Gaillot <kgaillot@redhat.com>
ed4e54
Date: Fri, 13 Dec 2019 11:16:25 -0600
ed4e54
Subject: [PATCH 8/8] Refactor: controller: improve efficiency when deleting
ed4e54
 node state
ed4e54
ed4e54
Rename erase_status_xpath() to controld_delete_node_state() to follow current
ed4e54
naming practice.
ed4e54
ed4e54
Instead of passing it a node_state subsection name, pass a new enum value
ed4e54
indicating what to erase (resource history, transient node attributes, or
ed4e54
both). This allows us to improve the log messages further, as well as improving
ed4e54
efficiency when both need to be cleared.
ed4e54
---
ed4e54
 daemons/controld/controld_based.c     | 69 +++++++++++++++++++++++++++--------
ed4e54
 daemons/controld/controld_callbacks.c |  8 +++-
ed4e54
 daemons/controld/controld_execd.c     |  3 +-
ed4e54
 daemons/controld/controld_fencing.c   |  5 +--
ed4e54
 daemons/controld/controld_join_dc.c   |  3 +-
ed4e54
 daemons/controld/controld_remote_ra.c | 24 ++++++------
ed4e54
 daemons/controld/controld_utils.h     | 11 +++++-
ed4e54
 7 files changed, 87 insertions(+), 36 deletions(-)
ed4e54
ed4e54
diff --git a/daemons/controld/controld_based.c b/daemons/controld/controld_based.c
ed4e54
index 1db5650..008a02d 100644
ed4e54
--- a/daemons/controld/controld_based.c
ed4e54
+++ b/daemons/controld/controld_based.c
ed4e54
@@ -170,39 +170,76 @@ controld_action_is_recordable(const char *action)
ed4e54
 }
ed4e54
 
ed4e54
 static void
ed4e54
-erase_xpath_callback(xmlNode *msg, int call_id, int rc, xmlNode *output,
ed4e54
-                     void *user_data)
ed4e54
+cib_delete_callback(xmlNode *msg, int call_id, int rc, xmlNode *output,
ed4e54
+                    void *user_data)
ed4e54
 {
ed4e54
-    char *xpath = user_data;
ed4e54
+    char *desc = user_data;
ed4e54
 
ed4e54
     if (rc == 0) {
ed4e54
-        crm_debug("Deletion of '%s' from CIB (via CIB call %d) succeeded",
ed4e54
-                  xpath, call_id);
ed4e54
+        crm_debug("Deletion of %s (via CIB call %d) succeeded", desc, call_id);
ed4e54
     } else {
ed4e54
-        crm_warn("Deletion of '%s' from CIB (via CIB call %d) failed: %s "
ed4e54
-                 CRM_XS " rc=%d", xpath, call_id, pcmk_strerror(rc), rc);
ed4e54
+        crm_warn("Deletion of %s (via CIB call %d) failed: %s " CRM_XS " rc=%d",
ed4e54
+                 desc, call_id, pcmk_strerror(rc), rc);
ed4e54
     }
ed4e54
 }
ed4e54
 
ed4e54
-#define XPATH_STATUS_TAG "//node_state[@uname='%s']/%s"
ed4e54
+// Searches for various portions of node_state to delete
ed4e54
 
ed4e54
+// Match a particular node's node_state (takes node name 1x)
ed4e54
+#define XPATH_NODE_STATE        "//" XML_CIB_TAG_STATE "[@" XML_ATTR_UNAME "='%s']"
ed4e54
+
ed4e54
+// Node's lrm section (name 1x)
ed4e54
+#define XPATH_NODE_LRM          XPATH_NODE_STATE "/" XML_CIB_TAG_LRM
ed4e54
+
ed4e54
+// Node's transient_attributes section (name 1x)
ed4e54
+#define XPATH_NODE_ATTRS        XPATH_NODE_STATE "/" XML_TAG_TRANSIENT_NODEATTRS
ed4e54
+
ed4e54
+// Everything under node_state (name 1x)
ed4e54
+#define XPATH_NODE_ALL          XPATH_NODE_STATE "/*"
ed4e54
+
ed4e54
+/*!
ed4e54
+ * \internal
ed4e54
+ * \brief Delete subsection of a node's CIB node_state
ed4e54
+ *
ed4e54
+ * \param[in] uname    Desired node
ed4e54
+ * \param[in] section  Subsection of node_state to delete
ed4e54
+ * \param[in] options  CIB call options to use
ed4e54
+ */
ed4e54
 void
ed4e54
-erase_status_tag(const char *uname, const char *tag, int options)
ed4e54
+controld_delete_node_state(const char *uname, enum controld_section_e section,
ed4e54
+                           int options)
ed4e54
 {
ed4e54
+    char *xpath = NULL;
ed4e54
+    char *desc = NULL;
ed4e54
+
ed4e54
     CRM_CHECK(uname != NULL, return);
ed4e54
+    switch (section) {
ed4e54
+        case controld_section_lrm:
ed4e54
+            xpath = crm_strdup_printf(XPATH_NODE_LRM, uname);
ed4e54
+            desc = crm_strdup_printf("resource history for node %s", uname);
ed4e54
+            break;
ed4e54
+        case controld_section_attrs:
ed4e54
+            xpath = crm_strdup_printf(XPATH_NODE_ATTRS, uname);
ed4e54
+            desc = crm_strdup_printf("transient attributes for node %s", uname);
ed4e54
+            break;
ed4e54
+        case controld_section_all:
ed4e54
+            xpath = crm_strdup_printf(XPATH_NODE_ALL, uname);
ed4e54
+            desc = crm_strdup_printf("all state for node %s", uname);
ed4e54
+            break;
ed4e54
+    }
ed4e54
 
ed4e54
     if (fsa_cib_conn == NULL) {
ed4e54
-        crm_warn("Unable to delete CIB '%s' section for node %s: "
ed4e54
-                 "no CIB connection", tag, uname);
ed4e54
+        crm_warn("Unable to delete %s: no CIB connection", desc);
ed4e54
+        free(desc);
ed4e54
     } else {
ed4e54
         int call_id;
ed4e54
-        char *xpath = crm_strdup_printf(XPATH_STATUS_TAG, uname, tag);
ed4e54
 
ed4e54
         options |= cib_quorum_override|cib_xpath;
ed4e54
         call_id = fsa_cib_conn->cmds->remove(fsa_cib_conn, xpath, NULL, options);
ed4e54
-        crm_info("Deleting CIB '%s' section for node %s (via CIB call %d) "
ed4e54
-                 CRM_XS " xpath=%s", tag, uname, call_id, xpath);
ed4e54
-        fsa_register_cib_callback(call_id, FALSE, xpath, erase_xpath_callback);
ed4e54
-        // CIB library handles freeing xpath
ed4e54
+        crm_info("Deleting %s (via CIB call %d) " CRM_XS " xpath=%s",
ed4e54
+                 desc, call_id, xpath);
ed4e54
+        fsa_register_cib_callback(call_id, FALSE, desc, cib_delete_callback);
ed4e54
+        // CIB library handles freeing desc
ed4e54
     }
ed4e54
+    free(xpath);
ed4e54
 }
ed4e54
diff --git a/daemons/controld/controld_callbacks.c b/daemons/controld/controld_callbacks.c
ed4e54
index 5cbd392..f7e3db2 100644
ed4e54
--- a/daemons/controld/controld_callbacks.c
ed4e54
+++ b/daemons/controld/controld_callbacks.c
ed4e54
@@ -200,14 +200,18 @@ peer_update_callback(enum crm_status_type type, crm_node_t * node, const void *d
ed4e54
                  * transient attributes intact until it rejoins.
ed4e54
                  */
ed4e54
                 if (compare_version(fsa_our_dc_version, "3.0.9") > 0) {
ed4e54
-                    erase_status_tag(node->uname, XML_TAG_TRANSIENT_NODEATTRS, cib_scope_local);
ed4e54
+                    controld_delete_node_state(node->uname,
ed4e54
+                                               controld_section_attrs,
ed4e54
+                                               cib_scope_local);
ed4e54
                 }
ed4e54
 
ed4e54
             } else if(AM_I_DC) {
ed4e54
                 if (appeared) {
ed4e54
                     te_trigger_stonith_history_sync(FALSE);
ed4e54
                 } else {
ed4e54
-                    erase_status_tag(node->uname, XML_TAG_TRANSIENT_NODEATTRS, cib_scope_local);
ed4e54
+                    controld_delete_node_state(node->uname,
ed4e54
+                                               controld_section_attrs,
ed4e54
+                                               cib_scope_local);
ed4e54
                 }
ed4e54
             }
ed4e54
             break;
ed4e54
diff --git a/daemons/controld/controld_execd.c b/daemons/controld/controld_execd.c
ed4e54
index 46c1958..b7deeae 100644
ed4e54
--- a/daemons/controld/controld_execd.c
ed4e54
+++ b/daemons/controld/controld_execd.c
ed4e54
@@ -1411,7 +1411,8 @@ force_reprobe(lrm_state_t *lrm_state, const char *from_sys,
ed4e54
     }
ed4e54
 
ed4e54
     /* Now delete the copy in the CIB */
ed4e54
-    erase_status_tag(lrm_state->node_name, XML_CIB_TAG_LRM, cib_scope_local);
ed4e54
+    controld_delete_node_state(lrm_state->node_name, controld_section_lrm,
ed4e54
+                               cib_scope_local);
ed4e54
 
ed4e54
     /* Finally, _delete_ the value in pacemaker-attrd -- setting it to FALSE
ed4e54
      * would result in the scheduler sending us back here again
ed4e54
diff --git a/daemons/controld/controld_fencing.c b/daemons/controld/controld_fencing.c
ed4e54
index d9b1e1e..9897cf3 100644
ed4e54
--- a/daemons/controld/controld_fencing.c
ed4e54
+++ b/daemons/controld/controld_fencing.c
ed4e54
@@ -229,9 +229,8 @@ send_stonith_update(crm_action_t *action, const char *target, const char *uuid)
ed4e54
     /* Make sure it sticks */
ed4e54
     /* fsa_cib_conn->cmds->bump_epoch(fsa_cib_conn, cib_quorum_override|cib_scope_local);    */
ed4e54
 
ed4e54
-    erase_status_tag(peer->uname, XML_CIB_TAG_LRM, cib_scope_local);
ed4e54
-    erase_status_tag(peer->uname, XML_TAG_TRANSIENT_NODEATTRS, cib_scope_local);
ed4e54
-
ed4e54
+    controld_delete_node_state(peer->uname, controld_section_all,
ed4e54
+                               cib_scope_local);
ed4e54
     free_xml(node_state);
ed4e54
     return;
ed4e54
 }
ed4e54
diff --git a/daemons/controld/controld_join_dc.c b/daemons/controld/controld_join_dc.c
ed4e54
index 54324b2..ac6b430 100644
ed4e54
--- a/daemons/controld/controld_join_dc.c
ed4e54
+++ b/daemons/controld/controld_join_dc.c
ed4e54
@@ -587,7 +587,8 @@ do_dc_join_ack(long long action,
ed4e54
     /* Update CIB with node's current executor state. A new transition will be
ed4e54
      * triggered later, when the CIB notifies us of the change.
ed4e54
      */
ed4e54
-    erase_status_tag(join_from, XML_CIB_TAG_LRM, cib_scope_local);
ed4e54
+    controld_delete_node_state(join_from, controld_section_lrm,
ed4e54
+                               cib_scope_local);
ed4e54
     if (safe_str_eq(join_from, fsa_our_uname)) {
ed4e54
         xmlNode *now_dc_lrmd_state = do_lrm_query(TRUE, fsa_our_uname);
ed4e54
 
ed4e54
diff --git a/daemons/controld/controld_remote_ra.c b/daemons/controld/controld_remote_ra.c
ed4e54
index 4fbae45..2d3dfa7 100644
ed4e54
--- a/daemons/controld/controld_remote_ra.c
ed4e54
+++ b/daemons/controld/controld_remote_ra.c
ed4e54
@@ -181,13 +181,13 @@ remote_node_up(const char *node_name)
ed4e54
     CRM_CHECK(node_name != NULL, return);
ed4e54
     crm_info("Announcing pacemaker_remote node %s", node_name);
ed4e54
 
ed4e54
-    /* Clear node's operation history. The node's transient attributes should
ed4e54
-     * and normally will be cleared when the node leaves, but since remote node
ed4e54
-     * state has a number of corner cases, clear them here as well, to be sure.
ed4e54
+    /* Clear node's entire state (resource history and transient attributes).
ed4e54
+     * The transient attributes should and normally will be cleared when the
ed4e54
+     * node leaves, but since remote node state has a number of corner cases,
ed4e54
+     * clear them here as well, to be sure.
ed4e54
      */
ed4e54
     call_opt = crmd_cib_smart_opt();
ed4e54
-    erase_status_tag(node_name, XML_CIB_TAG_LRM, call_opt);
ed4e54
-    erase_status_tag(node_name, XML_TAG_TRANSIENT_NODEATTRS, call_opt);
ed4e54
+    controld_delete_node_state(node_name, controld_section_all, call_opt);
ed4e54
 
ed4e54
     /* Clear node's probed attribute */
ed4e54
     update_attrd(node_name, CRM_OP_PROBED, NULL, NULL, TRUE);
ed4e54
@@ -252,15 +252,15 @@ remote_node_down(const char *node_name, const enum down_opts opts)
ed4e54
     /* Purge node from attrd's memory */
ed4e54
     update_attrd_remote_node_removed(node_name, NULL);
ed4e54
 
ed4e54
-    /* Purge node's transient attributes */
ed4e54
-    erase_status_tag(node_name, XML_TAG_TRANSIENT_NODEATTRS, call_opt);
ed4e54
-
ed4e54
-    /* Normally, the LRM operation history should be kept until the node comes
ed4e54
-     * back up. However, after a successful fence, we want to clear it, so we
ed4e54
-     * don't think resources are still running on the node.
ed4e54
+    /* Normally, only node attributes should be erased, and the resource history
ed4e54
+     * should be kept until the node comes back up. However, after a successful
ed4e54
+     * fence, we want to clear the history as well, so we don't think resources
ed4e54
+     * are still running on the node.
ed4e54
      */
ed4e54
     if (opts == DOWN_ERASE_LRM) {
ed4e54
-        erase_status_tag(node_name, XML_CIB_TAG_LRM, call_opt);
ed4e54
+        controld_delete_node_state(node_name, controld_section_all, call_opt);
ed4e54
+    } else {
ed4e54
+        controld_delete_node_state(node_name, controld_section_attrs, call_opt);
ed4e54
     }
ed4e54
 
ed4e54
     /* Ensure node is in the remote peer cache with lost state */
ed4e54
diff --git a/daemons/controld/controld_utils.h b/daemons/controld/controld_utils.h
ed4e54
index cf04f13..f902361 100644
ed4e54
--- a/daemons/controld/controld_utils.h
ed4e54
+++ b/daemons/controld/controld_utils.h
ed4e54
@@ -70,7 +70,6 @@ xmlNode *create_node_state_update(crm_node_t *node, int flags,
ed4e54
                                   xmlNode *parent, const char *source);
ed4e54
 void populate_cib_nodes(enum node_update_flags flags, const char *source);
ed4e54
 void crm_update_quorum(gboolean quorum, gboolean force_update);
ed4e54
-void erase_status_tag(const char *uname, const char *tag, int options);
ed4e54
 void controld_close_attrd_ipc(void);
ed4e54
 void update_attrd(const char *host, const char *name, const char *value, const char *user_name, gboolean is_remote_node);
ed4e54
 void update_attrd_remote_node_removed(const char *host, const char *user_name);
ed4e54
@@ -87,6 +86,16 @@ unsigned int cib_op_timeout(void);
ed4e54
 bool feature_set_compatible(const char *dc_version, const char *join_version);
ed4e54
 bool controld_action_is_recordable(const char *action);
ed4e54
 
ed4e54
+// Subsections of node_state
ed4e54
+enum controld_section_e {
ed4e54
+    controld_section_lrm,
ed4e54
+    controld_section_attrs,
ed4e54
+    controld_section_all,
ed4e54
+};
ed4e54
+
ed4e54
+void controld_delete_node_state(const char *uname,
ed4e54
+                                enum controld_section_e section, int options);
ed4e54
+
ed4e54
 const char *get_node_id(xmlNode *lrm_rsc_op);
ed4e54
 
ed4e54
 /* Convenience macro for registering a CIB callback
ed4e54
-- 
ed4e54
1.8.3.1
ed4e54