Blame SOURCES/003-security-log.patch

a125f5
From bccf845261c6e69fc4e6bdb8cf4e630a4a4ec7a8 Mon Sep 17 00:00:00 2001
a125f5
From: Ken Gaillot <kgaillot@redhat.com>
a125f5
Date: Fri, 12 Apr 2019 09:46:51 -0500
a125f5
Subject: [PATCH] Log: libcrmcluster: improve CPG membership messages
a125f5
a125f5
Show CPG event reason when provided by corosync, make messages more readable,
a125f5
upgrade duplicate pid messages to warnings (and log only one message in those
a125f5
cases).
a125f5
---
a125f5
 lib/cluster/cpg.c | 91 ++++++++++++++++++++++++++++++++++---------------------
a125f5
 1 file changed, 56 insertions(+), 35 deletions(-)
a125f5
a125f5
diff --git a/lib/cluster/cpg.c b/lib/cluster/cpg.c
a125f5
index 2898c51..ef6fa36 100644
a125f5
--- a/lib/cluster/cpg.c
a125f5
+++ b/lib/cluster/cpg.c
a125f5
@@ -360,8 +360,6 @@ pcmk_message_common_cs(cpg_handle_t handle, uint32_t nodeid, uint32_t pid, void
a125f5
     return NULL;
a125f5
 }
a125f5
 
a125f5
-#define PEER_NAME(peer) ((peer)? ((peer)->uname? (peer)->uname : "<unknown>") : "<none>")
a125f5
-
a125f5
 static int cmp_member_list_nodeid(const void *first,
a125f5
                                   const void *second)
a125f5
 {
a125f5
@@ -376,6 +374,32 @@ static int cmp_member_list_nodeid(const void *first,
a125f5
     return 0;
a125f5
 }
a125f5
 
a125f5
+static const char *
a125f5
+cpgreason2str(cpg_reason_t reason)
a125f5
+{
a125f5
+    switch (reason) {
a125f5
+        case CPG_REASON_JOIN:       return " via cpg_join";
a125f5
+        case CPG_REASON_LEAVE:      return " via cpg_leave";
a125f5
+        case CPG_REASON_NODEDOWN:   return " via cluster exit";
a125f5
+        case CPG_REASON_NODEUP:     return " via cluster join";
a125f5
+        case CPG_REASON_PROCDOWN:   return " for unknown reason";
a125f5
+        default:                    break;
a125f5
+    }
a125f5
+    return "";
a125f5
+}
a125f5
+
a125f5
+static inline const char *
a125f5
+peer_name(crm_node_t *peer)
a125f5
+{
a125f5
+    if (peer == NULL) {
a125f5
+        return "unknown node";
a125f5
+    } else if (peer->uname == NULL) {
a125f5
+        return "peer node";
a125f5
+    } else {
a125f5
+        return peer->uname;
a125f5
+    }
a125f5
+}
a125f5
+
a125f5
 void
a125f5
 pcmk_cpg_membership(cpg_handle_t handle,
a125f5
                     const struct cpg_name *groupName,
a125f5
@@ -387,7 +411,7 @@ pcmk_cpg_membership(cpg_handle_t handle,
a125f5
     gboolean found = FALSE;
a125f5
     static int counter = 0;
a125f5
     uint32_t local_nodeid = get_local_nodeid(handle);
a125f5
-    const struct cpg_address *key, **rival, **sorted;
a125f5
+    const struct cpg_address *key, **sorted;
a125f5
 
a125f5
     sorted = malloc(member_list_entries * sizeof(const struct cpg_address *));
a125f5
     CRM_ASSERT(sorted != NULL);
a125f5
@@ -401,10 +425,7 @@ pcmk_cpg_membership(cpg_handle_t handle,
a125f5
 
a125f5
     for (i = 0; i < left_list_entries; i++) {
a125f5
         crm_node_t *peer = crm_find_peer(left_list[i].nodeid, NULL);
a125f5
-
a125f5
-        crm_info("Group event %s.%d: node %u (%s) left: %llu",
a125f5
-                 groupName->value, counter, left_list[i].nodeid,
a125f5
-                 PEER_NAME(peer), (unsigned long long) left_list[i].pid);
a125f5
+        const struct cpg_address **rival = NULL;
a125f5
 
a125f5
         /* in CPG world, NODE:PROCESS-IN-MEMBERSHIP-OF-G is an 1:N relation
a125f5
            and not playing by this rule may go wild in case of multiple
a125f5
@@ -418,7 +439,7 @@ pcmk_cpg_membership(cpg_handle_t handle,
a125f5
            also API end-point carriers, and that's what matters locally
a125f5
            (who's the winner);
a125f5
            remotely, we will just compare leave_list and member_list and if
a125f5
-           the left process has it's node retained in member_list (under some
a125f5
+           the left process has its node retained in member_list (under some
a125f5
            other PID, anyway) we will just ignore it as well
a125f5
            XXX: long-term fix is to establish in-out PID-aware tracking? */
a125f5
         if (peer) {
a125f5
@@ -426,50 +447,51 @@ pcmk_cpg_membership(cpg_handle_t handle,
a125f5
             rival = bsearch(&key, sorted, member_list_entries,
a125f5
                             sizeof(const struct cpg_address *),
a125f5
                             cmp_member_list_nodeid);
a125f5
-            if (rival == NULL) {
a125f5
+        }
a125f5
+
a125f5
+        if (rival == NULL) {
a125f5
+            crm_info("Group %s event %d: %s (node %u pid %u) left%s",
a125f5
+                     groupName->value, counter, peer_name(peer),
a125f5
+                     left_list[i].nodeid, left_list[i].pid,
a125f5
+                     cpgreason2str(left_list[i].reason));
a125f5
+            if (peer) {
a125f5
                 crm_update_peer_proc(__FUNCTION__, peer, crm_proc_cpg,
a125f5
                                      OFFLINESTATUS);
a125f5
-            } else if (left_list[i].nodeid == local_nodeid) {
a125f5
-                crm_info("Ignoring the above event %s.%d, comes from a local"
a125f5
-                         " rival process (presumably not us): %llu",
a125f5
-                         groupName->value, counter,
a125f5
-                         (unsigned long long) left_list[i].pid);
a125f5
-            } else {
a125f5
-                crm_info("Ignoring the above event %s.%d, comes from"
a125f5
-                         " a rival-rich node: %llu (e.g. %llu process"
a125f5
-                         " carries on)",
a125f5
-                         groupName->value, counter,
a125f5
-                         (unsigned long long) left_list[i].pid,
a125f5
-                         (unsigned long long) (*rival)->pid);
a125f5
             }
a125f5
+        } else if (left_list[i].nodeid == local_nodeid) {
a125f5
+            crm_warn("Group %s event %d: duplicate local pid %u left%s",
a125f5
+                     groupName->value, counter,
a125f5
+                     left_list[i].pid, cpgreason2str(left_list[i].reason));
a125f5
+        } else {
a125f5
+            crm_warn("Group %s event %d: "
a125f5
+                     "%s (node %u) duplicate pid %u left%s (%u remains)",
a125f5
+                     groupName->value, counter, peer_name(peer),
a125f5
+                     left_list[i].nodeid, left_list[i].pid,
a125f5
+                     cpgreason2str(left_list[i].reason), (*rival)->pid);
a125f5
         }
a125f5
     }
a125f5
     free(sorted);
a125f5
     sorted = NULL;
a125f5
 
a125f5
     for (i = 0; i < joined_list_entries; i++) {
a125f5
-        crm_info("Group event %s.%d: node %u joined: %llu"
a125f5
-                 " (unchecked for rivals)",
a125f5
+        crm_info("Group %s event %d: node %u pid %u joined%s",
a125f5
                  groupName->value, counter, joined_list[i].nodeid,
a125f5
-                 (unsigned long long) joined_list[i].pid);
a125f5
+                 joined_list[i].pid, cpgreason2str(joined_list[i].reason));
a125f5
     }
a125f5
 
a125f5
     for (i = 0; i < member_list_entries; i++) {
a125f5
         crm_node_t *peer = crm_get_peer(member_list[i].nodeid, NULL);
a125f5
 
a125f5
-        crm_info("Group event %s.%d: node %u (%s) is member: %llu"
a125f5
-                 " (at least once)",
a125f5
-                 groupName->value, counter, member_list[i].nodeid,
a125f5
-                 PEER_NAME(peer), member_list[i].pid);
a125f5
-
a125f5
         if (member_list[i].nodeid == local_nodeid
a125f5
                 && member_list[i].pid != getpid()) {
a125f5
             /* see the note above */
a125f5
-            crm_info("Ignoring the above event %s.%d, comes from a local rival"
a125f5
-                     " process: %llu", groupName->value, counter,
a125f5
-                     (unsigned long long) member_list[i].pid);
a125f5
+            crm_warn("Group %s event %d: detected duplicate local pid %u",
a125f5
+                     groupName->value, counter, member_list[i].pid);
a125f5
             continue;
a125f5
         }
a125f5
+        crm_info("Group %s event %d: %s (node %u pid %u) is member",
a125f5
+                 groupName->value, counter, peer_name(peer),
a125f5
+                 member_list[i].nodeid, member_list[i].pid);
a125f5
 
a125f5
         /* If the caller left auto-reaping enabled, this will also update the
a125f5
          * state to member.
a125f5
@@ -492,8 +514,7 @@ pcmk_cpg_membership(cpg_handle_t handle,
a125f5
 
a125f5
             } else if (now > (peer->when_lost + 60)) {
a125f5
                 // If it persists for more than a minute, update the state
a125f5
-                crm_warn("Node %u member of group %s but believed offline"
a125f5
-                         " (unchecked for rivals)",
a125f5
+                crm_warn("Node %u is member of group %s but was believed offline",
a125f5
                          member_list[i].nodeid, groupName->value);
a125f5
                 crm_update_peer_state(__FUNCTION__, peer, CRM_NODE_MEMBER, 0);
a125f5
             }
a125f5
@@ -505,7 +526,7 @@ pcmk_cpg_membership(cpg_handle_t handle,
a125f5
     }
a125f5
 
a125f5
     if (!found) {
a125f5
-        crm_err("We're not part of CPG group '%s' anymore!", groupName->value);
a125f5
+        crm_err("Local node was evicted from group %s", groupName->value);
a125f5
         cpg_evicted = TRUE;
a125f5
     }
a125f5
 
a125f5
-- 
a125f5
1.8.3.1
a125f5