Blame SOURCES/008-security-log.patch

c54a00
From 83811e2115f5516a7faec2e653b1be3d58b35a79 Mon Sep 17 00:00:00 2001
c54a00
From: Ken Gaillot <kgaillot@redhat.com>
c54a00
Date: Fri, 12 Apr 2019 09:46:51 -0500
c54a00
Subject: [PATCH 1/2] Log: libcrmcluster: improve CPG membership messages
c54a00
c54a00
Show CPG event reason when provided by corosync, make messages more readable,
c54a00
upgrade duplicate pid messages to warnings (and log only one message in those
c54a00
cases).
c54a00
c54a00
This also fixes a typo in 4d6f6e01 that led to using an index with the wrong
c54a00
array, potentially leading to use of an uninitialized value or invalid memory
c54a00
access.
c54a00
---
c54a00
 lib/cluster/cpg.c | 95 +++++++++++++++++++++++++++++++++----------------------
c54a00
 1 file changed, 58 insertions(+), 37 deletions(-)
c54a00
c54a00
diff --git a/lib/cluster/cpg.c b/lib/cluster/cpg.c
c54a00
index c5ecc67..85476be 100644
c54a00
--- a/lib/cluster/cpg.c
c54a00
+++ b/lib/cluster/cpg.c
c54a00
@@ -399,6 +399,32 @@ static int cmp_member_list_nodeid(const void *first,
c54a00
     return 0;
c54a00
 }
c54a00
 
c54a00
+static const char *
c54a00
+cpgreason2str(cpg_reason_t reason)
c54a00
+{
c54a00
+    switch (reason) {
c54a00
+        case CPG_REASON_JOIN:       return " via cpg_join";
c54a00
+        case CPG_REASON_LEAVE:      return " via cpg_leave";
c54a00
+        case CPG_REASON_NODEDOWN:   return " via cluster exit";
c54a00
+        case CPG_REASON_NODEUP:     return " via cluster join";
c54a00
+        case CPG_REASON_PROCDOWN:   return " for unknown reason";
c54a00
+        default:                    break;
c54a00
+    }
c54a00
+    return "";
c54a00
+}
c54a00
+
c54a00
+static inline const char *
c54a00
+peer_name(crm_node_t *peer)
c54a00
+{
c54a00
+    if (peer == NULL) {
c54a00
+        return "unknown node";
c54a00
+    } else if (peer->uname == NULL) {
c54a00
+        return "peer node";
c54a00
+    } else {
c54a00
+        return peer->uname;
c54a00
+    }
c54a00
+}
c54a00
+
c54a00
 void
c54a00
 pcmk_cpg_membership(cpg_handle_t handle,
c54a00
                     const struct cpg_name *groupName,
c54a00
@@ -410,7 +436,7 @@ pcmk_cpg_membership(cpg_handle_t handle,
c54a00
     gboolean found = FALSE;
c54a00
     static int counter = 0;
c54a00
     uint32_t local_nodeid = get_local_nodeid(handle);
c54a00
-    const struct cpg_address *key, **rival, **sorted;
c54a00
+    const struct cpg_address *key, **sorted;
c54a00
 
c54a00
     sorted = malloc(member_list_entries * sizeof(const struct cpg_address *));
c54a00
     CRM_ASSERT(sorted != NULL);
c54a00
@@ -424,11 +450,7 @@ pcmk_cpg_membership(cpg_handle_t handle,
c54a00
 
c54a00
     for (i = 0; i < left_list_entries; i++) {
c54a00
         crm_node_t *peer = crm_find_peer(left_list[i].nodeid, NULL);
c54a00
-
c54a00
-        crm_info("Node %u left group %s (peer=%s:%llu, counter=%d.%d)",
c54a00
-                 left_list[i].nodeid, groupName->value,
c54a00
-                 (peer? peer->uname : "<none>"),
c54a00
-                 (unsigned long long) left_list[i].pid, counter, i);
c54a00
+        const struct cpg_address **rival = NULL;
c54a00
 
c54a00
         /* in CPG world, NODE:PROCESS-IN-MEMBERSHIP-OF-G is an 1:N relation
c54a00
            and not playing by this rule may go wild in case of multiple
c54a00
@@ -442,7 +464,7 @@ pcmk_cpg_membership(cpg_handle_t handle,
c54a00
            also API end-point carriers, and that's what matters locally
c54a00
            (who's the winner);
c54a00
            remotely, we will just compare leave_list and member_list and if
c54a00
-           the left process has it's node retained in member_list (under some
c54a00
+           the left process has its node retained in member_list (under some
c54a00
            other PID, anyway) we will just ignore it as well
c54a00
            XXX: long-term fix is to establish in-out PID-aware tracking? */
c54a00
         if (peer) {
c54a00
@@ -450,51 +472,51 @@ pcmk_cpg_membership(cpg_handle_t handle,
c54a00
             rival = bsearch(&key, sorted, member_list_entries,
c54a00
                             sizeof(const struct cpg_address *),
c54a00
                             cmp_member_list_nodeid);
c54a00
-            if (rival == NULL) {
c54a00
+        }
c54a00
+
c54a00
+        if (rival == NULL) {
c54a00
+            crm_info("Group %s event %d: %s (node %u pid %u) left%s",
c54a00
+                     groupName->value, counter, peer_name(peer),
c54a00
+                     left_list[i].nodeid, left_list[i].pid,
c54a00
+                     cpgreason2str(left_list[i].reason));
c54a00
+            if (peer) {
c54a00
                 crm_update_peer_proc(__FUNCTION__, peer, crm_proc_cpg,
c54a00
                                      OFFLINESTATUS);
c54a00
-            } else if (left_list[i].nodeid == local_nodeid) {
c54a00
-                crm_info("Ignoring the above event %s.%d, comes from a local"
c54a00
-                         " rival process (presumably not us): %llu",
c54a00
-                         groupName->value, counter,
c54a00
-                         (unsigned long long) left_list[i].pid);
c54a00
-            } else {
c54a00
-                crm_info("Ignoring the above event %s.%d, comes from"
c54a00
-                         " a rival-rich node: %llu (e.g. %llu process"
c54a00
-                         " carries on)",
c54a00
-                         groupName->value, counter,
c54a00
-                         (unsigned long long) left_list[i].pid,
c54a00
-                         (unsigned long long) (*rival)->pid);
c54a00
             }
c54a00
+        } else if (left_list[i].nodeid == local_nodeid) {
c54a00
+            crm_warn("Group %s event %d: duplicate local pid %u left%s",
c54a00
+                     groupName->value, counter,
c54a00
+                     left_list[i].pid, cpgreason2str(left_list[i].reason));
c54a00
+        } else {
c54a00
+            crm_warn("Group %s event %d: "
c54a00
+                     "%s (node %u) duplicate pid %u left%s (%u remains)",
c54a00
+                     groupName->value, counter, peer_name(peer),
c54a00
+                     left_list[i].nodeid, left_list[i].pid,
c54a00
+                     cpgreason2str(left_list[i].reason), (*rival)->pid);
c54a00
         }
c54a00
     }
c54a00
     free(sorted);
c54a00
     sorted = NULL;
c54a00
 
c54a00
     for (i = 0; i < joined_list_entries; i++) {
c54a00
-        crm_info("Node %u joined group %s (counter=%d.%d, pid=%llu,"
c54a00
-                 " unchecked for rivals)",
c54a00
-                 joined_list[i].nodeid, groupName->value, counter, i,
c54a00
-                 (unsigned long long) left_list[i].pid);
c54a00
+        crm_info("Group %s event %d: node %u pid %u joined%s",
c54a00
+                 groupName->value, counter, joined_list[i].nodeid,
c54a00
+                 joined_list[i].pid, cpgreason2str(joined_list[i].reason));
c54a00
     }
c54a00
 
c54a00
     for (i = 0; i < member_list_entries; i++) {
c54a00
         crm_node_t *peer = crm_get_peer(member_list[i].nodeid, NULL);
c54a00
 
c54a00
-        crm_info("Node %u still member of group %s (peer=%s:%llu,"
c54a00
-                 " counter=%d.%d, at least once)",
c54a00
-                 member_list[i].nodeid, groupName->value,
c54a00
-                 (peer? peer->uname : "<none>"), member_list[i].pid,
c54a00
-                 counter, i);
c54a00
-
c54a00
         if (member_list[i].nodeid == local_nodeid
c54a00
                 && member_list[i].pid != getpid()) {
c54a00
             /* see the note above */
c54a00
-            crm_info("Ignoring the above event %s.%d, comes from a local rival"
c54a00
-                     " process: %llu", groupName->value, counter,
c54a00
-                     (unsigned long long) member_list[i].pid);
c54a00
+            crm_warn("Group %s event %d: detected duplicate local pid %u",
c54a00
+                     groupName->value, counter, member_list[i].pid);
c54a00
             continue;
c54a00
         }
c54a00
+        crm_info("Group %s event %d: %s (node %u pid %u) is member",
c54a00
+                 groupName->value, counter, peer_name(peer),
c54a00
+                 member_list[i].nodeid, member_list[i].pid);
c54a00
 
c54a00
         /* Anyone that is sending us CPG messages must also be a _CPG_ member.
c54a00
          * But it's _not_ safe to assume it's in the quorum membership.
c54a00
@@ -514,9 +536,8 @@ pcmk_cpg_membership(cpg_handle_t handle,
c54a00
                  *
c54a00
                  * Set the threshold to 1 minute
c54a00
                  */
c54a00
-                crm_err("Node %s[%u] appears to be online even though we think"
c54a00
-                        " it is dead (unchecked for rivals)",
c54a00
-                        peer->uname, peer->id);
c54a00
+                crm_warn("Node %u is member of group %s but was believed offline",
c54a00
+                         member_list[i].nodeid, groupName->value);
c54a00
                 if (crm_update_peer_state(__FUNCTION__, peer, CRM_NODE_MEMBER, 0)) {
c54a00
                     peer->votes = 0;
c54a00
                 }
c54a00
@@ -529,7 +550,7 @@ pcmk_cpg_membership(cpg_handle_t handle,
c54a00
     }
c54a00
 
c54a00
     if (!found) {
c54a00
-        crm_err("We're not part of CPG group '%s' anymore!", groupName->value);
c54a00
+        crm_err("Local node was evicted from group %s", groupName->value);
c54a00
         cpg_evicted = TRUE;
c54a00
     }
c54a00
 
c54a00
-- 
c54a00
1.8.3.1
c54a00
c54a00
c54a00
From 87769895ebccc1033a876ef98a21577d6f4d1c0e Mon Sep 17 00:00:00 2001
c54a00
From: Ken Gaillot <kgaillot@redhat.com>
c54a00
Date: Thu, 18 Apr 2019 22:18:27 -0500
c54a00
Subject: [PATCH 2/2] Fix: libcrmcluster,pacemakerd: restore compatibility with
c54a00
 corosync 1
c54a00
c54a00
Pacemaker 1.1 supports older versions of corosync that don't supply
c54a00
cs_strerror() or CMAP. This simply drops usage of cs_strerror() (in favor of just
c54a00
the raw error code, as before 07a82c5c) and properly conditionalizes CMAP
c54a00
usage.
c54a00
---
c54a00
 lib/cluster/cpg.c | 12 ++++--------
c54a00
 mcp/corosync.c    | 13 +++++++------
c54a00
 2 files changed, 11 insertions(+), 14 deletions(-)
c54a00
c54a00
diff --git a/lib/cluster/cpg.c b/lib/cluster/cpg.c
c54a00
index 85476be..e4783e5 100644
c54a00
--- a/lib/cluster/cpg.c
c54a00
+++ b/lib/cluster/cpg.c
c54a00
@@ -91,15 +91,13 @@ uint32_t get_local_nodeid(cpg_handle_t handle)
c54a00
         crm_trace("Creating connection");
c54a00
         cs_repeat(retries, 5, rc = cpg_initialize(&local_handle, &cb));
c54a00
         if (rc != CS_OK) {
c54a00
-            crm_err("Could not connect to the CPG API: %s (%d)",
c54a00
-                    cs_strerror(rc), rc);
c54a00
+            crm_err("Could not connect to the CPG API (rc=%d)", rc);
c54a00
             return 0;
c54a00
         }
c54a00
 
c54a00
         rc = cpg_fd_get(local_handle, &fd);
c54a00
         if (rc != CS_OK) {
c54a00
-            crm_err("Could not obtain the CPG API connection: %s (%d)",
c54a00
-                    cs_strerror(rc), rc);
c54a00
+            crm_err("Could not obtain the CPG API connection (rc=%d)", rc);
c54a00
             goto bail;
c54a00
         }
c54a00
 
c54a00
@@ -594,15 +592,13 @@ cluster_connect_cpg(crm_cluster_t *cluster)
c54a00
 
c54a00
     cs_repeat(retries, 30, rc = cpg_initialize(&handle, &cpg_callbacks));
c54a00
     if (rc != CS_OK) {
c54a00
-        crm_err("Could not connect to the CPG API: %s (%d)",
c54a00
-                cs_strerror(rc), rc);
c54a00
+        crm_err("Could not connect to the CPG API (rc=%d)", rc);
c54a00
         goto bail;
c54a00
     }
c54a00
 
c54a00
     rc = cpg_fd_get(handle, &fd);
c54a00
     if (rc != CS_OK) {
c54a00
-        crm_err("Could not obtain the CPG API connection: %s (%d)",
c54a00
-                cs_strerror(rc), rc);
c54a00
+        crm_err("Could not obtain the CPG API connection (rc=%d)", rc);
c54a00
         goto bail;
c54a00
     }
c54a00
 
c54a00
diff --git a/mcp/corosync.c b/mcp/corosync.c
c54a00
index 407a63f..40be727 100644
c54a00
--- a/mcp/corosync.c
c54a00
+++ b/mcp/corosync.c
c54a00
@@ -118,13 +118,13 @@ cluster_connect_cfg(uint32_t * nodeid)
c54a00
     cs_repeat(retries, 30, rc = corosync_cfg_initialize(&cfg_handle, &cfg_callbacks));
c54a00
 
c54a00
     if (rc != CS_OK) {
c54a00
-        crm_err("corosync cfg init: %s (%d)", cs_strerror(rc), rc);
c54a00
+        crm_err("corosync cfg init error %d", rc);
c54a00
         return FALSE;
c54a00
     }
c54a00
 
c54a00
     rc = corosync_cfg_fd_get(cfg_handle, &fd);
c54a00
     if (rc != CS_OK) {
c54a00
-        crm_err("corosync cfg fd_get: %s (%d)", cs_strerror(rc), rc);
c54a00
+        crm_err("corosync cfg fd_get error %d", rc);
c54a00
         goto bail;
c54a00
     }
c54a00
 
c54a00
@@ -314,8 +314,8 @@ mcp_read_config(void)
c54a00
         rc = cmap_initialize(&local_handle);
c54a00
         if (rc != CS_OK) {
c54a00
             retries++;
c54a00
-            printf("cmap connection setup failed: %s.  Retrying in %ds\n", cs_strerror(rc), retries);
c54a00
-            crm_info("cmap connection setup failed: %s.  Retrying in %ds", cs_strerror(rc), retries);
c54a00
+            printf("cmap connection setup failed: error %d.  Retrying in %ds\n", rc, retries);
c54a00
+            crm_info("cmap connection setup failed: error %d.  Retrying in %ds", rc, retries);
c54a00
             sleep(retries);
c54a00
 
c54a00
         } else {
c54a00
@@ -331,10 +331,10 @@ mcp_read_config(void)
c54a00
         return FALSE;
c54a00
     }
c54a00
 
c54a00
+#if HAVE_CMAP
c54a00
     rc = cmap_fd_get(local_handle, &fd);
c54a00
     if (rc != CS_OK) {
c54a00
-        crm_err("Could not obtain the CMAP API connection: %s (%d)",
c54a00
-                cs_strerror(rc), rc);
c54a00
+        crm_err("Could not obtain the CMAP API connection: error %d", rc);
c54a00
         cmap_finalize(local_handle);
c54a00
         return FALSE;
c54a00
     }
c54a00
@@ -354,6 +354,7 @@ mcp_read_config(void)
c54a00
         cmap_finalize(local_handle);
c54a00
         return FALSE;
c54a00
     }
c54a00
+#endif
c54a00
 
c54a00
     stack = get_cluster_type();
c54a00
     crm_info("Reading configure for stack: %s", name_for_cluster_type(stack));
c54a00
-- 
c54a00
1.8.3.1
c54a00