c54a00
From f91a961112ec9796181b42aa52f9c36dfa3c6a99 Mon Sep 17 00:00:00 2001
c54a00
From: =?UTF-8?q?Jan=20Pokorn=C3=BD?= <jpokorny@redhat.com>
c54a00
Date: Tue, 2 Apr 2019 10:13:21 +0200
c54a00
Subject: [PATCH 1/7] High: libservices: fix use-after-free wrt. alert handling
c54a00
c54a00
This could possibly lead to unsolicited information disclosure by the
c54a00
means of standard output of the immediately preceding agent/resource
c54a00
execution leaking into the log stream under some circumstances.
c54a00
It was hence assigned CVE-2019-3885.
c54a00
c54a00
The provoked pathological state of pacemaker-execd daemon progresses
c54a00
towards crashing it by hitting a segmentation fault.
c54a00
---
c54a00
 lib/services/services.c       | 40 +---------------------------------------
c54a00
 lib/services/services_linux.c | 35 +++++++++++++++++++++++++++++++----
c54a00
 2 files changed, 32 insertions(+), 43 deletions(-)
c54a00
c54a00
diff --git a/lib/services/services.c b/lib/services/services.c
c54a00
index ef2c5fc..1d06c5d 100644
c54a00
--- a/lib/services/services.c
c54a00
+++ b/lib/services/services.c
c54a00
@@ -450,35 +450,6 @@ services_action_user(svc_action_t *op, const char *user)
c54a00
     return crm_user_lookup(user, &(op->opaque->uid), &(op->opaque->gid));
c54a00
 }
c54a00
 
c54a00
-static void
c54a00
-set_alert_env(gpointer key, gpointer value, gpointer user_data)
c54a00
-{
c54a00
-    int rc;
c54a00
-
c54a00
-    if (value) {
c54a00
-        rc = setenv(key, value, 1);
c54a00
-    } else {
c54a00
-        rc = unsetenv(key);
c54a00
-    }
c54a00
-
c54a00
-    if (rc < 0) {
c54a00
-        crm_perror(LOG_ERR, "setenv %s=%s",
c54a00
-                  (char*)key, (value? (char*)value : ""));
c54a00
-    } else {
c54a00
-        crm_trace("setenv %s=%s", (char*)key, (value? (char*)value : ""));
c54a00
-    }
c54a00
-}
c54a00
-
c54a00
-static void
c54a00
-unset_alert_env(gpointer key, gpointer value, gpointer user_data)
c54a00
-{
c54a00
-    if (unsetenv(key) < 0) {
c54a00
-        crm_perror(LOG_ERR, "unset %s", (char*)key);
c54a00
-    } else {
c54a00
-        crm_trace("unset %s", (char*)key);
c54a00
-    }
c54a00
-}
c54a00
-
c54a00
 /*!
c54a00
  * \brief Execute an alert agent action
c54a00
  *
c54a00
@@ -493,18 +464,9 @@ unset_alert_env(gpointer key, gpointer value, gpointer user_data)
c54a00
 gboolean
c54a00
 services_alert_async(svc_action_t *action, void (*cb)(svc_action_t *op))
c54a00
 {
c54a00
-    gboolean responsible;
c54a00
-
c54a00
     action->synchronous = false;
c54a00
     action->opaque->callback = cb;
c54a00
-    if (action->params) {
c54a00
-        g_hash_table_foreach(action->params, set_alert_env, NULL);
c54a00
-    }
c54a00
-    responsible = services_os_action_execute(action);
c54a00
-    if (action->params) {
c54a00
-        g_hash_table_foreach(action->params, unset_alert_env, NULL);
c54a00
-    }
c54a00
-    return responsible;
c54a00
+    return services_os_action_execute(action);
c54a00
 }
c54a00
 
c54a00
 #if SUPPORT_DBUS
c54a00
diff --git a/lib/services/services_linux.c b/lib/services/services_linux.c
c54a00
index 705901e..2047b64 100644
c54a00
--- a/lib/services/services_linux.c
c54a00
+++ b/lib/services/services_linux.c
c54a00
@@ -159,6 +159,25 @@ set_ocf_env_with_prefix(gpointer key, gpointer value, gpointer user_data)
c54a00
     set_ocf_env(buffer, value, user_data);
c54a00
 }
c54a00
 
c54a00
+static void
c54a00
+set_alert_env(gpointer key, gpointer value, gpointer user_data)
c54a00
+{
c54a00
+    int rc;
c54a00
+
c54a00
+    if (value != NULL) {
c54a00
+        rc = setenv(key, value, 1);
c54a00
+    } else {
c54a00
+        rc = unsetenv(key);
c54a00
+    }
c54a00
+
c54a00
+    if (rc < 0) {
c54a00
+        crm_perror(LOG_ERR, "setenv %s=%s",
c54a00
+                  (char*)key, (value? (char*)value : ""));
c54a00
+    } else {
c54a00
+        crm_trace("setenv %s=%s", (char*)key, (value? (char*)value : ""));
c54a00
+    }
c54a00
+}
c54a00
+
c54a00
 /*!
c54a00
  * \internal
c54a00
  * \brief Add environment variables suitable for an action
c54a00
@@ -168,12 +187,20 @@ set_ocf_env_with_prefix(gpointer key, gpointer value, gpointer user_data)
c54a00
 static void
c54a00
 add_action_env_vars(const svc_action_t *op)
c54a00
 {
c54a00
-    if (safe_str_eq(op->standard, PCMK_RESOURCE_CLASS_OCF) == FALSE) {
c54a00
-        return;
c54a00
+    void (*env_setter)(gpointer, gpointer, gpointer) = NULL;
c54a00
+    if (op->agent == NULL) {
c54a00
+        env_setter = set_alert_env;  /* we deal with alert handler */
c54a00
+
c54a00
+    } else if (safe_str_eq(op->standard, PCMK_RESOURCE_CLASS_OCF)) {
c54a00
+        env_setter = set_ocf_env_with_prefix;
c54a00
     }
c54a00
 
c54a00
-    if (op->params) {
c54a00
-        g_hash_table_foreach(op->params, set_ocf_env_with_prefix, NULL);
c54a00
+    if (env_setter != NULL && op->params != NULL) {
c54a00
+        g_hash_table_foreach(op->params, env_setter, NULL);
c54a00
+    }
c54a00
+
c54a00
+    if (env_setter == NULL || env_setter == set_alert_env) {
c54a00
+        return;
c54a00
     }
c54a00
 
c54a00
     set_ocf_env("OCF_RA_VERSION_MAJOR", "1", NULL);
c54a00
-- 
c54a00
1.8.3.1
c54a00
c54a00
c54a00
From ab44422fa955c2dff1ac1822521e7ad335d4aab7 Mon Sep 17 00:00:00 2001
c54a00
From: =?UTF-8?q?Jan=20Pokorn=C3=BD?= <jpokorny@redhat.com>
c54a00
Date: Mon, 15 Apr 2019 23:19:44 +0200
c54a00
Subject: [PATCH 2/7] High: pacemakerd vs. IPC/procfs confused deputy
c54a00
 authenticity issue (0/4)
c54a00
c54a00
[0/4: make crm_pid_active more precise as to when detections fail]
c54a00
c54a00
It would be bad if the function claimed the process is not active
c54a00
when the only obstacle in the detection process was that none of the
c54a00
detection methods worked for a plain lack of permissions to apply
c54a00
them.  Also, do some other minor cleanup of the function and add its
c54a00
documentation.  As an additional measure, log spamming is kept at
c54a00
minimum for repeated queries about the same PID.
c54a00
---
c54a00
 include/crm_internal.h | 21 +++++++++++
c54a00
 lib/common/utils.c     | 96 +++++++++++++++++++++++++++-----------------------
c54a00
 2 files changed, 73 insertions(+), 44 deletions(-)
c54a00
c54a00
diff --git a/include/crm_internal.h b/include/crm_internal.h
c54a00
index 5692929..0adeb7b 100644
c54a00
--- a/include/crm_internal.h
c54a00
+++ b/include/crm_internal.h
c54a00
@@ -140,6 +140,27 @@ extern int node_score_yellow;
c54a00
 extern int node_score_infinity;
c54a00
 
c54a00
 /* Assorted convenience functions */
c54a00
+
c54a00
+/*!
c54a00
+ * \internal
c54a00
+ * \brief Detect if process per PID and optionally exe path (component) exists
c54a00
+ *
c54a00
+ * \param[in] pid     PID of process assumed alive, disproving of which to try
c54a00
+ * \param[in] daemon  exe path (component) to possibly match with procfs entry
c54a00
+ *
c54a00
+ * \return -1 on invalid PID specification, -2 when the calling process has no
c54a00
+ *         (is refused an) ability to (dis)prove the predicate,
c54a00
+ *         0 if the negation of the predicate is confirmed (check-through-kill
c54a00
+ *         indicates so, or the subsequent check-through-procfs-match on
c54a00
+ *         \p daemon when provided and procfs available at the standard path),
c54a00
+ *         1 if it cannot be disproved (reliably [modulo race conditions]
c54a00
+ *         when \p daemon provided, procfs available at the standard path
c54a00
+ *         and the calling process has permissions to access the respective
c54a00
+ *         procfs location, less so otherwise, since mere check-through-kill
c54a00
+ *         is exercised without powers to exclude PID recycled in the interim).
c54a00
+ *
c54a00
+ * \note This function cannot be used to verify \e authenticity of the process.
c54a00
+ */
c54a00
 int crm_pid_active(long pid, const char *daemon);
c54a00
 void crm_make_daemon(const char *name, gboolean daemonize, const char *pidfile);
c54a00
 
c54a00
diff --git a/lib/common/utils.c b/lib/common/utils.c
c54a00
index f3f60ed..2ac7901 100644
c54a00
--- a/lib/common/utils.c
c54a00
+++ b/lib/common/utils.c
c54a00
@@ -1,19 +1,10 @@
c54a00
 /*
c54a00
- * Copyright (C) 2004 Andrew Beekhof <andrew@beekhof.net>
c54a00
+ * Copyright 2004-2019 the Pacemaker project contributors
c54a00
  *
c54a00
- * This library is free software; you can redistribute it and/or
c54a00
- * modify it under the terms of the GNU Lesser General Public
c54a00
- * License as published by the Free Software Foundation; either
c54a00
- * version 2.1 of the License, or (at your option) any later version.
c54a00
+ * The version control history for this file may have further details.
c54a00
  *
c54a00
- * This library is distributed in the hope that it will be useful,
c54a00
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
c54a00
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
c54a00
- * Lesser General Public License for more details.
c54a00
- *
c54a00
- * You should have received a copy of the GNU Lesser General Public
c54a00
- * License along with this library; if not, write to the Free Software
c54a00
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
c54a00
+ * This source code is licensed under the GNU Lesser General Public License
c54a00
+ * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
c54a00
  */
c54a00
 
c54a00
 #include <crm_internal.h>
c54a00
@@ -717,16 +708,21 @@ crm_abort(const char *file, const char *function, int line,
c54a00
 int
c54a00
 crm_pid_active(long pid, const char *daemon)
c54a00
 {
c54a00
+    static int last_asked_pid = 0;  /* log spam prevention */
c54a00
+#if SUPPORT_PROCFS
c54a00
     static int have_proc_pid = 0;
c54a00
+#else
c54a00
+    static int have_proc_pid = -1;
c54a00
+#endif
c54a00
+    int rc = 0;
c54a00
 
c54a00
-    if(have_proc_pid == 0) {
c54a00
+    if (have_proc_pid == 0) {
c54a00
+        /* evaluation of /proc/PID/exe applicability via self-introspection */
c54a00
         char proc_path[PATH_MAX], exe_path[PATH_MAX];
c54a00
-
c54a00
-        /* check to make sure pid hasn't been reused by another process */
c54a00
-        snprintf(proc_path, sizeof(proc_path), "/proc/%lu/exe", (long unsigned int)getpid());
c54a00
-
c54a00
+        snprintf(proc_path, sizeof(proc_path), "/proc/%lu/exe",
c54a00
+                 (long unsigned int) getpid());
c54a00
         have_proc_pid = 1;
c54a00
-        if(readlink(proc_path, exe_path, PATH_MAX - 1) < 0) {
c54a00
+        if (readlink(proc_path, exe_path, sizeof(exe_path) - 1) < 0) {
c54a00
             have_proc_pid = -1;
c54a00
         }
c54a00
     }
c54a00
@@ -734,40 +730,52 @@ crm_pid_active(long pid, const char *daemon)
c54a00
     if (pid <= 0) {
c54a00
         return -1;
c54a00
 
c54a00
-    } else if (kill(pid, 0) < 0 && errno == ESRCH) {
c54a00
-        return 0;
c54a00
+    } else if ((rc = kill(pid, 0)) < 0 && errno == ESRCH) {
c54a00
+        return 0;  /* no such PID detected */
c54a00
 
c54a00
-    } else if(daemon == NULL || have_proc_pid == -1) {
c54a00
-        return 1;
c54a00
+    } else if (rc < 0 && have_proc_pid == -1) {
c54a00
+        if (last_asked_pid != pid) {
c54a00
+            crm_info("Cannot examine PID %ld: %s", pid, strerror(errno));
c54a00
+            last_asked_pid = pid;
c54a00
+        }
c54a00
+        return -2;  /* errno != ESRCH */
c54a00
+
c54a00
+    } else if (rc == 0 && (daemon == NULL || have_proc_pid == -1)) {
c54a00
+        return 1;  /* kill as the only indicator, cannot double check */
c54a00
 
c54a00
     } else {
c54a00
-        int rc = 0;
c54a00
+        /* make sure PID hasn't been reused by another process
c54a00
+           XXX: might still be just a zombie, which could confuse decisions */
c54a00
+        bool checked_through_kill = (rc == 0);
c54a00
         char proc_path[PATH_MAX], exe_path[PATH_MAX], myexe_path[PATH_MAX];
c54a00
-
c54a00
-        /* check to make sure pid hasn't been reused by another process */
c54a00
-        snprintf(proc_path, sizeof(proc_path), "/proc/%lu/exe", pid);
c54a00
-
c54a00
-        rc = readlink(proc_path, exe_path, PATH_MAX - 1);
c54a00
-        if (rc < 0 && errno == EACCES) {
c54a00
-            crm_perror(LOG_INFO, "Could not read from %s", proc_path);
c54a00
-            return 1;
c54a00
+        snprintf(proc_path, sizeof(proc_path), "/proc/%ld/exe", pid);
c54a00
+
c54a00
+        rc = readlink(proc_path, exe_path, sizeof(exe_path) - 1);
c54a00
+        if ((rc < 0) && (errno == EACCES)) {
c54a00
+            if (last_asked_pid != pid) {
c54a00
+                crm_info("Could not read from %s: %s", proc_path,
c54a00
+                         strerror(errno));
c54a00
+                last_asked_pid = pid;
c54a00
+            }
c54a00
+            return checked_through_kill ? 1 : -2;
c54a00
         } else if (rc < 0) {
c54a00
-            crm_perror(LOG_ERR, "Could not read from %s", proc_path);
c54a00
-            return 0;
c54a00
+            if (last_asked_pid != pid) {
c54a00
+                crm_err("Could not read from %s: %s (%d)", proc_path,
c54a00
+                        strerror(errno), errno);
c54a00
+                last_asked_pid = pid;
c54a00
+            }
c54a00
+            return 0;  /* most likely errno == ENOENT */
c54a00
         }
c54a00
-        
c54a00
+        exe_path[rc] = '\0';
c54a00
 
c54a00
-        exe_path[rc] = 0;
c54a00
-
c54a00
-        if(daemon[0] != '/') {
c54a00
-            rc = snprintf(myexe_path, sizeof(proc_path), CRM_DAEMON_DIR"/%s", daemon);
c54a00
-            myexe_path[rc] = 0;
c54a00
+        if (daemon[0] != '/') {
c54a00
+            rc = snprintf(myexe_path, sizeof(myexe_path), CRM_DAEMON_DIR"/%s",
c54a00
+                          daemon);
c54a00
         } else {
c54a00
-            rc = snprintf(myexe_path, sizeof(proc_path), "%s", daemon);
c54a00
-            myexe_path[rc] = 0;
c54a00
+            rc = snprintf(myexe_path, sizeof(myexe_path), "%s", daemon);
c54a00
         }
c54a00
-        
c54a00
-        if (strcmp(exe_path, myexe_path) == 0) {
c54a00
+
c54a00
+        if (rc > 0 && rc < sizeof(myexe_path) && !strcmp(exe_path, myexe_path)) {
c54a00
             return 1;
c54a00
         }
c54a00
     }
c54a00
-- 
c54a00
1.8.3.1
c54a00
c54a00
c54a00
From 6888aaf3ad365ef772f8189c9958f58b85ec62d4 Mon Sep 17 00:00:00 2001
c54a00
From: =?UTF-8?q?Jan=20Pokorn=C3=BD?= <jpokorny@redhat.com>
c54a00
Date: Mon, 15 Apr 2019 23:20:42 +0200
c54a00
Subject: [PATCH 3/7] High: pacemakerd vs. IPC/procfs confused deputy
c54a00
 authenticity issue (1/4)
c54a00
c54a00
[1/4: new helpers to allow IPC client side to authenticate the server]
c54a00
c54a00
The title problem here could possibly lead to local privilege escalation
c54a00
up to the root's level (and implicitly unguarded by some additional
c54a00
protection layers like SELinux unless the defaults are constrained further).
c54a00
c54a00
Main problem is that the authenticity assumptions were built on two,
c54a00
seemingly mutually supporting legs leading to two CVEs assigned:
c54a00
c54a00
* procfs (mere process existence and right path to binary check)
c54a00
  used to verify (this part was assigned CVE-2018-16878), and
c54a00
c54a00
* one-way only client-server authentication, putting the client
c54a00
  here at the mercy of the server not necessarily cross-verified
c54a00
  per the above point if at all (this part was assigned
c54a00
  CVE-2018-16877)
c54a00
c54a00
whereas these two were in fact orthogonal, tearing security
c54a00
assumptions about the "passive influencers" in the pacemaker's daemon
c54a00
resilience-friendly constellation (orchestrated by the main of them,
c54a00
pacemakerd) apart.  Moreover, procfs-based approach is discouraged
c54a00
for other reasons.
c54a00
c54a00
The layout of the basic fix is as follows:
c54a00
* 1/4: new helpers to allow IPC client side to authenticate the server
c54a00
       (this commit, along with unifying subsequent solution for
c54a00
       both CVEs)
c54a00
* 2/4: pacemakerd to trust pre-existing processes via new checks instead
c54a00
       (along with unifying solution for both CVEs)
c54a00
* 3/4: other daemons to authenticate IPC servers of fellow processes
c54a00
       (along with addressing CVE-2018-16877 alone, for parts of
c54a00
       pacemaker not covered earlier)
c54a00
* 4/4: CPG users to be careful about now-more-probable rival processes
c54a00
       (this is merely to mitigate corner case fallout from the new
c54a00
       approaches taken to face CVE-2018-16878 in particular;
c54a00
       courtesy of Yan Gao of SUSE for reporting this)
c54a00
c54a00
With "basic", it is meant that it constitutes a self-contained best
c54a00
effort solution with some compromises that can only be overcome with the
c54a00
assistance of IPC library, libqb, as is also elaborated in messages of
c54a00
remaining "fix" commits.  Beside that, also conventional encapsulation
c54a00
of server-by-client authentication would be useful, but lack thereof
c54a00
is not an obstacle (more so should there be any security related
c54a00
oversights on the IPC client side and its connection initiating
c54a00
arrangement within libqb that has a potential to strike as early as
c54a00
when the authenticity of the server side is yet to be examined).
c54a00
c54a00
One extra kludge that's introduced for FreeBSD lacking Unix socket to
c54a00
remote peer PID mapping is masquerading such an unspecified PID with
c54a00
value of 1, since that shall always be around as "init" task and,
c54a00
deferring to proof by contradiction, cannot be pacemakerd-spawned
c54a00
child either even if PID 1 was pacemakerd (and running such a child
c54a00
alone is rather nonsensical).  The code making decisions based on that
c54a00
value must acknowledge this craze and refrain from killing/signalling
c54a00
the underlying process on this platform (but shall in general follow
c54a00
the same elsewhere, keep in mind systemd socket-based activation for
c54a00
instance, which would end up in such a situation easily!).
c54a00
---
c54a00
 configure.ac                      |  43 +++++++++++
c54a00
 include/crm/common/Makefile.am    |   7 +-
c54a00
 include/crm/common/ipc.h          |  55 +++++++++----
c54a00
 include/crm/common/ipc_internal.h |  69 +++++++++++++++++
c54a00
 lib/common/ipc.c                  | 158 ++++++++++++++++++++++++++++++++++----
c54a00
 5 files changed, 303 insertions(+), 29 deletions(-)
c54a00
 create mode 100644 include/crm/common/ipc_internal.h
c54a00
c54a00
diff --git a/configure.ac b/configure.ac
c54a00
index ed51f67..ce02777 100644
c54a00
--- a/configure.ac
c54a00
+++ b/configure.ac
c54a00
@@ -465,6 +465,48 @@ do
c54a00
   fi
c54a00
 done
c54a00
 
c54a00
+us_auth=
c54a00
+AC_CHECK_HEADER([sys/socket.h], [
c54a00
+    AC_CHECK_DECL([SO_PEERCRED], [
c54a00
+        # Linux
c54a00
+        AC_CHECK_TYPE([struct ucred], [
c54a00
+            us_auth=peercred_ucred;
c54a00
+            AC_DEFINE([US_AUTH_PEERCRED_UCRED], [1],
c54a00
+                      [Define if Unix socket auth method is
c54a00
+                       getsockopt(s, SO_PEERCRED, &ucred, ...)])
c54a00
+        ], [
c54a00
+            # OpenBSD
c54a00
+            AC_CHECK_TYPE([struct sockpeercred], [
c54a00
+                us_auth=localpeercred_sockepeercred;
c54a00
+                AC_DEFINE([US_AUTH_PEERCRED_SOCKPEERCRED], [1],
c54a00
+                          [Define if Unix socket auth method is
c54a00
+                           getsockopt(s, SO_PEERCRED, &sockpeercred, ...)])
c54a00
+            ], [], [[#include <sys/socket.h>]])
c54a00
+        ], [[#define _GNU_SOURCE
c54a00
+             #include <sys/socket.h>]])
c54a00
+    ], [], [[#include <sys/socket.h>]])
c54a00
+])
c54a00
+
c54a00
+if test -z "${us_auth}"; then
c54a00
+    # FreeBSD
c54a00
+    AC_CHECK_DECL([getpeereid], [
c54a00
+        us_auth=getpeereid;
c54a00
+        AC_DEFINE([US_AUTH_GETPEEREID], [1],
c54a00
+                  [Define if Unix socket auth method is
c54a00
+                   getpeereid(s, &uid, &gid)])
c54a00
+    ], [
c54a00
+        # Solaris/OpenIndiana
c54a00
+        AC_CHECK_DECL([getpeerucred], [
c54a00
+            us_auth=getpeerucred;
c54a00
+            AC_DEFINE([US_AUTH_GETPEERUCRED], [1],
c54a00
+                      [Define if Unix socket auth method is
c54a00
+                       getpeercred(s, &ucred)])
c54a00
+        ], [
c54a00
+            AC_MSG_ERROR([No way to authenticate a Unix socket peer])
c54a00
+        ], [[#include <ucred.h>]])
c54a00
+    ])
c54a00
+fi
c54a00
+
c54a00
 dnl This OS-based decision-making is poor autotools practice;
c54a00
 dnl feature-based mechanisms are strongly preferred.
c54a00
 dnl
c54a00
@@ -2179,3 +2221,4 @@ AC_MSG_RESULT([  LDFLAGS_HARDENED_EXE     = ${LDFLAGS_HARDENED_EXE}])
c54a00
 AC_MSG_RESULT([  LDFLAGS_HARDENED_LIB     = ${LDFLAGS_HARDENED_LIB}])
c54a00
 AC_MSG_RESULT([  Libraries                = ${LIBS}])
c54a00
 AC_MSG_RESULT([  Stack Libraries          = ${CLUSTERLIBS}])
c54a00
+AC_MSG_RESULT([  Unix socket auth method  = ${us_auth}])
c54a00
diff --git a/include/crm/common/Makefile.am b/include/crm/common/Makefile.am
c54a00
index b90ac79..aacb6ff 100644
c54a00
--- a/include/crm/common/Makefile.am
c54a00
+++ b/include/crm/common/Makefile.am
c54a00
@@ -1,5 +1,7 @@
c54a00
 #
c54a00
-# Copyright 2004-2019 Andrew Beekhof <andrew@beekhof.net>
c54a00
+# Copyright 2004-2019 the Pacemaker project contributors
c54a00
+#
c54a00
+# The version control history for this file may have further details.
c54a00
 #
c54a00
 # This source code is licensed under the GNU General Public License version 2
c54a00
 # or later (GPLv2+) WITHOUT ANY WARRANTY.
c54a00
@@ -11,7 +13,8 @@ headerdir=$(pkgincludedir)/crm/common
c54a00
 
c54a00
 header_HEADERS = xml.h ipc.h util.h iso8601.h mainloop.h logging.h \
c54a00
 		 nvpair.h
c54a00
-noinst_HEADERS = ipcs.h internal.h remote_internal.h xml_internal.h
c54a00
+noinst_HEADERS = ipcs.h internal.h remote_internal.h xml_internal.h \
c54a00
+		 ipc_internal.h
c54a00
 if BUILD_CIBSECRETS
c54a00
 noinst_HEADERS += cib_secrets.h
c54a00
 endif
c54a00
diff --git a/include/crm/common/ipc.h b/include/crm/common/ipc.h
c54a00
index 8722252..df56bbe 100644
c54a00
--- a/include/crm/common/ipc.h
c54a00
+++ b/include/crm/common/ipc.h
c54a00
@@ -1,19 +1,10 @@
c54a00
 /*
c54a00
- * Copyright (C) 2013 Andrew Beekhof <andrew@beekhof.net>
c54a00
+ * Copyright 2013-2019 the Pacemaker project contributors
c54a00
  *
c54a00
- * This program is free software; you can redistribute it and/or
c54a00
- * modify it under the terms of the GNU Lesser General Public
c54a00
- * License as published by the Free Software Foundation; either
c54a00
- * version 2 of the License, or (at your option) any later version.
c54a00
+ * The version control history for this file may have further details.
c54a00
  *
c54a00
- * This software is distributed in the hope that it will be useful,
c54a00
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
c54a00
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
c54a00
- * General Public License for more details.
c54a00
- *
c54a00
- * You should have received a copy of the GNU Lesser General Public
c54a00
- * License along with this library; if not, write to the Free Software
c54a00
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
c54a00
+ * This source code is licensed under the GNU Lesser General Public License
c54a00
+ * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
c54a00
  */
c54a00
 #ifndef CRM_COMMON_IPC__H
c54a00
 #  define CRM_COMMON_IPC__H
c54a00
@@ -77,6 +68,44 @@ uint32_t crm_ipc_buffer_flags(crm_ipc_t * client);
c54a00
 const char *crm_ipc_name(crm_ipc_t * client);
c54a00
 unsigned int crm_ipc_default_buffer_size(void);
c54a00
 
c54a00
+/*!
c54a00
+ * \brief Check the authenticity of the IPC socket peer process
c54a00
+ *
c54a00
+ * If everything goes well, peer's authenticity is verified by the means
c54a00
+ * of comparing against provided referential UID and GID (either satisfies),
c54a00
+ * and the result of this check can be deduced from the return value.
c54a00
+ * As an exception, detected UID of 0 ("root") satisfies arbitrary
c54a00
+ * provided referential daemon's credentials.
c54a00
+ *
c54a00
+ * \param[in]  sock    IPC related, connected Unix socket to check peer of
c54a00
+ * \param[in]  refuid  referential UID to check against
c54a00
+ * \param[in]  refgid  referential GID to check against
c54a00
+ * \param[out] gotpid  to optionally store obtained PID of the peer
c54a00
+ *                     (not available on FreeBSD, special value of 1
c54a00
+ *                     used instead, and the caller is required to
c54a00
+ *                     special case this value respectively)
c54a00
+ * \param[out] gotuid  to optionally store obtained UID of the peer
c54a00
+ * \param[out] gotgid  to optionally store obtained GID of the peer
c54a00
+ *
c54a00
+ * \return 0 if IPC related socket's peer is not authentic given the
c54a00
+ *         referential credentials (see above), 1 if it is,
c54a00
+ *         negative value on error (generally expressing -errno unless
c54a00
+ *         it was zero even on nonhappy path, -pcmk_err_generic is
c54a00
+ *         returned then; no message is directly emitted)
c54a00
+ *
c54a00
+ * \note While this function is tolerant on what constitutes authorized
c54a00
+ *       IPC daemon process (its effective user matches UID=0 or \p refuid,
c54a00
+ *       or at least its group matches \p refgid), either or both (in case
c54a00
+ *       of UID=0) mismatches on the expected credentials of such peer
c54a00
+ *       process \e shall be investigated at the caller when value of 1
c54a00
+ *       gets returned there, since higher-than-expected privileges in
c54a00
+ *       respect to the expected/intended credentials possibly violate
c54a00
+ *       the least privilege principle and may pose an additional risk
c54a00
+ *       (i.e. such accidental inconsistency shall be eventually fixed).
c54a00
+ */
c54a00
+int crm_ipc_is_authentic_process(int sock, uid_t refuid, gid_t refgid,
c54a00
+                                 pid_t *gotpid, uid_t *gotuid, gid_t *gotgid);
c54a00
+
c54a00
 /* Utils */
c54a00
 xmlNode *create_hello_message(const char *uuid, const char *client_name,
c54a00
                               const char *major_version, const char *minor_version);
c54a00
diff --git a/include/crm/common/ipc_internal.h b/include/crm/common/ipc_internal.h
c54a00
new file mode 100644
c54a00
index 0000000..41a6653
c54a00
--- /dev/null
c54a00
+++ b/include/crm/common/ipc_internal.h
c54a00
@@ -0,0 +1,69 @@
c54a00
+/*
c54a00
+ * Copyright 2019 the Pacemaker project contributors
c54a00
+ *
c54a00
+ * The version control history for this file may have further details.
c54a00
+ *
c54a00
+ * This source code is licensed under the GNU Lesser General Public License
c54a00
+ * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
c54a00
+ */
c54a00
+
c54a00
+#ifndef PCMK__IPC_INTERNAL_H
c54a00
+#define PCMK__IPC_INTERNAL_H
c54a00
+
c54a00
+#include <sys/types.h>
c54a00
+
c54a00
+#include <crm_config.h>  /* US_AUTH_GETPEEREID */
c54a00
+
c54a00
+
c54a00
+/* denotes "non yieldable PID" on FreeBSD, or actual PID1 in scenarios that
c54a00
+   require a delicate handling anyway (socket-based activation with systemd);
c54a00
+   we can be reasonably sure that this PID is never possessed by the actual
c54a00
+   child daemon, as it gets taken either by the proper init, or by pacemakerd
c54a00
+   itself (i.e. this precludes anything else); note that value of zero
c54a00
+   is meant to carry "unset" meaning, and better not to bet on/conditionalize
c54a00
+   over signedness of pid_t */
c54a00
+#define PCMK__SPECIAL_PID  1
c54a00
+
c54a00
+#if defined(US_AUTH_GETPEEREID)
c54a00
+/* on FreeBSD, we don't want to expose "non-yieldable PID" (leading to
c54a00
+   "IPC liveness check only") as its nominal representation, which could
c54a00
+   cause confusion -- this is unambiguous as long as there's no
c54a00
+   socket-based activation like with systemd (very improbable) */
c54a00
+#define PCMK__SPECIAL_PID_AS_0(p)  (((p) == PCMK__SPECIAL_PID) ? 0 : (p))
c54a00
+#else
c54a00
+#define PCMK__SPECIAL_PID_AS_0(p)  (p)
c54a00
+#endif
c54a00
+
c54a00
+/*!
c54a00
+ * \internal
c54a00
+ * \brief Check the authenticity and liveness of the process via IPC end-point
c54a00
+ *
c54a00
+ * When IPC daemon under given IPC end-point (name) detected, its authenticity
c54a00
+ * is verified by the means of comparing against provided referential UID and
c54a00
+ * GID, and the result of this check can be deduced from the return value.
c54a00
+ * As an exception, referential UID of 0 (~ root) satisfies arbitrary
c54a00
+ * detected daemon's credentials.
c54a00
+ *
c54a00
+ * \param[in]  name    IPC name to base the search on
c54a00
+ * \param[in]  refuid  referential UID to check against
c54a00
+ * \param[in]  refgid  referential GID to check against
c54a00
+ * \param[out] gotpid  to optionally store obtained PID of the found process
c54a00
+ *                     upon returning 1 or -2
c54a00
+ *                     (not available on FreeBSD, special value of 1,
c54a00
+ *                     see PCMK__SPECIAL_PID, used instead, and the caller
c54a00
+ *                     is required to special case this value respectively)
c54a00
+ *
c54a00
+ * \return 0 if no trace of IPC peer's liveness detected, 1 if it was,
c54a00
+ *         -1 on error, and -2 when the IPC blocked with unauthorized
c54a00
+ *         process (log message emitted in both latter cases)
c54a00
+ *
c54a00
+ * \note This function emits a log message also in case there isn't a perfect
c54a00
+ *       match in respect to \p refuid and/or \p refgid, for a possible
c54a00
+ *       least privilege principle violation.
c54a00
+ *
c54a00
+ * \see crm_ipc_is_authentic_process
c54a00
+ */
c54a00
+int pcmk__ipc_is_authentic_process_active(const char *name, uid_t refuid,
c54a00
+                                          gid_t refgid, pid_t *gotpid);
c54a00
+
c54a00
+#endif
c54a00
diff --git a/lib/common/ipc.c b/lib/common/ipc.c
c54a00
index 3258bcb..5b47dd6 100644
c54a00
--- a/lib/common/ipc.c
c54a00
+++ b/lib/common/ipc.c
c54a00
@@ -1,23 +1,25 @@
c54a00
 /*
c54a00
- * Copyright (C) 2004 Andrew Beekhof <andrew@beekhof.net>
c54a00
+ * Copyright 2004-2019 the Pacemaker project contributors
c54a00
  *
c54a00
- * This library is free software; you can redistribute it and/or
c54a00
- * modify it under the terms of the GNU Lesser General Public
c54a00
- * License as published by the Free Software Foundation; either
c54a00
- * version 2.1 of the License, or (at your option) any later version.
c54a00
+ * The version control history for this file may have further details.
c54a00
  *
c54a00
- * This library is distributed in the hope that it will be useful,
c54a00
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
c54a00
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
c54a00
- * Lesser General Public License for more details.
c54a00
- *
c54a00
- * You should have received a copy of the GNU Lesser General Public
c54a00
- * License along with this library; if not, write to the Free Software
c54a00
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
c54a00
+ * This source code is licensed under the GNU Lesser General Public License
c54a00
+ * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
c54a00
  */
c54a00
 
c54a00
 #include <crm_internal.h>
c54a00
 
c54a00
+#if defined(US_AUTH_PEERCRED_UCRED) || defined(US_AUTH_PEERCRED_SOCKPEERCRED)
c54a00
+#  ifdef US_AUTH_PEERCRED_UCRED
c54a00
+#    ifndef _GNU_SOURCE
c54a00
+#      define _GNU_SOURCE
c54a00
+#    endif
c54a00
+#  endif
c54a00
+#  include <sys/socket.h>
c54a00
+#elif defined(US_AUTH_GETPEERUCRED)
c54a00
+#  include <ucred.h>
c54a00
+#endif
c54a00
+
c54a00
 #include <sys/param.h>
c54a00
 
c54a00
 #include <stdio.h>
c54a00
@@ -30,11 +32,13 @@
c54a00
 #include <fcntl.h>
c54a00
 #include <bzlib.h>
c54a00
 
c54a00
-#include <crm/crm.h>
c54a00
+#include <crm/crm.h>   /* indirectly: pcmk_err_generic */
c54a00
 #include <crm/msg_xml.h>
c54a00
 #include <crm/common/ipc.h>
c54a00
 #include <crm/common/ipcs.h>
c54a00
 
c54a00
+#include <crm/common/ipc_internal.h>  /* PCMK__SPECIAL_PID* */
c54a00
+
c54a00
 #define PCMK_IPC_VERSION 1
c54a00
 
c54a00
 /* Evict clients whose event queue grows this large (by default) */
c54a00
@@ -1375,6 +1379,132 @@ crm_ipc_send(crm_ipc_t * client, xmlNode * message, enum crm_ipc_flags flags, in
c54a00
     return rc;
c54a00
 }
c54a00
 
c54a00
+int
c54a00
+crm_ipc_is_authentic_process(int sock, uid_t refuid, gid_t refgid,
c54a00
+                             pid_t *gotpid, uid_t *gotuid, gid_t *gotgid) {
c54a00
+    int ret = 0;  /* 1: authentic, 0: not, <0: -errno or -pcmk_err_generic */
c54a00
+    pid_t found_pid = 0; uid_t found_uid = 0; gid_t found_gid = 0;
c54a00
+#if defined(US_AUTH_PEERCRED_UCRED)
c54a00
+    struct ucred ucred;
c54a00
+    socklen_t ucred_len = sizeof(ucred);
c54a00
+
c54a00
+    if (!getsockopt(sock, SOL_SOCKET, SO_PEERCRED,
c54a00
+                    &ucred, &ucred_len)
c54a00
+                && ucred_len == sizeof(ucred)) {
c54a00
+        found_pid = ucred.pid; found_uid = ucred.uid; found_gid = ucred.gid;
c54a00
+
c54a00
+#elif defined(US_AUTH_PEERCRED_SOCKPEERCRED)
c54a00
+    struct sockpeercred sockpeercred;
c54a00
+    socklen_t sockpeercred_len = sizeof(sockpeercred);
c54a00
+
c54a00
+    if (!getsockopt(sock, SOL_SOCKET, SO_PEERCRED,
c54a00
+                    &sockpeercred, &sockpeercred_len)
c54a00
+                && sockpeercred_len == sizeof(sockpeercred)) {
c54a00
+        found_pid = sockpeercred.pid;
c54a00
+        found_uid = sockpeercred.uid; found_gid = sockpeercred.gid;
c54a00
+
c54a00
+#elif defined(US_AUTH_GETPEEREID)
c54a00
+    if (!getpeereid(sock, &found_uid, &found_gid)) {
c54a00
+        found_pid = PCMK__SPECIAL_PID;  /* cannot obtain PID (FreeBSD) */
c54a00
+
c54a00
+#elif defined(US_AUTH_GETPEERUCRED)
c54a00
+    ucred_t *ucred;
c54a00
+    if (!getpeerucred(sock, &ucred)) {
c54a00
+        errno = 0;
c54a00
+        found_pid = ucred_getpid(ucred);
c54a00
+        found_uid = ucred_geteuid(ucred); found_gid = ucred_getegid(ucred);
c54a00
+        ret = -errno;
c54a00
+        ucred_free(ucred);
c54a00
+        if (ret) {
c54a00
+            return (ret < 0) ? ret : -pcmk_err_generic;
c54a00
+        }
c54a00
+
c54a00
+#else
c54a00
+#  error "No way to authenticate a Unix socket peer"
c54a00
+    errno = 0;
c54a00
+    if (0) {
c54a00
+#endif
c54a00
+        if (gotpid != NULL) {
c54a00
+            *gotpid = found_pid;
c54a00
+        }
c54a00
+        if (gotuid != NULL) {
c54a00
+            *gotuid = found_uid;
c54a00
+        }
c54a00
+        if (gotgid != NULL) {
c54a00
+            *gotgid = found_gid;
c54a00
+        }
c54a00
+        ret = (found_uid == 0 || found_uid == refuid || found_gid == refgid);
c54a00
+    } else {
c54a00
+        ret = (errno > 0) ? -errno : -pcmk_err_generic;
c54a00
+    }
c54a00
+
c54a00
+    return ret;
c54a00
+}
c54a00
+
c54a00
+int
c54a00
+pcmk__ipc_is_authentic_process_active(const char *name, uid_t refuid,
c54a00
+                                      gid_t refgid, pid_t *gotpid) {
c54a00
+    static char last_asked_name[PATH_MAX / 2] = "";  /* log spam prevention */
c54a00
+    int fd, ret = 0;
c54a00
+    pid_t found_pid = 0; uid_t found_uid = 0; gid_t found_gid = 0;
c54a00
+    qb_ipcc_connection_t *c;
c54a00
+
c54a00
+    if ((c = qb_ipcc_connect(name, 0)) == NULL) {
c54a00
+        crm_info("Could not connect to %s IPC: %s", name, strerror(errno));
c54a00
+
c54a00
+    } else if ((ret = qb_ipcc_fd_get(c, &fd))) {
c54a00
+        crm_err("Could not get fd from %s IPC: %s (%d)", name,
c54a00
+                strerror(-ret), -ret);
c54a00
+        ret = -1;
c54a00
+
c54a00
+    } else if ((ret = crm_ipc_is_authentic_process(fd, refuid, refgid,
c54a00
+                                                   &found_pid, &found_uid,
c54a00
+                                                   &found_gid)) < 0) {
c54a00
+        if (ret == -pcmk_err_generic) {
c54a00
+            crm_err("Could not get peer credentials from %s IPC", name);
c54a00
+        } else {
c54a00
+            crm_err("Could not get peer credentials from %s IPC: %s (%d)",
c54a00
+                    name, strerror(-ret), -ret);
c54a00
+        }
c54a00
+        ret = -1;
c54a00
+
c54a00
+    } else {
c54a00
+        if (gotpid != NULL) {
c54a00
+            *gotpid = found_pid;
c54a00
+        }
c54a00
+
c54a00
+        if (!ret) {
c54a00
+            crm_err("Daemon (IPC %s) effectively blocked with unauthorized"
c54a00
+                    " process %lld (uid: %lld, gid: %lld)",
c54a00
+                    name, (long long) PCMK__SPECIAL_PID_AS_0(found_pid),
c54a00
+                    (long long) found_uid, (long long) found_gid);
c54a00
+            ret = -2;
c54a00
+        } else if ((found_uid != refuid || found_gid != refgid)
c54a00
+                && strncmp(last_asked_name, name, sizeof(last_asked_name))) {
c54a00
+            if (!found_uid && refuid) {
c54a00
+                crm_warn("Daemon (IPC %s) runs as root, whereas the expected"
c54a00
+                         " credentials are %lld:%lld, hazard of violating"
c54a00
+                         " the least privilege principle",
c54a00
+                         name, (long long) refuid, (long long) refgid);
c54a00
+            } else {
c54a00
+                crm_notice("Daemon (IPC %s) runs as %lld:%lld, whereas the"
c54a00
+                           " expected credentials are %lld:%lld, which may"
c54a00
+                           " mean a different set of privileges than expected",
c54a00
+                           name, (long long) found_uid, (long long) found_gid,
c54a00
+                           (long long) refuid, (long long) refgid);
c54a00
+            }
c54a00
+            memccpy(last_asked_name, name, '\0', sizeof(last_asked_name));
c54a00
+        }
c54a00
+    }
c54a00
+
c54a00
+    if (ret) {  /* here, !ret only when we could not initially connect */
c54a00
+        qb_ipcc_disconnect(c);
c54a00
+    }
c54a00
+
c54a00
+    return ret;
c54a00
+}
c54a00
+
c54a00
+
c54a00
 /* Utils */
c54a00
 
c54a00
 xmlNode *
c54a00
-- 
c54a00
1.8.3.1
c54a00
c54a00
c54a00
From 904c53ea311fd6fae945a55202b0a7ccf3783465 Mon Sep 17 00:00:00 2001
c54a00
From: =?UTF-8?q?Jan=20Pokorn=C3=BD?= <jpokorny@redhat.com>
c54a00
Date: Tue, 16 Apr 2019 00:04:47 +0200
c54a00
Subject: [PATCH 4/7] High: pacemakerd vs. IPC/procfs confused deputy
c54a00
 authenticity issue (2/4)
c54a00
c54a00
[2/4: pacemakerd to trust pre-existing processes via new checks instead]
c54a00
c54a00
In pacemakerd in the context of entrusting pre-existing processes,
c54a00
we now resort to procfs-based solution only in boundary, fouled cases,
c54a00
and primarily examine the credentials of the processes already
c54a00
occupying known IPC end-points before adopting them.
c54a00
c54a00
The commit applies the new helpers from 1/1 so as to close the both
c54a00
related sensitive problems, CVE-2018-16877 and CVE-2018-16878, in
c54a00
a unified manner, this time limited to the main daemon of pacemaker
c54a00
(pacemakerd).
c54a00
c54a00
To be noted that it is clearly not 100% for this purpose for still
c54a00
allowing for TOCTTOU, but that's what commit (3/4) is meant to solve
c54a00
for the most part, plus there may be optimizations solving this concern
c54a00
as a side effect, but that requires an active assistance on the libqb
c54a00
side (https://github.com/ClusterLabs/libqb/issues/325) since any
c54a00
improvement on pacemaker side in isolation would be very
c54a00
cumbersome if generally possible at all, but either way
c54a00
means a new, soft compatibility encumbrance.
c54a00
c54a00
As a follow-up to what was put in preceding 1/3 commit, PID of 1 tracked
c54a00
as child's identification on FreeBSD (or when socket-based activation is
c54a00
used with systemd) is treated specially, incl. special precaution with
c54a00
child's PID discovered as 1 elsewhere.
c54a00
c54a00
v2: courtesy of Yan Gao of SUSE for early discovery and report for
c54a00
    what's primarily solved with 4/4 commit, in extension, child
c54a00
    daemons in the initialization phase coinciding with IPC-feasibility
c54a00
    based process scan in pacemakerd in a way that those are missed
c54a00
    (although they are to come up fully just moments later only
c54a00
    to interfere with naturally spawned ones) are now considered so
c54a00
    that if any native children later fail for said clash, the
c54a00
    pre-existing counterpart may get adopted instead of ending up
c54a00
    with repeated spawn-bury loop ad nauseam without real progress
c54a00
    (note that PCMK_fail_fast=true could possibly help, but that's
c54a00
    rather a big hammer not suitable for all the use cases, not
c54a00
    the ones we try to deal with gracefully here)
c54a00
---
c54a00
 mcp/pacemaker.c | 431 +++++++++++++++++++++++++++++++++++++++++++++++---------
c54a00
 1 file changed, 362 insertions(+), 69 deletions(-)
c54a00
c54a00
diff --git a/mcp/pacemaker.c b/mcp/pacemaker.c
c54a00
index 2986be6..86df216 100644
c54a00
--- a/mcp/pacemaker.c
c54a00
+++ b/mcp/pacemaker.c
c54a00
@@ -1,5 +1,7 @@
c54a00
 /*
c54a00
- * Copyright 2010-2018 Andrew Beekhof <andrew@beekhof.net>
c54a00
+ * Copyright 2010-2019 the Pacemaker project contributors
c54a00
+ *
c54a00
+ * The version control history for this file may have further details.
c54a00
  *
c54a00
  * This source code is licensed under the GNU General Public License version 2
c54a00
  * or later (GPLv2+) WITHOUT ANY WARRANTY.
c54a00
@@ -10,17 +12,23 @@
c54a00
 
c54a00
 #include <pwd.h>
c54a00
 #include <grp.h>
c54a00
+#include <poll.h>
c54a00
 #include <sys/stat.h>
c54a00
 #include <sys/types.h>
c54a00
 #include <sys/time.h>
c54a00
 #include <sys/resource.h>
c54a00
 #include <sys/reboot.h>
c54a00
 
c54a00
+#include <crm/crm.h>  /* indirectly: CRM_EX_* */
c54a00
+#include <crm/cib/internal.h>  /* cib_channel_ro */
c54a00
 #include <crm/msg_xml.h>
c54a00
 #include <crm/common/ipcs.h>
c54a00
 #include <crm/common/mainloop.h>
c54a00
 #include <crm/cluster/internal.h>
c54a00
 #include <crm/cluster.h>
c54a00
+
c54a00
+#include <crm/common/ipc_internal.h>  /* PCMK__SPECIAL_PID*, ... */
c54a00
+
c54a00
 #ifdef SUPPORT_COROSYNC
c54a00
 #include <corosync/cfg.h>
c54a00
 #endif
c54a00
@@ -31,6 +39,7 @@
c54a00
 gboolean pcmk_quorate = FALSE;
c54a00
 gboolean fatal_error = FALSE;
c54a00
 GMainLoop *mainloop = NULL;
c54a00
+static bool global_keep_tracking = false;
c54a00
 
c54a00
 #define PCMK_PROCESS_CHECK_INTERVAL 5
c54a00
 
c54a00
@@ -48,6 +57,7 @@ typedef struct pcmk_child_s {
c54a00
     const char *name;
c54a00
     const char *uid;
c54a00
     const char *command;
c54a00
+    const char *endpoint;  /* IPC server name */
c54a00
 
c54a00
     gboolean active_before_startup;
c54a00
 } pcmk_child_t;
c54a00
@@ -59,17 +69,35 @@ typedef struct pcmk_child_s {
c54a00
 static pcmk_child_t pcmk_children[] = {
c54a00
     { 0, crm_proc_none,       0, 0, FALSE, "none",       NULL,            NULL },
c54a00
     { 0, crm_proc_plugin,     0, 0, FALSE, "ais",        NULL,            NULL },
c54a00
-    { 0, crm_proc_lrmd,       3, 0, TRUE,  "lrmd",       NULL,            CRM_DAEMON_DIR"/lrmd" },
c54a00
-    { 0, crm_proc_cib,        1, 0, TRUE,  "cib",        CRM_DAEMON_USER, CRM_DAEMON_DIR"/cib" },
c54a00
-    { 0, crm_proc_crmd,       6, 0, TRUE,  "crmd",       CRM_DAEMON_USER, CRM_DAEMON_DIR"/crmd" },
c54a00
-    { 0, crm_proc_attrd,      4, 0, TRUE,  "attrd",      CRM_DAEMON_USER, CRM_DAEMON_DIR"/attrd" },
c54a00
-    { 0, crm_proc_stonithd,   0, 0, TRUE,  "stonithd",   NULL,            NULL },
c54a00
-    { 0, crm_proc_pe,         5, 0, TRUE,  "pengine",    CRM_DAEMON_USER, CRM_DAEMON_DIR"/pengine" },
c54a00
-    { 0, crm_proc_mgmtd,      0, 0, TRUE,  "mgmtd",      NULL,            HB_DAEMON_DIR"/mgmtd" },
c54a00
-    { 0, crm_proc_stonith_ng, 2, 0, TRUE,  "stonith-ng", NULL,            CRM_DAEMON_DIR"/stonithd" },
c54a00
+    { 0, crm_proc_lrmd,       3, 0, TRUE,  "lrmd",       NULL,            CRM_DAEMON_DIR"/lrmd",
c54a00
+      CRM_SYSTEM_LRMD
c54a00
+    },
c54a00
+    { 0, crm_proc_cib,        1, 0, TRUE,  "cib",        CRM_DAEMON_USER, CRM_DAEMON_DIR"/cib",
c54a00
+      cib_channel_ro
c54a00
+    },
c54a00
+    { 0, crm_proc_crmd,       6, 0, TRUE,  "crmd",       CRM_DAEMON_USER, CRM_DAEMON_DIR"/crmd",
c54a00
+      CRM_SYSTEM_CRMD
c54a00
+    },
c54a00
+    { 0, crm_proc_attrd,      4, 0, TRUE,  "attrd",      CRM_DAEMON_USER, CRM_DAEMON_DIR"/attrd",
c54a00
+      T_ATTRD
c54a00
+    },
c54a00
+    { 0, crm_proc_stonithd,   0, 0, TRUE,  "stonithd",   NULL,            NULL,
c54a00
+      NULL
c54a00
+    },
c54a00
+    { 0, crm_proc_pe,         5, 0, TRUE,  "pengine",    CRM_DAEMON_USER, CRM_DAEMON_DIR"/pengine",
c54a00
+      CRM_SYSTEM_PENGINE
c54a00
+    },
c54a00
+    { 0, crm_proc_mgmtd,      0, 0, TRUE,  "mgmtd",      NULL,            HB_DAEMON_DIR"/mgmtd",
c54a00
+      NULL
c54a00
+    },
c54a00
+    { 0, crm_proc_stonith_ng, 2, 0, TRUE,  "stonith-ng", NULL,            CRM_DAEMON_DIR"/stonithd",
c54a00
+      "stonith-ng"
c54a00
+    },
c54a00
 };
c54a00
 /* *INDENT-ON* */
c54a00
 
c54a00
+static gboolean check_active_before_startup_processes(gpointer user_data);
c54a00
+static int pcmk_child_active(pcmk_child_t *child);
c54a00
 static gboolean start_child(pcmk_child_t * child);
c54a00
 void update_process_clients(crm_client_t *client);
c54a00
 void update_process_peers(void);
c54a00
@@ -131,14 +159,31 @@ pcmk_process_exit(pcmk_child_t * child)
c54a00
     }
c54a00
 
c54a00
     if (shutdown_trigger) {
c54a00
+        /* resume step-wise shutdown (returned TRUE yields no parallelizing) */
c54a00
         mainloop_set_trigger(shutdown_trigger);
c54a00
+        /* intended to speed up propagating expected lay-off of the daemons? */
c54a00
         update_node_processes(local_nodeid, NULL, get_process_list());
c54a00
 
c54a00
-    } else if (child->respawn && crm_is_true(getenv("PCMK_fail_fast"))) {
c54a00
+    } else if (!child->respawn) {
c54a00
+        /* nothing to do */
c54a00
+
c54a00
+    } else if (crm_is_true(getenv("PCMK_fail_fast"))) {
c54a00
         crm_err("Rebooting system because of %s", child->name);
c54a00
         pcmk_panic(__FUNCTION__);
c54a00
 
c54a00
-    } else if (child->respawn) {
c54a00
+    } else if (pcmk_child_active(child) == 1) {
c54a00
+        crm_warn("One-off suppressing strict respawning of a child process %s,"
c54a00
+                 " appears alright per %s IPC end-point",
c54a00
+                 child->name, child->endpoint);
c54a00
+        /* need to monitor how it evolves, and start new process if badly */
c54a00
+        child->active_before_startup = TRUE;
c54a00
+        if (!global_keep_tracking) {
c54a00
+            global_keep_tracking = true;
c54a00
+            g_timeout_add_seconds(PCMK_PROCESS_CHECK_INTERVAL,
c54a00
+                                  check_active_before_startup_processes, NULL);
c54a00
+        }
c54a00
+
c54a00
+    } else {
c54a00
         crm_notice("Respawning failed child process: %s", child->name);
c54a00
         start_child(child);
c54a00
     }
c54a00
@@ -215,8 +260,13 @@ stop_child(pcmk_child_t * child, int signal)
c54a00
         signal = SIGTERM;
c54a00
     }
c54a00
 
c54a00
-    if (child->command == NULL) {
c54a00
-        crm_debug("Nothing to do for child \"%s\"", child->name);
c54a00
+    /* why to skip PID of 1?
c54a00
+       - FreeBSD ~ how untrackable process behind IPC is masqueraded as
c54a00
+       - elsewhere: how "init" task is designated; in particular, in systemd
c54a00
+         arrangement of socket-based activation, this is pretty real */
c54a00
+    if (child->command == NULL || child->pid == PCMK__SPECIAL_PID) {
c54a00
+        crm_debug("Nothing to do for child \"%s\" (process %lld)",
c54a00
+                  child->name, (long long) PCMK__SPECIAL_PID_AS_0(child->pid));
c54a00
         return TRUE;
c54a00
     }
c54a00
 
c54a00
@@ -241,6 +291,11 @@ stop_child(pcmk_child_t * child, int signal)
c54a00
 static char *opts_default[] = { NULL, NULL };
c54a00
 static char *opts_vgrind[] = { NULL, NULL, NULL, NULL, NULL };
c54a00
 
c54a00
+/* TODO once libqb is taught to juggle with IPC end-points carried over as
c54a00
+        bare file descriptor (https://github.com/ClusterLabs/libqb/issues/325)
c54a00
+        it shall hand over these descriptors here if/once they are successfully
c54a00
+        pre-opened in (presumably) pcmk_child_active, to avoid any remaining
c54a00
+        room for races */
c54a00
 static gboolean
c54a00
 start_child(pcmk_child_t * child)
c54a00
 {
c54a00
@@ -371,7 +426,10 @@ escalate_shutdown(gpointer data)
c54a00
 
c54a00
     pcmk_child_t *child = data;
c54a00
 
c54a00
-    if (child->pid) {
c54a00
+    if (child->pid == PCMK__SPECIAL_PID) {
c54a00
+        pcmk_process_exit(child);
c54a00
+
c54a00
+    } else if (child->pid) {
c54a00
         /* Use SIGSEGV instead of SIGKILL to create a core so we can see what it was up to */
c54a00
         crm_err("Child %s not terminating in a timely manner, forcing", child->name);
c54a00
         stop_child(child, SIGSEGV);
c54a00
@@ -379,6 +437,8 @@ escalate_shutdown(gpointer data)
c54a00
     return FALSE;
c54a00
 }
c54a00
 
c54a00
+#define SHUTDOWN_ESCALATION_PERIOD 180000  /* 3m */
c54a00
+
c54a00
 static gboolean
c54a00
 pcmk_shutdown_worker(gpointer user_data)
c54a00
 {
c54a00
@@ -407,11 +467,24 @@ pcmk_shutdown_worker(gpointer user_data)
c54a00
                 time_t now = time(NULL);
c54a00
 
c54a00
                 if (child->respawn) {
c54a00
+                    if (child->pid == PCMK__SPECIAL_PID) {
c54a00
+                        crm_warn("The process behind %s IPC cannot be"
c54a00
+                                 " terminated, so either wait the graceful"
c54a00
+                                 " period of %ld s for its native termination"
c54a00
+                                 " if it vitally depends on some other daemons"
c54a00
+                                 " going down in a controlled way already,"
c54a00
+                                 " or locate and kill the correct %s process"
c54a00
+                                 " on your own; set PCMK_fail_fast=1 to avoid"
c54a00
+                                 " this altogether next time around",
c54a00
+                                 child->name, (long) SHUTDOWN_ESCALATION_PERIOD,
c54a00
+                                 child->command);
c54a00
+                    }
c54a00
                     next_log = now + 30;
c54a00
                     child->respawn = FALSE;
c54a00
                     stop_child(child, SIGTERM);
c54a00
                     if (phase < pcmk_children[pcmk_child_crmd].start_seq) {
c54a00
-                        g_timeout_add(180000 /* 3m */ , escalate_shutdown, child);
c54a00
+                        g_timeout_add(SHUTDOWN_ESCALATION_PERIOD,
c54a00
+                                      escalate_shutdown, child);
c54a00
                     }
c54a00
 
c54a00
                 } else if (now >= next_log) {
c54a00
@@ -696,7 +769,106 @@ mcp_chown(const char *path, uid_t uid, gid_t gid)
c54a00
     }
c54a00
 }
c54a00
 
c54a00
-#if SUPPORT_PROCFS
c54a00
+/*!
c54a00
+ * \internal
c54a00
+ * \brief Check the liveness of the child based on IPC name and PID if tracked
c54a00
+ *
c54a00
+ * \param[inout] child  Child tracked data
c54a00
+ *
c54a00
+ * \return 0 if no trace of child's liveness detected (while it is detectable
c54a00
+ *         to begin with, at least according to one of the two properties),
c54a00
+ *         1 if everything is fine, 2 if it's up per PID, but not per IPC
c54a00
+ *         end-point (still starting?), -1 on error, and -2 when the child
c54a00
+ *         (its IPC) blocked with an unauthorized process (log message
c54a00
+ *         emitted in both latter cases)
c54a00
+ *
c54a00
+ * \note This function doesn't modify any of \p child members but \c pid,
c54a00
+ *       and is not actively toying with processes as such but invoking
c54a00
+ *       \c stop_child in one particular case (there's for some reason
c54a00
+ *       a different authentic holder of the IPC end-point).
c54a00
+ */
c54a00
+static int
c54a00
+pcmk_child_active(pcmk_child_t *child) {
c54a00
+    static uid_t cl_uid = 0;
c54a00
+    static gid_t cl_gid = 0;
c54a00
+    const uid_t root_uid = 0;
c54a00
+    const gid_t root_gid = 0;
c54a00
+    const uid_t *ref_uid;
c54a00
+    const gid_t *ref_gid;
c54a00
+    int ret = 0;
c54a00
+    pid_t ipc_pid = 0;
c54a00
+    const char *use_name;
c54a00
+
c54a00
+    if (child->endpoint == NULL
c54a00
+            && (child->pid <= 0 || child->pid == PCMK__SPECIAL_PID)) {
c54a00
+        crm_err("Cannot track child %s for missing both API end-point and PID",
c54a00
+                child->name);
c54a00
+        ret = -1;  /* misuse of the function when child is not trackable */
c54a00
+
c54a00
+    } else if (child->endpoint != NULL) {
c54a00
+
c54a00
+        ref_uid = (child->uid != NULL) ? &cl_uid : &root_uid;
c54a00
+        ref_gid = (child->uid != NULL) ? &cl_gid : &root_gid;
c54a00
+
c54a00
+        if (child->uid != NULL && !cl_uid && !cl_gid
c54a00
+                && crm_user_lookup(CRM_DAEMON_USER, &cl_uid, &cl_gid) < 0) {
c54a00
+            crm_err("Could not find user and group IDs for user %s",
c54a00
+                    CRM_DAEMON_USER);
c54a00
+            ret = -1;
c54a00
+        } else if ((ret = pcmk__ipc_is_authentic_process_active(child->endpoint,
c54a00
+                                                                *ref_uid, *ref_gid,
c54a00
+                                                                &ipc_pid)) < 0) {
c54a00
+            /* game over */
c54a00
+        } else if (child->pid <= 0) {
c54a00
+            /* hit new child to be initialized, or reset to zero
c54a00
+               and investigate further for ret == 0 */
c54a00
+            child->pid = ipc_pid;
c54a00
+        } else if (ipc_pid && child->pid != ipc_pid) {
c54a00
+            /* ultimately strange for ret == 1; either way, investigate */
c54a00
+            ret = 0;
c54a00
+        }
c54a00
+    }
c54a00
+
c54a00
+    if (!ret) {
c54a00
+        use_name = (child->flag == crm_proc_stonith_ng)
c54a00
+                     ? "stonithd"  /* compensate "simplification" 61fc951e9 */
c54a00
+                     : child->name;
c54a00
+        /* when no IPC based liveness detected (incl. if ever a child without
c54a00
+           IPC is tracked), or detected for a different _authentic_ process;
c54a00
+           safe on FreeBSD since the only change possible from a proper child's
c54a00
+           PID into "special" PID of 1 behind more loosely related process */
c54a00
+        ret = crm_pid_active(child->pid, use_name);
c54a00
+        if (ipc_pid && (ret != 1
c54a00
+                        || ipc_pid == PCMK__SPECIAL_PID
c54a00
+                        || crm_pid_active(ipc_pid, use_name) == 1)) {
c54a00
+            if (ret == 1) {
c54a00
+                /* assume there's no forking-while-retaining-IPC-socket
c54a00
+                   involved in the "children's" lifecycle, hence that the
c54a00
+                   tracking got out of sync purely because of some external
c54a00
+                   (esotheric?) forces (user initiated process "refresh" by
c54a00
+                   force? or intentionally racing on start-up, even?), and
c54a00
+                   that switching over to this other detected, authentic
c54a00
+                   instance with an IPC already in possession is a better
c54a00
+                   trade-off than "neutralizing" it first so as to give
c54a00
+                   either the original or possibly a new to-be-spawned
c54a00
+                   daemon process a leeway for operation, which would
c54a00
+                   otherwise have to be carried out */
c54a00
+                /* not possessing IPC, afterall (what about corosync CPG?) */
c54a00
+                stop_child(child, SIGKILL);
c54a00
+            } else {
c54a00
+                ret = 1;
c54a00
+            }
c54a00
+            child->pid = ipc_pid;
c54a00
+        } else if (ret == 1) {
c54a00
+            ret = 2;  /* up per PID, but not per IPC (still starting?) */
c54a00
+        } else if (!child->pid && ret == -1) {
c54a00
+            ret = 0;  /* correct -1 on FreeBSD from above back to 0 */
c54a00
+        }
c54a00
+    }
c54a00
+
c54a00
+    return ret;
c54a00
+}
c54a00
+
c54a00
 static gboolean
c54a00
 check_active_before_startup_processes(gpointer user_data)
c54a00
 {
c54a00
@@ -713,15 +885,41 @@ check_active_before_startup_processes(gpointer user_data)
c54a00
                 continue;
c54a00
             } else {
c54a00
                 const char *name = pcmk_children[lpc].name;
c54a00
-                if (pcmk_children[lpc].flag == crm_proc_stonith_ng) {
c54a00
-                    name = "stonithd";
c54a00
-                }
c54a00
-
c54a00
-                if (crm_pid_active(pcmk_children[lpc].pid, name) != 1) {
c54a00
-                    crm_notice("Process %s terminated (pid=%d)",
c54a00
-                           name, pcmk_children[lpc].pid);
c54a00
-                    pcmk_process_exit(&(pcmk_children[lpc]));
c54a00
-                    continue;
c54a00
+                int ret;
c54a00
+
c54a00
+                switch ((ret = pcmk_child_active(&pcmk_children[lpc]))) {
c54a00
+                    case 1:
c54a00
+                        break;
c54a00
+                    case 0:
c54a00
+                    case 2:  /* this very case: it was OK once already */
c54a00
+                        if (pcmk_children[lpc].respawn == TRUE) {
c54a00
+                            /* presumably after crash, hence critical */
c54a00
+                            crm_crit("Process %s terminated (pid=%lld)%s", \
c54a00
+                                     name, (long long)
c54a00
+                                     PCMK__SPECIAL_PID_AS_0(pcmk_children[lpc].pid),
c54a00
+                                     ret ? ", at least per IPC end-point that went AWOL"
c54a00
+                                         : "");
c54a00
+                        } else {
c54a00
+                            /* orderly shutdown */
c54a00
+                            crm_notice("Process %s terminated (pid=%lld)%s", \
c54a00
+                                       name, (long long)
c54a00
+                                       PCMK__SPECIAL_PID_AS_0(pcmk_children[lpc].pid),
c54a00
+                                       ret ? ", at least per IPC end-point that went AWOL"
c54a00
+                                           : "");
c54a00
+                        }
c54a00
+                        pcmk_process_exit(&(pcmk_children[lpc]));
c54a00
+                        continue;
c54a00
+                    default:
c54a00
+                        crm_crit("Unexpected value from pcmk_child_active:"
c54a00
+                                 " %d (pid=%lld)", ret,
c54a00
+                                 (long long) PCMK__SPECIAL_PID_AS_0(
c54a00
+                                                 pcmk_children[lpc].pid));
c54a00
+                        /* fall through */
c54a00
+                    case -1:
c54a00
+                    case -2:
c54a00
+                        /* message(s) already emitted */
c54a00
+                        crm_exit(DAEMON_RESPAWN_STOP);
c54a00
+                        break;  /* static analysis/noreturn */
c54a00
                 }
c54a00
             }
c54a00
             /* at least one of the processes found at startup
c54a00
@@ -730,61 +928,147 @@ check_active_before_startup_processes(gpointer user_data)
c54a00
         }
c54a00
     }
c54a00
 
c54a00
+    global_keep_tracking = keep_tracking;
c54a00
     return keep_tracking;
c54a00
 }
c54a00
-#endif // SUPPORT_PROCFS
c54a00
 
c54a00
-static void
c54a00
+/*!
c54a00
+ * \internal
c54a00
+ * \brief Initial one-off check of the pre-existing "child" processes
c54a00
+ *
c54a00
+ * With "child" process, we mean the subdaemon that defines an API end-point
c54a00
+ * (all of them do as of the comment) -- the possible complement is skipped
c54a00
+ * as it is deemed it has no such shared resources to cause conflicts about,
c54a00
+ * hence it can presumably be started anew without hesitation.
c54a00
+ * If that won't hold true in the future, the concept of a shared resource
c54a00
+ * will have to be generalized beyond the API end-point.
c54a00
+ *
c54a00
+ * For boundary cases that the "child" is still starting (IPC end-point is yet
c54a00
+ * to be witnessed), or more rarely (practically FreeBSD only), when there's
c54a00
+ * a pre-existing "untrackable" authentic process, we give the situation some
c54a00
+ * time to possibly unfold in the right direction, meaning that said socket
c54a00
+ * will appear or the unattainable process will disappear per the observable
c54a00
+ * IPC, respectively.
c54a00
+ *
c54a00
+ * \return 0 if no such "child" process found, positive number X when X
c54a00
+ *         "children" detected, -1 on an internal error, -2 when any
c54a00
+ *         would-be-used IPC is blocked with an unauthorized process
c54a00
+ *
c54a00
+ * \note Since this gets run at the very start, \c respawn_count fields
c54a00
+ *       for particular children get temporarily overloaded with "rounds
c54a00
+ *       of waiting" tracking, restored once we are about to finish with
c54a00
+ *       success (i.e. returning value >=0) and will remain unrestored
c54a00
+ *       otherwise.  One way to suppress liveness detection logic for
c54a00
+ *       particular child is to set the said value to a negative number.
c54a00
+ */
c54a00
+#define WAIT_TRIES 4  /* together with interleaved sleeps, worst case ~ 1s */
c54a00
+static int
c54a00
 find_and_track_existing_processes(void)
c54a00
 {
c54a00
-#if SUPPORT_PROCFS
c54a00
-    DIR *dp;
c54a00
-    struct dirent *entry;
c54a00
-    bool start_tracker = FALSE;
c54a00
-    char entry_name[64];
c54a00
-
c54a00
-    dp = opendir("/proc");
c54a00
-    if (!dp) {
c54a00
-        /* no proc directory to search through */
c54a00
-        crm_notice("Can not read /proc directory to track existing components");
c54a00
-        return;
c54a00
-    }
c54a00
-
c54a00
-    while ((entry = readdir(dp)) != NULL) {
c54a00
-        int pid;
c54a00
-        int max = SIZEOF(pcmk_children);
c54a00
-        int i;
c54a00
-
c54a00
-        if (crm_procfs_process_info(entry, entry_name, &pid) < 0) {
c54a00
-            continue;
c54a00
-        }
c54a00
-        for (i = 0; i < max; i++) {
c54a00
-            const char *name = pcmk_children[i].name;
c54a00
-
c54a00
-            if (pcmk_children[i].start_seq == 0) {
c54a00
+    unsigned tracking = 0U;
c54a00
+    bool wait_in_progress;
c54a00
+    int cur;
c54a00
+    size_t i, rounds;
c54a00
+
c54a00
+    for (rounds = 1; rounds <= WAIT_TRIES; rounds++) {
c54a00
+        wait_in_progress = false;
c54a00
+        for (i = 0; i < SIZEOF(pcmk_children); i++) {
c54a00
+            if (!pcmk_children[i].endpoint
c54a00
+                    || pcmk_children[i].respawn_count < 0
c54a00
+                    || !(cur = pcmk_child_active(&pcmk_children[i]))) {
c54a00
+                /* as a speculation, don't give up in the context of
c54a00
+                   pcmk_child_active check if there are more rounds to
c54a00
+                   come for other reasons, but don't artificially wait just
c54a00
+                   because of this, since we would preferably start ASAP */
c54a00
                 continue;
c54a00
             }
c54a00
-            if (pcmk_children[i].flag == crm_proc_stonith_ng) {
c54a00
-                name = "stonithd";
c54a00
-            }
c54a00
-            if (safe_str_eq(entry_name, name) && (crm_pid_active(pid, NULL) == 1)) {
c54a00
-                crm_notice("Tracking existing %s process (pid=%d)", name, pid);
c54a00
-                pcmk_children[i].pid = pid;
c54a00
-                pcmk_children[i].active_before_startup = TRUE;
c54a00
-                start_tracker = TRUE;
c54a00
-                break;
c54a00
+            pcmk_children[i].respawn_count = rounds;
c54a00
+            switch (cur) {
c54a00
+                case 1:
c54a00
+                    if (pcmk_children[i].pid == PCMK__SPECIAL_PID) {
c54a00
+                        if (crm_is_true(getenv("PCMK_fail_fast"))) {
c54a00
+                            crm_crit("Cannot reliably track pre-existing"
c54a00
+                                     " authentic process behind %s IPC on this"
c54a00
+                                     " platform and PCMK_fail_fast requested",
c54a00
+                                     pcmk_children[i].endpoint);
c54a00
+                            return -1;
c54a00
+                        } else if (pcmk_children[i].respawn_count == WAIT_TRIES) {
c54a00
+                            crm_notice("Assuming pre-existing authentic, though"
c54a00
+                                       " on this platform untrackable, process"
c54a00
+                                       " behind %s IPC is stable (was in %d"
c54a00
+                                       " previous samples) so rather than"
c54a00
+                                       " bailing out (PCMK_fail_fast not"
c54a00
+                                       " requested), we just switch to a less"
c54a00
+                                       " optimal IPC liveness monitoring"
c54a00
+                                       " (not very suitable for heavy load)",
c54a00
+                                       pcmk_children[i].name, WAIT_TRIES - 1);
c54a00
+                            crm_warn("The process behind %s IPC cannot be"
c54a00
+                                     " terminated, so the overall shutdown"
c54a00
+                                     " will get delayed implicitly (%ld s),"
c54a00
+                                     " which serves as a graceful period for"
c54a00
+                                     " its native termination if it vitally"
c54a00
+                                     " depends on some other daemons going"
c54a00
+                                     " down in a controlled way already",
c54a00
+                                     pcmk_children[i].name,
c54a00
+                                     (long) SHUTDOWN_ESCALATION_PERIOD);
c54a00
+                        } else {
c54a00
+                            wait_in_progress = true;
c54a00
+                            crm_warn("Cannot reliably track pre-existing"
c54a00
+                                     " authentic process behind %s IPC on this"
c54a00
+                                     " platform, can still disappear in %d"
c54a00
+                                     " attempt(s)", pcmk_children[i].endpoint,
c54a00
+                                     WAIT_TRIES - pcmk_children[i].respawn_count);
c54a00
+                            continue;
c54a00
+                        }
c54a00
+                    }
c54a00
+                    crm_notice("Tracking existing %s process (pid=%lld)",
c54a00
+                               pcmk_children[i].name,
c54a00
+                               (long long) PCMK__SPECIAL_PID_AS_0(
c54a00
+                                               pcmk_children[i].pid));
c54a00
+                    pcmk_children[i].respawn_count = -1;  /* 0~keep watching */
c54a00
+                    pcmk_children[i].active_before_startup = TRUE;
c54a00
+                    tracking++;
c54a00
+                    break;
c54a00
+                case 2:
c54a00
+                    if (pcmk_children[i].respawn_count == WAIT_TRIES) {
c54a00
+                        crm_crit("%s IPC end-point for existing authentic"
c54a00
+                                 " process %lld did not (re)appear",
c54a00
+                                 pcmk_children[i].endpoint,
c54a00
+                                 (long long) PCMK__SPECIAL_PID_AS_0(
c54a00
+                                                 pcmk_children[i].pid));
c54a00
+                        return -1;
c54a00
+                    }
c54a00
+                    wait_in_progress = true;
c54a00
+                    crm_warn("Cannot find %s IPC end-point for existing"
c54a00
+                             " authentic process %lld, can still (re)appear"
c54a00
+                             " in %d attempts (?)",
c54a00
+                             pcmk_children[i].endpoint,
c54a00
+                             (long long) PCMK__SPECIAL_PID_AS_0(
c54a00
+                                             pcmk_children[i].pid),
c54a00
+                             WAIT_TRIES - pcmk_children[i].respawn_count);
c54a00
+                    continue;
c54a00
+                case -1:
c54a00
+                case -2:
c54a00
+                    return cur;  /* messages already emitted */
c54a00
+                default:
c54a00
+                    crm_crit("Unexpected condition"CRM_XS"cur=%d", cur);
c54a00
+                    return -1;  /* unexpected condition */
c54a00
             }
c54a00
         }
c54a00
+        if (!wait_in_progress) {
c54a00
+            break;
c54a00
+        }
c54a00
+        (void) poll(NULL, 0, 250);  /* a bit for changes to possibly happen */
c54a00
+    }
c54a00
+    for (i = 0; i < SIZEOF(pcmk_children); i++) {
c54a00
+        pcmk_children[i].respawn_count = 0;  /* restore pristine state */
c54a00
     }
c54a00
 
c54a00
-    if (start_tracker) {
c54a00
-        g_timeout_add_seconds(PCMK_PROCESS_CHECK_INTERVAL, check_active_before_startup_processes,
c54a00
-                              NULL);
c54a00
+    if (tracking) {
c54a00
+        g_timeout_add_seconds(PCMK_PROCESS_CHECK_INTERVAL,
c54a00
+                              check_active_before_startup_processes, NULL);
c54a00
     }
c54a00
-    closedir(dp);
c54a00
-#else
c54a00
-    crm_notice("No procfs support, so skipping check for existing components");
c54a00
-#endif // SUPPORT_PROCFS
c54a00
+    return (tracking > INT_MAX) ? INT_MAX : tracking;
c54a00
 }
c54a00
 
c54a00
 static void
c54a00
@@ -1106,7 +1390,16 @@ main(int argc, char **argv)
c54a00
         setenv("PCMK_watchdog", "false", 1);
c54a00
     }
c54a00
 
c54a00
-    find_and_track_existing_processes();
c54a00
+    switch (find_and_track_existing_processes()) {
c54a00
+        case -1:
c54a00
+            crm_crit("Internal fatality, see the log");
c54a00
+            crm_exit(DAEMON_RESPAWN_STOP);
c54a00
+        case -2:
c54a00
+            crm_crit("Blocked by foreign process, kill the offender");
c54a00
+            crm_exit(ENOLCK);
c54a00
+        default:
c54a00
+            break;
c54a00
+    };
c54a00
 
c54a00
     cluster.destroy = mcp_cpg_destroy;
c54a00
     cluster.cpg.cpg_deliver_fn = mcp_cpg_deliver;
c54a00
-- 
c54a00
1.8.3.1
c54a00
c54a00
c54a00
From 07a82c5c8f9d60989ea88c5a3cc316ee290ea784 Mon Sep 17 00:00:00 2001
c54a00
From: =?UTF-8?q?Jan=20Pokorn=C3=BD?= <jpokorny@redhat.com>
c54a00
Date: Tue, 16 Apr 2019 00:04:57 +0200
c54a00
Subject: [PATCH 5/7] High: pacemakerd vs. IPC/procfs confused deputy
c54a00
 authenticity issue (3/4)
c54a00
c54a00
[3/4: other daemons to authenticate IPC servers of fellow processes]
c54a00
c54a00
Now that CVE-2018-16877 issue alone is still only partially covered
c54a00
based on the preceding commits in the set, put the server-by-client
c54a00
authentication (enabled in 1/3 and partially sported in 2/3) into
c54a00
practice widely amongst the communicating pacemaker child daemons and
c54a00
towards CPG API provided by 3rd party but principally using the same
c54a00
underlying IPC mechanism facilitated by libqb, and consequently close
c54a00
the remaining "big gap".
c54a00
c54a00
As a small justification to introducing yet another "return
c54a00
value" int variable, type-correctness is restored for those
c54a00
that shall be cs_error_t to begin with.
c54a00
---
c54a00
 lib/cluster/corosync.c | 178 +++++++++++++++++++++++++++++++++++++++++--------
c54a00
 lib/cluster/cpg.c      |  94 +++++++++++++++++++-------
c54a00
 lib/common/ipc.c       |  43 +++++++++++-
c54a00
 mcp/corosync.c         |  76 ++++++++++++++++-----
c54a00
 4 files changed, 320 insertions(+), 71 deletions(-)
c54a00
c54a00
diff --git a/lib/cluster/corosync.c b/lib/cluster/corosync.c
c54a00
index 9719541..0acf9b2 100644
c54a00
--- a/lib/cluster/corosync.c
c54a00
+++ b/lib/cluster/corosync.c
c54a00
@@ -1,19 +1,10 @@
c54a00
 /*
c54a00
- * Copyright (C) 2004 Andrew Beekhof <andrew@beekhof.net>
c54a00
+ * Copyright 2004-2019 the Pacemaker project contributors
c54a00
  *
c54a00
- * This library is free software; you can redistribute it and/or
c54a00
- * modify it under the terms of the GNU Lesser General Public
c54a00
- * License as published by the Free Software Foundation; either
c54a00
- * version 2.1 of the License, or (at your option) any later version.
c54a00
+ * The version control history for this file may have further details.
c54a00
  *
c54a00
- * This library is distributed in the hope that it will be useful,
c54a00
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
c54a00
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
c54a00
- * Lesser General Public License for more details.
c54a00
- *
c54a00
- * You should have received a copy of the GNU Lesser General Public
c54a00
- * License along with this library; if not, write to the Free Software
c54a00
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
c54a00
+ * This source code is licensed under the GNU Lesser General Public License
c54a00
+ * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
c54a00
  */
c54a00
 
c54a00
 #include <crm_internal.h>
c54a00
@@ -40,6 +31,8 @@
c54a00
 
c54a00
 #include <crm/msg_xml.h>
c54a00
 
c54a00
+#include <crm/common/ipc_internal.h>  /* PCMK__SPECIAL_PID* */
c54a00
+
c54a00
 quorum_handle_t pcmk_quorum_handle = 0;
c54a00
 
c54a00
 gboolean(*quorum_app_callback) (unsigned long long seq, gboolean quorate) = NULL;
c54a00
@@ -52,10 +45,15 @@ char *
c54a00
 corosync_node_name(uint64_t /*cmap_handle_t */ cmap_handle, uint32_t nodeid)
c54a00
 {
c54a00
     int lpc = 0;
c54a00
-    int rc = CS_OK;
c54a00
+    cs_error_t rc = CS_OK;
c54a00
     int retries = 0;
c54a00
     char *name = NULL;
c54a00
     cmap_handle_t local_handle = 0;
c54a00
+    int fd = -1;
c54a00
+    uid_t found_uid = 0;
c54a00
+    gid_t found_gid = 0;
c54a00
+    pid_t found_pid = 0;
c54a00
+    int rv;
c54a00
 
c54a00
     /* nodeid == 0 == CMAN_NODEID_US */
c54a00
     if (nodeid == 0) {
c54a00
@@ -85,6 +83,27 @@ corosync_node_name(uint64_t /*cmap_handle_t */ cmap_handle, uint32_t nodeid)
c54a00
 
c54a00
     if (cmap_handle == 0) {
c54a00
         cmap_handle = local_handle;
c54a00
+
c54a00
+        rc = cmap_fd_get(cmap_handle, &fd);
c54a00
+        if (rc != CS_OK) {
c54a00
+            crm_err("Could not obtain the CMAP API connection: %s (%d)",
c54a00
+                    cs_strerror(rc), rc);
c54a00
+            goto bail;
c54a00
+        }
c54a00
+
c54a00
+        /* CMAP provider run as root (in given user namespace, anyway)? */
c54a00
+        if (!(rv = crm_ipc_is_authentic_process(fd, (uid_t) 0,(gid_t) 0, &found_pid,
c54a00
+                                                &found_uid, &found_gid))) {
c54a00
+            crm_err("CMAP provider is not authentic:"
c54a00
+                    " process %lld (uid: %lld, gid: %lld)",
c54a00
+                    (long long) PCMK__SPECIAL_PID_AS_0(found_pid),
c54a00
+                    (long long) found_uid, (long long) found_gid);
c54a00
+            goto bail;
c54a00
+        } else if (rv < 0) {
c54a00
+            crm_err("Could not verify authenticity of CMAP provider: %s (%d)",
c54a00
+                    strerror(-rv), -rv);
c54a00
+            goto bail;
c54a00
+        }
c54a00
     }
c54a00
 
c54a00
     while (name == NULL && cmap_handle != 0) {
c54a00
@@ -126,6 +145,7 @@ corosync_node_name(uint64_t /*cmap_handle_t */ cmap_handle, uint32_t nodeid)
c54a00
         lpc++;
c54a00
     }
c54a00
 
c54a00
+bail:
c54a00
     if(local_handle) {
c54a00
         cmap_finalize(local_handle);
c54a00
     }
c54a00
@@ -249,11 +269,15 @@ gboolean
c54a00
 cluster_connect_quorum(gboolean(*dispatch) (unsigned long long, gboolean),
c54a00
                        void (*destroy) (gpointer))
c54a00
 {
c54a00
-    int rc = -1;
c54a00
+    cs_error_t rc;
c54a00
     int fd = 0;
c54a00
     int quorate = 0;
c54a00
     uint32_t quorum_type = 0;
c54a00
     struct mainloop_fd_callbacks quorum_fd_callbacks;
c54a00
+    uid_t found_uid = 0;
c54a00
+    gid_t found_gid = 0;
c54a00
+    pid_t found_pid = 0;
c54a00
+    int rv;
c54a00
 
c54a00
     quorum_fd_callbacks.dispatch = pcmk_quorum_dispatch;
c54a00
     quorum_fd_callbacks.destroy = destroy;
c54a00
@@ -262,7 +286,8 @@ cluster_connect_quorum(gboolean(*dispatch) (unsigned long long, gboolean),
c54a00
 
c54a00
     rc = quorum_initialize(&pcmk_quorum_handle, &quorum_callbacks, &quorum_type);
c54a00
     if (rc != CS_OK) {
c54a00
-        crm_err("Could not connect to the Quorum API: %d", rc);
c54a00
+        crm_err("Could not connect to the Quorum API: %s (%d)",
c54a00
+                cs_strerror(rc), rc);
c54a00
         goto bail;
c54a00
 
c54a00
     } else if (quorum_type != QUORUM_SET) {
c54a00
@@ -270,6 +295,29 @@ cluster_connect_quorum(gboolean(*dispatch) (unsigned long long, gboolean),
c54a00
         goto bail;
c54a00
     }
c54a00
 
c54a00
+    rc = quorum_fd_get(pcmk_quorum_handle, &fd);
c54a00
+    if (rc != CS_OK) {
c54a00
+        crm_err("Could not obtain the Quorum API connection: %s (%d)",
c54a00
+                strerror(rc), rc);
c54a00
+        goto bail;
c54a00
+    }
c54a00
+
c54a00
+    /* Quorum provider run as root (in given user namespace, anyway)? */
c54a00
+    if (!(rv = crm_ipc_is_authentic_process(fd, (uid_t) 0,(gid_t) 0, &found_pid,
c54a00
+                                            &found_uid, &found_gid))) {
c54a00
+        crm_err("Quorum provider is not authentic:"
c54a00
+                " process %lld (uid: %lld, gid: %lld)",
c54a00
+                (long long) PCMK__SPECIAL_PID_AS_0(found_pid),
c54a00
+                (long long) found_uid, (long long) found_gid);
c54a00
+        rc = CS_ERR_ACCESS;
c54a00
+        goto bail;
c54a00
+    } else if (rv < 0) {
c54a00
+        crm_err("Could not verify authenticity of Quorum provider: %s (%d)",
c54a00
+                strerror(-rv), -rv);
c54a00
+        rc = CS_ERR_ACCESS;
c54a00
+        goto bail;
c54a00
+    }
c54a00
+
c54a00
     rc = quorum_getquorate(pcmk_quorum_handle, &quorate);
c54a00
     if (rc != CS_OK) {
c54a00
         crm_err("Could not obtain the current Quorum API state: %d", rc);
c54a00
@@ -290,12 +338,6 @@ cluster_connect_quorum(gboolean(*dispatch) (unsigned long long, gboolean),
c54a00
         goto bail;
c54a00
     }
c54a00
 
c54a00
-    rc = quorum_fd_get(pcmk_quorum_handle, &fd);
c54a00
-    if (rc != CS_OK) {
c54a00
-        crm_err("Could not obtain the Quorum API connection: %d", rc);
c54a00
-        goto bail;
c54a00
-    }
c54a00
-
c54a00
     mainloop_add_fd("quorum", G_PRIORITY_HIGH, fd, dispatch, &quorum_fd_callbacks);
c54a00
 
c54a00
     corosync_initialize_nodelist(NULL, FALSE, NULL);
c54a00
@@ -486,10 +528,15 @@ gboolean
c54a00
 corosync_initialize_nodelist(void *cluster, gboolean force_member, xmlNode * xml_parent)
c54a00
 {
c54a00
     int lpc = 0;
c54a00
-    int rc = CS_OK;
c54a00
+    cs_error_t rc = CS_OK;
c54a00
     int retries = 0;
c54a00
     gboolean any = FALSE;
c54a00
     cmap_handle_t cmap_handle;
c54a00
+    int fd = -1;
c54a00
+    uid_t found_uid = 0;
c54a00
+    gid_t found_gid = 0;
c54a00
+    pid_t found_pid = 0;
c54a00
+    int rv;
c54a00
 
c54a00
     do {
c54a00
         rc = cmap_initialize(&cmap_handle);
c54a00
@@ -507,6 +554,27 @@ corosync_initialize_nodelist(void *cluster, gboolean force_member, xmlNode * xml
c54a00
         return FALSE;
c54a00
     }
c54a00
 
c54a00
+    rc = cmap_fd_get(cmap_handle, &fd);
c54a00
+    if (rc != CS_OK) {
c54a00
+        crm_err("Could not obtain the CMAP API connection: %s (%d)",
c54a00
+                cs_strerror(rc), rc);
c54a00
+        goto bail;
c54a00
+    }
c54a00
+
c54a00
+    /* CMAP provider run as root (in given user namespace, anyway)? */
c54a00
+    if (!(rv = crm_ipc_is_authentic_process(fd, (uid_t) 0,(gid_t) 0, &found_pid,
c54a00
+                                            &found_uid, &found_gid))) {
c54a00
+        crm_err("CMAP provider is not authentic:"
c54a00
+                " process %lld (uid: %lld, gid: %lld)",
c54a00
+                (long long) PCMK__SPECIAL_PID_AS_0(found_pid),
c54a00
+                (long long) found_uid, (long long) found_gid);
c54a00
+        goto bail;
c54a00
+    } else if (rv < 0) {
c54a00
+        crm_err("Could not verify authenticity of CMAP provider: %s (%d)",
c54a00
+                strerror(-rv), -rv);
c54a00
+        goto bail;
c54a00
+    }
c54a00
+
c54a00
     crm_peer_init();
c54a00
     crm_trace("Initializing corosync nodelist");
c54a00
     for (lpc = 0; TRUE; lpc++) {
c54a00
@@ -560,6 +628,7 @@ corosync_initialize_nodelist(void *cluster, gboolean force_member, xmlNode * xml
c54a00
 
c54a00
         free(name);
c54a00
     }
c54a00
+bail:
c54a00
     cmap_finalize(cmap_handle);
c54a00
     return any;
c54a00
 }
c54a00
@@ -569,36 +638,68 @@ corosync_cluster_name(void)
c54a00
 {
c54a00
     cmap_handle_t handle;
c54a00
     char *cluster_name = NULL;
c54a00
-    int rc = CS_OK;
c54a00
+    cs_error_t rc = CS_OK;
c54a00
+    int fd = -1;
c54a00
+    uid_t found_uid = 0;
c54a00
+    gid_t found_gid = 0;
c54a00
+    pid_t found_pid = 0;
c54a00
+    int rv;
c54a00
 
c54a00
     rc = cmap_initialize(&handle);
c54a00
     if (rc != CS_OK) {
c54a00
-        crm_info("Failed to initialize the cmap API: %s (%d)", ais_error2text(rc), rc);
c54a00
+        crm_info("Failed to initialize the cmap API: %s (%d)",
c54a00
+                 cs_strerror(rc), rc);
c54a00
         return NULL;
c54a00
     }
c54a00
 
c54a00
+    rc = cmap_fd_get(handle, &fd);
c54a00
+    if (rc != CS_OK) {
c54a00
+        crm_err("Could not obtain the CMAP API connection: %s (%d)",
c54a00
+                cs_strerror(rc), rc);
c54a00
+        goto bail;
c54a00
+    }
c54a00
+
c54a00
+    /* CMAP provider run as root (in given user namespace, anyway)? */
c54a00
+    if (!(rv = crm_ipc_is_authentic_process(fd, (uid_t) 0,(gid_t) 0, &found_pid,
c54a00
+                                            &found_uid, &found_gid))) {
c54a00
+        crm_err("CMAP provider is not authentic:"
c54a00
+                " process %lld (uid: %lld, gid: %lld)",
c54a00
+                (long long) PCMK__SPECIAL_PID_AS_0(found_pid),
c54a00
+                (long long) found_uid, (long long) found_gid);
c54a00
+        goto bail;
c54a00
+    } else if (rv < 0) {
c54a00
+        crm_err("Could not verify authenticity of CMAP provider: %s (%d)",
c54a00
+                strerror(-rv), -rv);
c54a00
+        goto bail;
c54a00
+    }
c54a00
+
c54a00
     rc = cmap_get_string(handle, "totem.cluster_name", &cluster_name);
c54a00
     if (rc != CS_OK) {
c54a00
-        crm_info("Cannot get totem.cluster_name: %s (%d)", ais_error2text(rc), rc);
c54a00
+        crm_info("Cannot get totem.cluster_name: %s (%d)", cs_strerror(rc), rc);
c54a00
 
c54a00
     } else {
c54a00
         crm_debug("cmap totem.cluster_name = '%s'", cluster_name);
c54a00
     }
c54a00
 
c54a00
+bail:
c54a00
     cmap_finalize(handle);
c54a00
-
c54a00
     return cluster_name;
c54a00
 }
c54a00
 
c54a00
 int
c54a00
 corosync_cmap_has_config(const char *prefix)
c54a00
 {
c54a00
-    int rc = CS_OK;
c54a00
+    cs_error_t rc = CS_OK;
c54a00
     int retries = 0;
c54a00
     static int found = -1;
c54a00
     cmap_handle_t cmap_handle;
c54a00
     cmap_iter_handle_t iter_handle;
c54a00
     char key_name[CMAP_KEYNAME_MAXLEN + 1];
c54a00
+    int fd = -1;
c54a00
+    uid_t found_uid = 0;
c54a00
+    gid_t found_gid = 0;
c54a00
+    pid_t found_pid = 0;
c54a00
+    int rv;
c54a00
 
c54a00
     if(found != -1) {
c54a00
         return found;
c54a00
@@ -621,6 +722,27 @@ corosync_cmap_has_config(const char *prefix)
c54a00
         return -1;
c54a00
     }
c54a00
 
c54a00
+    rc = cmap_fd_get(cmap_handle, &fd);
c54a00
+    if (rc != CS_OK) {
c54a00
+        crm_err("Could not obtain the CMAP API connection: %s (%d)",
c54a00
+                cs_strerror(rc), rc);
c54a00
+        goto bail;
c54a00
+    }
c54a00
+
c54a00
+    /* CMAP provider run as root (in given user namespace, anyway)? */
c54a00
+    if (!(rv = crm_ipc_is_authentic_process(fd, (uid_t) 0,(gid_t) 0, &found_pid,
c54a00
+                                            &found_uid, &found_gid))) {
c54a00
+        crm_err("CMAP provider is not authentic:"
c54a00
+                " process %lld (uid: %lld, gid: %lld)",
c54a00
+                (long long) PCMK__SPECIAL_PID_AS_0(found_pid),
c54a00
+                (long long) found_uid, (long long) found_gid);
c54a00
+        goto bail;
c54a00
+    } else if (rv < 0) {
c54a00
+        crm_err("Could not verify authenticity of CMAP provider: %s (%d)",
c54a00
+                strerror(-rv), -rv);
c54a00
+        goto bail;
c54a00
+    }
c54a00
+
c54a00
     rc = cmap_iter_init(cmap_handle, prefix, &iter_handle);
c54a00
     if (rc != CS_OK) {
c54a00
         crm_warn("Failed to initialize iteration for corosync cmap '%s': %s (rc=%d)",
c54a00
diff --git a/lib/cluster/cpg.c b/lib/cluster/cpg.c
c54a00
index 1e6cf79..a61d492 100644
c54a00
--- a/lib/cluster/cpg.c
c54a00
+++ b/lib/cluster/cpg.c
c54a00
@@ -1,19 +1,10 @@
c54a00
 /*
c54a00
- * Copyright (C) 2004 Andrew Beekhof <andrew@beekhof.net>
c54a00
+ * Copyright 2004-2019 the Pacemaker project contributors
c54a00
  *
c54a00
- * This library is free software; you can redistribute it and/or
c54a00
- * modify it under the terms of the GNU Lesser General Public
c54a00
- * License as published by the Free Software Foundation; either
c54a00
- * version 2.1 of the License, or (at your option) any later version.
c54a00
+ * The version control history for this file may have further details.
c54a00
  *
c54a00
- * This library is distributed in the hope that it will be useful,
c54a00
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
c54a00
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
c54a00
- * Lesser General Public License for more details.
c54a00
- *
c54a00
- * You should have received a copy of the GNU Lesser General Public
c54a00
- * License along with this library; if not, write to the Free Software
c54a00
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
c54a00
+ * This source code is licensed under the GNU Lesser General Public License
c54a00
+ * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
c54a00
  */
c54a00
 
c54a00
 #include <crm_internal.h>
c54a00
@@ -38,6 +29,8 @@
c54a00
 
c54a00
 #include <crm/msg_xml.h>
c54a00
 
c54a00
+#include <crm/common/ipc_internal.h>  /* PCMK__SPECIAL_PID* */
c54a00
+
c54a00
 cpg_handle_t pcmk_cpg_handle = 0; /* TODO: Remove, use cluster.cpg_handle */
c54a00
 
c54a00
 static bool cpg_evicted = FALSE;
c54a00
@@ -71,11 +64,16 @@ cluster_disconnect_cpg(crm_cluster_t *cluster)
c54a00
 
c54a00
 uint32_t get_local_nodeid(cpg_handle_t handle)
c54a00
 {
c54a00
-    int rc = CS_OK;
c54a00
+    cs_error_t rc = CS_OK;
c54a00
     int retries = 0;
c54a00
     static uint32_t local_nodeid = 0;
c54a00
     cpg_handle_t local_handle = handle;
c54a00
     cpg_callbacks_t cb = { };
c54a00
+    int fd = -1;
c54a00
+    uid_t found_uid = 0;
c54a00
+    gid_t found_gid = 0;
c54a00
+    pid_t found_pid = 0;
c54a00
+    int rv;
c54a00
 
c54a00
     if(local_nodeid != 0) {
c54a00
         return local_nodeid;
c54a00
@@ -92,6 +90,32 @@ uint32_t get_local_nodeid(cpg_handle_t handle)
c54a00
     if(handle == 0) {
c54a00
         crm_trace("Creating connection");
c54a00
         cs_repeat(retries, 5, rc = cpg_initialize(&local_handle, &cb));
c54a00
+        if (rc != CS_OK) {
c54a00
+            crm_err("Could not connect to the CPG API: %s (%d)",
c54a00
+                    cs_strerror(rc), rc);
c54a00
+            return 0;
c54a00
+        }
c54a00
+
c54a00
+        rc = cpg_fd_get(local_handle, &fd);
c54a00
+        if (rc != CS_OK) {
c54a00
+            crm_err("Could not obtain the CPG API connection: %s (%d)",
c54a00
+                    cs_strerror(rc), rc);
c54a00
+            goto bail;
c54a00
+        }
c54a00
+
c54a00
+        /* CPG provider run as root (in given user namespace, anyway)? */
c54a00
+        if (!(rv = crm_ipc_is_authentic_process(fd, (uid_t) 0,(gid_t) 0, &found_pid,
c54a00
+                                                &found_uid, &found_gid))) {
c54a00
+            crm_err("CPG provider is not authentic:"
c54a00
+                    " process %lld (uid: %lld, gid: %lld)",
c54a00
+                    (long long) PCMK__SPECIAL_PID_AS_0(found_pid),
c54a00
+                    (long long) found_uid, (long long) found_gid);
c54a00
+            goto bail;
c54a00
+        } else if (rv < 0) {
c54a00
+            crm_err("Could not verify authenticity of CPG provider: %s (%d)",
c54a00
+                    strerror(-rv), -rv);
c54a00
+            goto bail;
c54a00
+        }
c54a00
     }
c54a00
 
c54a00
     if (rc == CS_OK) {
c54a00
@@ -103,6 +127,8 @@ uint32_t get_local_nodeid(cpg_handle_t handle)
c54a00
     if (rc != CS_OK) {
c54a00
         crm_err("Could not get local node id from the CPG API: %s (%d)", ais_error2text(rc), rc);
c54a00
     }
c54a00
+
c54a00
+bail:
c54a00
     if(handle == 0) {
c54a00
         crm_trace("Closing connection");
c54a00
         cpg_finalize(local_handle);
c54a00
@@ -435,12 +461,16 @@ pcmk_cpg_membership(cpg_handle_t handle,
c54a00
 gboolean
c54a00
 cluster_connect_cpg(crm_cluster_t *cluster)
c54a00
 {
c54a00
-    int rc = -1;
c54a00
-    int fd = 0;
c54a00
+    cs_error_t rc;
c54a00
+    int fd = -1;
c54a00
     int retries = 0;
c54a00
     uint32_t id = 0;
c54a00
     crm_node_t *peer = NULL;
c54a00
     cpg_handle_t handle = 0;
c54a00
+    uid_t found_uid = 0;
c54a00
+    gid_t found_gid = 0;
c54a00
+    pid_t found_pid = 0;
c54a00
+    int rv;
c54a00
 
c54a00
     struct mainloop_fd_callbacks cpg_fd_callbacks = {
c54a00
         .dispatch = pcmk_cpg_dispatch,
c54a00
@@ -465,7 +495,31 @@ cluster_connect_cpg(crm_cluster_t *cluster)
c54a00
 
c54a00
     cs_repeat(retries, 30, rc = cpg_initialize(&handle, &cpg_callbacks));
c54a00
     if (rc != CS_OK) {
c54a00
-        crm_err("Could not connect to the Cluster Process Group API: %d", rc);
c54a00
+        crm_err("Could not connect to the CPG API: %s (%d)",
c54a00
+                cs_strerror(rc), rc);
c54a00
+        goto bail;
c54a00
+    }
c54a00
+
c54a00
+    rc = cpg_fd_get(handle, &fd);
c54a00
+    if (rc != CS_OK) {
c54a00
+        crm_err("Could not obtain the CPG API connection: %s (%d)",
c54a00
+                cs_strerror(rc), rc);
c54a00
+        goto bail;
c54a00
+    }
c54a00
+
c54a00
+    /* CPG provider run as root (in given user namespace, anyway)? */
c54a00
+    if (!(rv = crm_ipc_is_authentic_process(fd, (uid_t) 0,(gid_t) 0, &found_pid,
c54a00
+                                            &found_uid, &found_gid))) {
c54a00
+        crm_err("CPG provider is not authentic:"
c54a00
+                " process %lld (uid: %lld, gid: %lld)",
c54a00
+                (long long) PCMK__SPECIAL_PID_AS_0(found_pid),
c54a00
+                (long long) found_uid, (long long) found_gid);
c54a00
+        rc = CS_ERR_ACCESS;
c54a00
+        goto bail;
c54a00
+    } else if (rv < 0) {
c54a00
+        crm_err("Could not verify authenticity of CPG provider: %s (%d)",
c54a00
+                strerror(-rv), -rv);
c54a00
+        rc = CS_ERR_ACCESS;
c54a00
         goto bail;
c54a00
     }
c54a00
 
c54a00
@@ -484,12 +538,6 @@ cluster_connect_cpg(crm_cluster_t *cluster)
c54a00
         goto bail;
c54a00
     }
c54a00
 
c54a00
-    rc = cpg_fd_get(handle, &fd);
c54a00
-    if (rc != CS_OK) {
c54a00
-        crm_err("Could not obtain the CPG API connection: %d", rc);
c54a00
-        goto bail;
c54a00
-    }
c54a00
-
c54a00
     pcmk_cpg_handle = handle;
c54a00
     cluster->cpg_handle = handle;
c54a00
     mainloop_add_fd("corosync-cpg", G_PRIORITY_MEDIUM, fd, cluster, &cpg_fd_callbacks);
c54a00
diff --git a/lib/common/ipc.c b/lib/common/ipc.c
c54a00
index 5b47dd6..3e547f3 100644
c54a00
--- a/lib/common/ipc.c
c54a00
+++ b/lib/common/ipc.c
c54a00
@@ -916,11 +916,18 @@ crm_ipc_new(const char *name, size_t max_size)
c54a00
  *
c54a00
  * \param[in] client  Connection instance obtained from crm_ipc_new()
c54a00
  *
c54a00
- * \return TRUE on success, FALSE otherwise (in which case errno will be set)
c54a00
+ * \return TRUE on success, FALSE otherwise (in which case errno will be set;
c54a00
+ *         specifically, in case of discovering the remote side is not
c54a00
+ *         authentic, its value is set to ECONNABORTED).
c54a00
  */
c54a00
 bool
c54a00
 crm_ipc_connect(crm_ipc_t * client)
c54a00
 {
c54a00
+    static uid_t cl_uid = 0;
c54a00
+    static gid_t cl_gid = 0;
c54a00
+    pid_t found_pid = 0; uid_t found_uid = 0; gid_t found_gid = 0;
c54a00
+    int rv;
c54a00
+
c54a00
     client->need_reply = FALSE;
c54a00
     client->ipc = qb_ipcc_connect(client->name, client->buf_size);
c54a00
 
c54a00
@@ -931,7 +938,39 @@ crm_ipc_connect(crm_ipc_t * client)
c54a00
 
c54a00
     client->pfd.fd = crm_ipc_get_fd(client);
c54a00
     if (client->pfd.fd < 0) {
c54a00
-        crm_debug("Could not obtain file descriptor for %s connection: %s (%d)", client->name, pcmk_strerror(errno), errno);
c54a00
+        rv = errno;
c54a00
+        /* message already omitted */
c54a00
+        crm_ipc_close(client);
c54a00
+        errno = rv;
c54a00
+        return FALSE;
c54a00
+    }
c54a00
+
c54a00
+    if (!cl_uid && !cl_gid
c54a00
+            && (rv = crm_user_lookup(CRM_DAEMON_USER, &cl_uid, &cl_gid)) < 0) {
c54a00
+        errno = -rv;
c54a00
+        /* message already omitted */
c54a00
+        crm_ipc_close(client);
c54a00
+        errno = -rv;
c54a00
+        return FALSE;
c54a00
+    }
c54a00
+
c54a00
+    if (!(rv = crm_ipc_is_authentic_process(client->pfd.fd, cl_uid, cl_gid,
c54a00
+                                            &found_pid, &found_uid,
c54a00
+                                            &found_gid))) {
c54a00
+        crm_err("Daemon (IPC %s) is not authentic:"
c54a00
+                " process %lld (uid: %lld, gid: %lld)",
c54a00
+                client->name,  (long long) PCMK__SPECIAL_PID_AS_0(found_pid),
c54a00
+                (long long) found_uid, (long long) found_gid);
c54a00
+        crm_ipc_close(client);
c54a00
+        errno = ECONNABORTED;
c54a00
+        return FALSE;
c54a00
+
c54a00
+    } else if (rv < 0) {
c54a00
+        errno = -rv;
c54a00
+        crm_perror(LOG_ERR, "Could not verify authenticity of daemon (IPC %s)",
c54a00
+                   client->name);
c54a00
+        crm_ipc_close(client);
c54a00
+        errno = -rv;
c54a00
         return FALSE;
c54a00
     }
c54a00
 
c54a00
diff --git a/mcp/corosync.c b/mcp/corosync.c
c54a00
index 7502da7..407a63f 100644
c54a00
--- a/mcp/corosync.c
c54a00
+++ b/mcp/corosync.c
c54a00
@@ -1,19 +1,10 @@
c54a00
 /*
c54a00
- * Copyright (C) 2010 Andrew Beekhof <andrew@beekhof.net>
c54a00
+ * Copyright 2010-2019 the Pacemaker project contributors
c54a00
  *
c54a00
- * This program is free software; you can redistribute it and/or
c54a00
- * modify it under the terms of the GNU General Public
c54a00
- * License as published by the Free Software Foundation; either
c54a00
- * version 2 of the License, or (at your option) any later version.
c54a00
+ * The version control history for this file may have further details.
c54a00
  *
c54a00
- * This software is distributed in the hope that it will be useful,
c54a00
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
c54a00
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
c54a00
- * General Public License for more details.
c54a00
- *
c54a00
- * You should have received a copy of the GNU General Public
c54a00
- * License along with this library; if not, write to the Free Software
c54a00
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
c54a00
+ * This source code is licensed under the GNU General Public License version 2
c54a00
+ * or later (GPLv2+) WITHOUT ANY WARRANTY.
c54a00
  */
c54a00
 #include <crm_internal.h>
c54a00
 #include <pacemaker.h>
c54a00
@@ -33,8 +24,11 @@
c54a00
 #endif
c54a00
 
c54a00
 #include <crm/cluster/internal.h>
c54a00
+#include <crm/common/ipc.h>     /* for crm_ipc_is_authentic_process */
c54a00
 #include <crm/common/mainloop.h>
c54a00
 
c54a00
+#include <crm/common/ipc_internal.h>  /* PCMK__SPECIAL_PID* */
c54a00
+
c54a00
 #if SUPPORT_CMAN
c54a00
 #  include <libcman.h>
c54a00
 #endif
c54a00
@@ -111,7 +105,10 @@ gboolean
c54a00
 cluster_connect_cfg(uint32_t * nodeid)
c54a00
 {
c54a00
     cs_error_t rc;
c54a00
-    int fd = 0, retries = 0;
c54a00
+    int fd = -1, retries = 0, rv;
c54a00
+    uid_t found_uid = 0;
c54a00
+    gid_t found_gid = 0;
c54a00
+    pid_t found_pid = 0;
c54a00
 
c54a00
     static struct mainloop_fd_callbacks cfg_fd_callbacks = {
c54a00
         .dispatch = pcmk_cfg_dispatch,
c54a00
@@ -121,13 +118,27 @@ cluster_connect_cfg(uint32_t * nodeid)
c54a00
     cs_repeat(retries, 30, rc = corosync_cfg_initialize(&cfg_handle, &cfg_callbacks));
c54a00
 
c54a00
     if (rc != CS_OK) {
c54a00
-        crm_err("corosync cfg init error %d", rc);
c54a00
+        crm_err("corosync cfg init: %s (%d)", cs_strerror(rc), rc);
c54a00
         return FALSE;
c54a00
     }
c54a00
 
c54a00
     rc = corosync_cfg_fd_get(cfg_handle, &fd);
c54a00
     if (rc != CS_OK) {
c54a00
-        crm_err("corosync cfg fd_get error %d", rc);
c54a00
+        crm_err("corosync cfg fd_get: %s (%d)", cs_strerror(rc), rc);
c54a00
+        goto bail;
c54a00
+    }
c54a00
+
c54a00
+    /* CFG provider run as root (in given user namespace, anyway)? */
c54a00
+    if (!(rv = crm_ipc_is_authentic_process(fd, (uid_t) 0,(gid_t) 0, &found_pid,
c54a00
+                                            &found_uid, &found_gid))) {
c54a00
+        crm_err("CFG provider is not authentic:"
c54a00
+                " process %lld (uid: %lld, gid: %lld)",
c54a00
+                (long long) PCMK__SPECIAL_PID_AS_0(found_pid),
c54a00
+                (long long) found_uid, (long long) found_gid);
c54a00
+        goto bail;
c54a00
+    } else if (rv < 0) {
c54a00
+        crm_err("Could not verify authenticity of CFG provider: %s (%d)",
c54a00
+                strerror(-rv), -rv);
c54a00
         goto bail;
c54a00
     }
c54a00
 
c54a00
@@ -264,7 +275,7 @@ get_config_opt(uint64_t unused, cmap_handle_t object_handle, const char *key, ch
c54a00
 gboolean
c54a00
 mcp_read_config(void)
c54a00
 {
c54a00
-    int rc = CS_OK;
c54a00
+    cs_error_t rc = CS_OK;
c54a00
     int retries = 0;
c54a00
 
c54a00
     const char *const_value = NULL;
c54a00
@@ -287,11 +298,16 @@ mcp_read_config(void)
c54a00
         } else {
c54a00
             break;
c54a00
         }
c54a00
-
c54a00
     } while (retries < 5);
c54a00
+
c54a00
 #elif HAVE_CMAP
c54a00
     cmap_handle_t local_handle;
c54a00
     uint64_t config = 0;
c54a00
+    int fd = -1;
c54a00
+    uid_t found_uid = 0;
c54a00
+    gid_t found_gid = 0;
c54a00
+    pid_t found_pid = 0;
c54a00
+    int rv;
c54a00
 
c54a00
     /* There can be only one (possibility if confdb isn't around) */
c54a00
     do {
c54a00
@@ -315,6 +331,30 @@ mcp_read_config(void)
c54a00
         return FALSE;
c54a00
     }
c54a00
 
c54a00
+    rc = cmap_fd_get(local_handle, &fd);
c54a00
+    if (rc != CS_OK) {
c54a00
+        crm_err("Could not obtain the CMAP API connection: %s (%d)",
c54a00
+                cs_strerror(rc), rc);
c54a00
+        cmap_finalize(local_handle);
c54a00
+        return FALSE;
c54a00
+    }
c54a00
+
c54a00
+    /* CMAP provider run as root (in given user namespace, anyway)? */
c54a00
+    if (!(rv = crm_ipc_is_authentic_process(fd, (uid_t) 0,(gid_t) 0, &found_pid,
c54a00
+                                            &found_uid, &found_gid))) {
c54a00
+        crm_err("CMAP provider is not authentic:"
c54a00
+                " process %lld (uid: %lld, gid: %lld)",
c54a00
+                (long long) PCMK__SPECIAL_PID_AS_0(found_pid),
c54a00
+                (long long) found_uid, (long long) found_gid);
c54a00
+        cmap_finalize(local_handle);
c54a00
+        return FALSE;
c54a00
+    } else if (rv < 0) {
c54a00
+        crm_err("Could not verify authenticity of CMAP provider: %s (%d)",
c54a00
+                strerror(-rv), -rv);
c54a00
+        cmap_finalize(local_handle);
c54a00
+        return FALSE;
c54a00
+    }
c54a00
+
c54a00
     stack = get_cluster_type();
c54a00
     crm_info("Reading configure for stack: %s", name_for_cluster_type(stack));
c54a00
 
c54a00
-- 
c54a00
1.8.3.1
c54a00
c54a00
c54a00
From 4d6f6e01b309cda7b3f8fe791247566d247d8028 Mon Sep 17 00:00:00 2001
c54a00
From: =?UTF-8?q?Jan=20Pokorn=C3=BD?= <jpokorny@redhat.com>
c54a00
Date: Tue, 16 Apr 2019 00:08:28 +0200
c54a00
Subject: [PATCH 6/7] High: pacemakerd vs. IPC/procfs confused deputy
c54a00
 authenticity issue (4/4)
c54a00
c54a00
[4/4: CPG users to be careful about now-more-probable rival processes]
c54a00
c54a00
In essence, this comes down to pacemaker confusing at-node CPG members
c54a00
with effectively the only plausible to co-exist at particular node,
c54a00
which doesn't hold and asks for a wider reconciliation of this
c54a00
reality-check.
c54a00
c54a00
However, in practical terms, since there are two factors lowering the
c54a00
priority of doing so:
c54a00
c54a00
1/ possibly the only non-self-inflicted scenario is either that
c54a00
   some of the cluster stack processes fail -- this is the problem
c54a00
   that shall rather be deferred to arranged node disarming/fencing
c54a00
   to stay on the safe side with 100% certainty, at the cost of
c54a00
   possibly long-lasting failover process at other nodes
c54a00
   (for other possibility, someone running some of these by accident
c54a00
   so they effectively become rival processes, it's like getting
c54a00
   hands cut when playing with a lawnmower in an unintended way)
c54a00
c54a00
2/ for state tracking of the peer nodes, it may possibly cause troubles
c54a00
   in case the process observed as left wasn't the last for the
c54a00
   particular node, even if presumably just temporary, since the
c54a00
   situation may eventually resolve with imposed serialization of
c54a00
   the rival processes via API end-point singleton restriction (this
c54a00
   is also the most likely cause of why such non-final leave gets
c54a00
   observed in the first place), except in one case -- the legitimate
c54a00
   API end-point carrier won't possibly be acknowledged as returned
c54a00
   by its peers, at least not immediately, unless it tries to join
c54a00
   anew, which verges on undefined behaviour (at least per corosync
c54a00
   documentation)
c54a00
c54a00
we make do just with a light code change so as to
c54a00
c54a00
* limit 1/ some more with in-daemon self-check for pre-existing
c54a00
  end-point existence (this is to complement the checks already made in
c54a00
  the parent daemon prior to spawning new instances, only some moments
c54a00
  later; note that we don't have any lock file etc. mechanisms to
c54a00
  prevent parallel runs of the same daemons, and people could run these
c54a00
  on their own deliberation), and to
c54a00
c54a00
* guard against the interferences from the rivals at the same node
c54a00
  per 2/ with ignoring their non-final leave messages altogether.
c54a00
c54a00
Note that CPG at this point is already expected to be authenticity-safe.
c54a00
c54a00
Regarding now-more-probable part, we actually traded the inherently racy
c54a00
procfs scanning for something (exactly that singleton mentioned above)
c54a00
rather firm (and unfakeable), but we admittedly lost track of
c54a00
processes that are after CPG membership (that is, another form of
c54a00
a shared state) prior to (or in non-deterministic order allowing for
c54a00
the same) caring about publishing the end-point.
c54a00
c54a00
Big thanks is owed to Yan Gao of SUSE, for early discovery and reporting
c54a00
this discrepancy arising from the earlier commits in the set.
c54a00
---
c54a00
 attrd/main.c      | 19 ++++++++++-
c54a00
 cib/main.c        | 35 ++++++++++++---------
c54a00
 crmd/main.c       | 35 ++++++++++++---------
c54a00
 fencing/main.c    | 32 +++++++++++--------
c54a00
 lib/cluster/cpg.c | 94 ++++++++++++++++++++++++++++++++++++++++++++++++++-----
c54a00
 5 files changed, 163 insertions(+), 52 deletions(-)
c54a00
c54a00
diff --git a/attrd/main.c b/attrd/main.c
c54a00
index 4cc15cc..e0a1e7c 100644
c54a00
--- a/attrd/main.c
c54a00
+++ b/attrd/main.c
c54a00
@@ -1,5 +1,7 @@
c54a00
 /*
c54a00
- * Copyright 2013-2019 Andrew Beekhof <andrew@beekhof.net>
c54a00
+ * Copyright 2013-2019 the Pacemaker project contributors
c54a00
+ *
c54a00
+ * The version control history for this file may have further details.
c54a00
  *
c54a00
  * This program is free software; you can redistribute it and/or
c54a00
  * modify it under the terms of the GNU General Public
c54a00
@@ -336,6 +338,7 @@ main(int argc, char **argv)
c54a00
     int index = 0;
c54a00
     int argerr = 0;
c54a00
     qb_ipcs_service_t *ipcs = NULL;
c54a00
+    crm_ipc_t *old_instance = NULL;
c54a00
 
c54a00
     attrd_init_mainloop();
c54a00
     crm_log_preinit(NULL, argc, argv);
c54a00
@@ -372,6 +375,20 @@ main(int argc, char **argv)
c54a00
 
c54a00
     crm_log_init(T_ATTRD, LOG_INFO, TRUE, FALSE, argc, argv, FALSE);
c54a00
     crm_info("Starting up");
c54a00
+
c54a00
+    old_instance = crm_ipc_new(T_ATTRD, 0);
c54a00
+    if (crm_ipc_connect(old_instance)) {
c54a00
+        /* IPC end-point already up */
c54a00
+        crm_ipc_close(old_instance);
c54a00
+        crm_ipc_destroy(old_instance);
c54a00
+        crm_err("attrd is already active, aborting startup");
c54a00
+        crm_exit(EX_OK);
c54a00
+    } else {
c54a00
+        /* not up or not authentic, we'll proceed either way */
c54a00
+        crm_ipc_destroy(old_instance);
c54a00
+        old_instance = NULL;
c54a00
+    }
c54a00
+
c54a00
     attributes = g_hash_table_new_full(crm_str_hash, g_str_equal, NULL, free_attribute);
c54a00
 
c54a00
     attrd_exit_status = attrd_cluster_connect();
c54a00
diff --git a/cib/main.c b/cib/main.c
c54a00
index 5473d40..7c745da 100644
c54a00
--- a/cib/main.c
c54a00
+++ b/cib/main.c
c54a00
@@ -1,19 +1,10 @@
c54a00
 /*
c54a00
- * Copyright (C) 2004 Andrew Beekhof <andrew@beekhof.net>
c54a00
+ * Copyright 2004-2019 the Pacemaker project contributors
c54a00
  *
c54a00
- * This program is free software; you can redistribute it and/or
c54a00
- * modify it under the terms of the GNU General Public
c54a00
- * License as published by the Free Software Foundation; either
c54a00
- * version 2 of the License, or (at your option) any later version.
c54a00
+ * The version control history for this file may have further details.
c54a00
  *
c54a00
- * This software is distributed in the hope that it will be useful,
c54a00
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
c54a00
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
c54a00
- * General Public License for more details.
c54a00
- *
c54a00
- * You should have received a copy of the GNU General Public
c54a00
- * License along with this library; if not, write to the Free Software
c54a00
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
c54a00
+ * This source code is licensed under the GNU General Public License version 2
c54a00
+ * or later (GPLv2+) WITHOUT ANY WARRANTY.
c54a00
  */
c54a00
 
c54a00
 #include <crm_internal.h>
c54a00
@@ -135,13 +126,12 @@ main(int argc, char **argv)
c54a00
     int index = 0;
c54a00
     int argerr = 0;
c54a00
     struct passwd *pwentry = NULL;
c54a00
+    crm_ipc_t *old_instance = NULL;
c54a00
 
c54a00
     crm_log_preinit(NULL, argc, argv);
c54a00
     crm_set_options(NULL, "[options]",
c54a00
                     long_options, "Daemon for storing and replicating the cluster configuration");
c54a00
 
c54a00
-    crm_peer_init();
c54a00
-
c54a00
     mainloop_add_signal(SIGTERM, cib_shutdown);
c54a00
     mainloop_add_signal(SIGPIPE, cib_enable_writes);
c54a00
 
c54a00
@@ -216,6 +206,19 @@ main(int argc, char **argv)
c54a00
 
c54a00
     crm_log_init(NULL, LOG_INFO, TRUE, FALSE, argc, argv, FALSE);
c54a00
 
c54a00
+    old_instance = crm_ipc_new(cib_channel_ro, 0);
c54a00
+    if (crm_ipc_connect(old_instance)) {
c54a00
+        /* IPC end-point already up */
c54a00
+        crm_ipc_close(old_instance);
c54a00
+        crm_ipc_destroy(old_instance);
c54a00
+        crm_err("cib is already active, aborting startup");
c54a00
+        crm_exit(EX_OK);
c54a00
+    } else {
c54a00
+        /* not up or not authentic, we'll proceed either way */
c54a00
+        crm_ipc_destroy(old_instance);
c54a00
+        old_instance = NULL;
c54a00
+    }
c54a00
+
c54a00
     if (cib_root == NULL) {
c54a00
         if ((g_file_test(CRM_CONFIG_DIR "/cib.xml", G_FILE_TEST_EXISTS) == FALSE)
c54a00
             && (g_file_test(CRM_LEGACY_CONFIG_DIR "/cib.xml", G_FILE_TEST_EXISTS) == TRUE)) {
c54a00
@@ -238,6 +241,8 @@ main(int argc, char **argv)
c54a00
         return 100;
c54a00
     }
c54a00
 
c54a00
+    crm_peer_init();
c54a00
+
c54a00
     /* read local config file */
c54a00
     rc = cib_init();
c54a00
 
c54a00
diff --git a/crmd/main.c b/crmd/main.c
c54a00
index e8baa12..6eb7c03 100644
c54a00
--- a/crmd/main.c
c54a00
+++ b/crmd/main.c
c54a00
@@ -1,19 +1,10 @@
c54a00
 /* 
c54a00
- * Copyright (C) 2004 Andrew Beekhof <andrew@beekhof.net>
c54a00
- * 
c54a00
- * This program is free software; you can redistribute it and/or
c54a00
- * modify it under the terms of the GNU General Public
c54a00
- * License as published by the Free Software Foundation; either
c54a00
- * version 2 of the License, or (at your option) any later version.
c54a00
- * 
c54a00
- * This software is distributed in the hope that it will be useful,
c54a00
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
c54a00
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
c54a00
- * General Public License for more details.
c54a00
- * 
c54a00
- * You should have received a copy of the GNU General Public
c54a00
- * License along with this library; if not, write to the Free Software
c54a00
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
c54a00
+ * Copyright 2004-2019 the Pacemaker project contributors
c54a00
+ *
c54a00
+ * The version control history for this file may have further details.
c54a00
+ *
c54a00
+ * This source code is licensed under the GNU General Public License version 2
c54a00
+ * or later (GPLv2+) WITHOUT ANY WARRANTY.
c54a00
  */
c54a00
 
c54a00
 #include <crm_internal.h>
c54a00
@@ -61,6 +52,7 @@ main(int argc, char **argv)
c54a00
     int flag;
c54a00
     int index = 0;
c54a00
     int argerr = 0;
c54a00
+    crm_ipc_t *old_instance = NULL;
c54a00
 
c54a00
     crmd_mainloop = g_main_new(FALSE);
c54a00
     crm_log_preinit(NULL, argc, argv);
c54a00
@@ -104,6 +96,19 @@ main(int argc, char **argv)
c54a00
         crm_help('?', EX_USAGE);
c54a00
     }
c54a00
 
c54a00
+    old_instance = crm_ipc_new(CRM_SYSTEM_CRMD, 0);
c54a00
+    if (crm_ipc_connect(old_instance)) {
c54a00
+        /* IPC end-point already up */
c54a00
+        crm_ipc_close(old_instance);
c54a00
+        crm_ipc_destroy(old_instance);
c54a00
+        crm_err("crmd is already active, aborting startup");
c54a00
+        crm_exit(EX_OK);
c54a00
+    } else {
c54a00
+        /* not up or not authentic, we'll proceed either way */
c54a00
+        crm_ipc_destroy(old_instance);
c54a00
+        old_instance = NULL;
c54a00
+    }
c54a00
+
c54a00
     if (pcmk__daemon_can_write(PE_STATE_DIR, NULL) == FALSE) {
c54a00
         crm_err("Terminating due to bad permissions on " PE_STATE_DIR);
c54a00
         fprintf(stderr,
c54a00
diff --git a/fencing/main.c b/fencing/main.c
c54a00
index 16663f6..c46c9a5 100644
c54a00
--- a/fencing/main.c
c54a00
+++ b/fencing/main.c
c54a00
@@ -1,19 +1,10 @@
c54a00
 /*
c54a00
- * Copyright (C) 2009 Andrew Beekhof <andrew@beekhof.net>
c54a00
+ * Copyright 2009-2019 the Pacemaker project contributors
c54a00
  *
c54a00
- * This program is free software; you can redistribute it and/or
c54a00
- * modify it under the terms of the GNU General Public
c54a00
- * License as published by the Free Software Foundation; either
c54a00
- * version 2 of the License, or (at your option) any later version.
c54a00
+ * The version control history for this file may have further details.
c54a00
  *
c54a00
- * This software is distributed in the hope that it will be useful,
c54a00
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
c54a00
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
c54a00
- * General Public License for more details.
c54a00
- *
c54a00
- * You should have received a copy of the GNU General Public
c54a00
- * License along with this library; if not, write to the Free Software
c54a00
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
c54a00
+ * This source code is licensed under the GNU General Public License version 2
c54a00
+ * or later (GPLv2+) WITHOUT ANY WARRANTY.
c54a00
  */
c54a00
 
c54a00
 #include <crm_internal.h>
c54a00
@@ -1289,6 +1280,7 @@ main(int argc, char **argv)
c54a00
     int option_index = 0;
c54a00
     crm_cluster_t cluster;
c54a00
     const char *actions[] = { "reboot", "off", "on", "list", "monitor", "status" };
c54a00
+    crm_ipc_t *old_instance = NULL;
c54a00
 
c54a00
     crm_log_preinit("stonith-ng", argc, argv);
c54a00
     crm_set_options(NULL, "mode [options]", long_options,
c54a00
@@ -1459,6 +1451,20 @@ main(int argc, char **argv)
c54a00
     }
c54a00
 
c54a00
     crm_log_init("stonith-ng", LOG_INFO, TRUE, FALSE, argc, argv, FALSE);
c54a00
+
c54a00
+    old_instance = crm_ipc_new("stonith-ng", 0);
c54a00
+    if (crm_ipc_connect(old_instance)) {
c54a00
+        /* IPC end-point already up */
c54a00
+        crm_ipc_close(old_instance);
c54a00
+        crm_ipc_destroy(old_instance);
c54a00
+        crm_err("stonithd is already active, aborting startup");
c54a00
+        crm_exit(EX_OK);
c54a00
+    } else {
c54a00
+        /* not up or not authentic, we'll proceed either way */
c54a00
+        crm_ipc_destroy(old_instance);
c54a00
+        old_instance = NULL;
c54a00
+    }
c54a00
+
c54a00
     mainloop_add_signal(SIGTERM, stonith_shutdown);
c54a00
 
c54a00
     crm_peer_init();
c54a00
diff --git a/lib/cluster/cpg.c b/lib/cluster/cpg.c
c54a00
index a61d492..c5ecc67 100644
c54a00
--- a/lib/cluster/cpg.c
c54a00
+++ b/lib/cluster/cpg.c
c54a00
@@ -385,6 +385,20 @@ pcmk_message_common_cs(cpg_handle_t handle, uint32_t nodeid, uint32_t pid, void
c54a00
     return NULL;
c54a00
 }
c54a00
 
c54a00
+static int cmp_member_list_nodeid(const void *first,
c54a00
+                                  const void *second)
c54a00
+{
c54a00
+    const struct cpg_address *const a = *((const struct cpg_address **) first),
c54a00
+                             *const b = *((const struct cpg_address **) second);
c54a00
+    if (a->nodeid < b->nodeid) {
c54a00
+        return -1;
c54a00
+    } else if (a->nodeid > b->nodeid) {
c54a00
+        return 1;
c54a00
+    }
c54a00
+    /* don't bother with "reason" nor "pid" */
c54a00
+    return 0;
c54a00
+}
c54a00
+
c54a00
 void
c54a00
 pcmk_cpg_membership(cpg_handle_t handle,
c54a00
                     const struct cpg_name *groupName,
c54a00
@@ -396,29 +410,91 @@ pcmk_cpg_membership(cpg_handle_t handle,
c54a00
     gboolean found = FALSE;
c54a00
     static int counter = 0;
c54a00
     uint32_t local_nodeid = get_local_nodeid(handle);
c54a00
+    const struct cpg_address *key, **rival, **sorted;
c54a00
+
c54a00
+    sorted = malloc(member_list_entries * sizeof(const struct cpg_address *));
c54a00
+    CRM_ASSERT(sorted != NULL);
c54a00
+
c54a00
+    for (size_t iter = 0; iter < member_list_entries; iter++) {
c54a00
+        sorted[iter] = member_list + iter;
c54a00
+    }
c54a00
+    /* so that the cross-matching multiply-subscribed nodes is then cheap */
c54a00
+    qsort(sorted, member_list_entries, sizeof(const struct cpg_address *),
c54a00
+          cmp_member_list_nodeid);
c54a00
 
c54a00
     for (i = 0; i < left_list_entries; i++) {
c54a00
         crm_node_t *peer = crm_find_peer(left_list[i].nodeid, NULL);
c54a00
 
c54a00
-        crm_info("Node %u left group %s (peer=%s, counter=%d.%d)",
c54a00
+        crm_info("Node %u left group %s (peer=%s:%llu, counter=%d.%d)",
c54a00
                  left_list[i].nodeid, groupName->value,
c54a00
-                 (peer? peer->uname : "<none>"), counter, i);
c54a00
+                 (peer? peer->uname : "<none>"),
c54a00
+                 (unsigned long long) left_list[i].pid, counter, i);
c54a00
+
c54a00
+        /* in CPG world, NODE:PROCESS-IN-MEMBERSHIP-OF-G is an 1:N relation
c54a00
+           and not playing by this rule may go wild in case of multiple
c54a00
+           residual instances of the same pacemaker daemon at the same node
c54a00
+           -- we must ensure that the possible local rival(s) won't make us
c54a00
+           cry out and bail (e.g. when they quit themselves), since all the
c54a00
+           surrounding logic denies this simple fact that the full membership
c54a00
+           is discriminated also per the PID of the process beside mere node
c54a00
+           ID (and implicitly, group ID); practically, this will be sound in
c54a00
+           terms of not preventing progress, since all the CPG joiners are
c54a00
+           also API end-point carriers, and that's what matters locally
c54a00
+           (who's the winner);
c54a00
+           remotely, we will just compare leave_list and member_list and if
c54a00
+           the left process has its node retained in member_list (under some
c54a00
+           other PID, anyway) we will just ignore it as well
c54a00
+           XXX: long-term fix is to establish in-out PID-aware tracking? */
c54a00
         if (peer) {
c54a00
-            crm_update_peer_proc(__FUNCTION__, peer, crm_proc_cpg, OFFLINESTATUS);
c54a00
+            key = &left_list[i];
c54a00
+            rival = bsearch(&key, sorted, member_list_entries,
c54a00
+                            sizeof(const struct cpg_address *),
c54a00
+                            cmp_member_list_nodeid);
c54a00
+            if (rival == NULL) {
c54a00
+                crm_update_peer_proc(__FUNCTION__, peer, crm_proc_cpg,
c54a00
+                                     OFFLINESTATUS);
c54a00
+            } else if (left_list[i].nodeid == local_nodeid) {
c54a00
+                crm_info("Ignoring the above event %s.%d, comes from a local"
c54a00
+                         " rival process (presumably not us): %llu",
c54a00
+                         groupName->value, counter,
c54a00
+                         (unsigned long long) left_list[i].pid);
c54a00
+            } else {
c54a00
+                crm_info("Ignoring the above event %s.%d, comes from"
c54a00
+                         " a rival-rich node: %llu (e.g. %llu process"
c54a00
+                         " carries on)",
c54a00
+                         groupName->value, counter,
c54a00
+                         (unsigned long long) left_list[i].pid,
c54a00
+                         (unsigned long long) (*rival)->pid);
c54a00
+            }
c54a00
         }
c54a00
     }
c54a00
+    free(sorted);
c54a00
+    sorted = NULL;
c54a00
 
c54a00
     for (i = 0; i < joined_list_entries; i++) {
c54a00
-        crm_info("Node %u joined group %s (counter=%d.%d)",
c54a00
-                 joined_list[i].nodeid, groupName->value, counter, i);
c54a00
+        crm_info("Node %u joined group %s (counter=%d.%d, pid=%llu,"
c54a00
+                 " unchecked for rivals)",
c54a00
+                 joined_list[i].nodeid, groupName->value, counter, i,
c54a00
+                 (unsigned long long) left_list[i].pid);
c54a00
     }
c54a00
 
c54a00
     for (i = 0; i < member_list_entries; i++) {
c54a00
         crm_node_t *peer = crm_get_peer(member_list[i].nodeid, NULL);
c54a00
 
c54a00
-        crm_info("Node %u still member of group %s (peer=%s, counter=%d.%d)",
c54a00
+        crm_info("Node %u still member of group %s (peer=%s:%llu,"
c54a00
+                 " counter=%d.%d, at least once)",
c54a00
                  member_list[i].nodeid, groupName->value,
c54a00
-                 (peer? peer->uname : "<none>"), counter, i);
c54a00
+                 (peer? peer->uname : "<none>"), member_list[i].pid,
c54a00
+                 counter, i);
c54a00
+
c54a00
+        if (member_list[i].nodeid == local_nodeid
c54a00
+                && member_list[i].pid != getpid()) {
c54a00
+            /* see the note above */
c54a00
+            crm_info("Ignoring the above event %s.%d, comes from a local rival"
c54a00
+                     " process: %llu", groupName->value, counter,
c54a00
+                     (unsigned long long) member_list[i].pid);
c54a00
+            continue;
c54a00
+        }
c54a00
 
c54a00
         /* Anyone that is sending us CPG messages must also be a _CPG_ member.
c54a00
          * But it's _not_ safe to assume it's in the quorum membership.
c54a00
@@ -438,7 +514,9 @@ pcmk_cpg_membership(cpg_handle_t handle,
c54a00
                  *
c54a00
                  * Set the threshold to 1 minute
c54a00
                  */
c54a00
-                crm_err("Node %s[%u] appears to be online even though we think it is dead", peer->uname, peer->id);
c54a00
+                crm_err("Node %s[%u] appears to be online even though we think"
c54a00
+                        " it is dead (unchecked for rivals)",
c54a00
+                        peer->uname, peer->id);
c54a00
                 if (crm_update_peer_state(__FUNCTION__, peer, CRM_NODE_MEMBER, 0)) {
c54a00
                     peer->votes = 0;
c54a00
                 }
c54a00
-- 
c54a00
1.8.3.1
c54a00
c54a00
c54a00
From 9dc38d81cb6e1967c368faed78de1927cabf06b3 Mon Sep 17 00:00:00 2001
c54a00
From: =?UTF-8?q?Jan=20Pokorn=C3=BD?= <jpokorny@redhat.com>
c54a00
Date: Wed, 17 Apr 2019 15:17:50 +0200
c54a00
Subject: [PATCH 7/7] Med: controld: fix possible NULL pointer dereference
c54a00
c54a00
This is now more likely triggerable once the problems related to
c54a00
CVE-2018-16878 are avoided.
c54a00
---
c54a00
 crmd/control.c | 32 +++++++++++++-------------------
c54a00
 1 file changed, 13 insertions(+), 19 deletions(-)
c54a00
c54a00
diff --git a/crmd/control.c b/crmd/control.c
c54a00
index e01066a..488ea88 100644
c54a00
--- a/crmd/control.c
c54a00
+++ b/crmd/control.c
c54a00
@@ -1,19 +1,10 @@
c54a00
 /*
c54a00
- * Copyright (C) 2004 Andrew Beekhof <andrew@beekhof.net>
c54a00
+ * Copyright 2004-2019 the Pacemaker project contributors
c54a00
  *
c54a00
- * This program is free software; you can redistribute it and/or
c54a00
- * modify it under the terms of the GNU General Public
c54a00
- * License as published by the Free Software Foundation; either
c54a00
- * version 2 of the License, or (at your option) any later version.
c54a00
+ * The version control history for this file may have further details.
c54a00
  *
c54a00
- * This software is distributed in the hope that it will be useful,
c54a00
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
c54a00
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
c54a00
- * General Public License for more details.
c54a00
- *
c54a00
- * You should have received a copy of the GNU General Public
c54a00
- * License along with this library; if not, write to the Free Software
c54a00
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
c54a00
+ * This source code is licensed under the GNU General Public License version 2
c54a00
+ * or later (GPLv2+) WITHOUT ANY WARRANTY.
c54a00
  */
c54a00
 
c54a00
 #include <crm_internal.h>
c54a00
@@ -127,12 +118,15 @@ do_ha_control(long long action,
c54a00
             }
c54a00
 #endif
c54a00
         }
c54a00
-        controld_election_init(cluster->uname);
c54a00
-        fsa_our_uname = cluster->uname;
c54a00
-        fsa_our_uuid = cluster->uuid;
c54a00
-        if(cluster->uuid == NULL) {
c54a00
-            crm_err("Could not obtain local uuid");
c54a00
-            registered = FALSE;
c54a00
+
c54a00
+        if (registered == TRUE) {
c54a00
+            controld_election_init(cluster->uname);
c54a00
+            fsa_our_uname = cluster->uname;
c54a00
+            fsa_our_uuid = cluster->uuid;
c54a00
+            if(cluster->uuid == NULL) {
c54a00
+                crm_err("Could not obtain local uuid");
c54a00
+                registered = FALSE;
c54a00
+            }
c54a00
         }
c54a00
 
c54a00
         if (registered == FALSE) {
c54a00
-- 
c54a00
1.8.3.1
c54a00