diff --git a/SOURCES/002-security.patch b/SOURCES/002-security.patch new file mode 100644 index 0000000..e3701d0 --- /dev/null +++ b/SOURCES/002-security.patch @@ -0,0 +1,2441 @@ +From 32ded3e0172e0fae89cf70965e1c0406c1db883b Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Jan=20Pokorn=C3=BD?= +Date: Tue, 2 Apr 2019 10:13:21 +0200 +Subject: [PATCH 1/7] High: libservices: fix use-after-free wrt. alert handling + +This could possibly lead to unsolicited information disclosure by the +means of standard output of the immediately preceding agent/resource +execution leaking into the log stream under some circumstances. +It was hence assigned CVE-2019-3885. + +The provoked pathological state of pacemaker-execd daemon progresses +towards crashing it for hitting segmentation fault. +--- + lib/services/services.c | 40 +--------------------------------------- + lib/services/services_linux.c | 35 +++++++++++++++++++++++++++++++---- + 2 files changed, 32 insertions(+), 43 deletions(-) + +diff --git a/lib/services/services.c b/lib/services/services.c +index 313567f..d3537d0 100644 +--- a/lib/services/services.c ++++ b/lib/services/services.c +@@ -373,35 +373,6 @@ services_action_user(svc_action_t *op, const char *user) + return crm_user_lookup(user, &(op->opaque->uid), &(op->opaque->gid)); + } + +-static void +-set_alert_env(gpointer key, gpointer value, gpointer user_data) +-{ +- int rc; +- +- if (value) { +- rc = setenv(key, value, 1); +- } else { +- rc = unsetenv(key); +- } +- +- if (rc < 0) { +- crm_perror(LOG_ERR, "setenv %s=%s", +- (char*)key, (value? (char*)value : "")); +- } else { +- crm_trace("setenv %s=%s", (char*)key, (value? (char*)value : "")); +- } +-} +- +-static void +-unset_alert_env(gpointer key, gpointer value, gpointer user_data) +-{ +- if (unsetenv(key) < 0) { +- crm_perror(LOG_ERR, "unset %s", (char*)key); +- } else { +- crm_trace("unset %s", (char*)key); +- } +-} +- + /*! + * \brief Execute an alert agent action + * +@@ -416,18 +387,9 @@ unset_alert_env(gpointer key, gpointer value, gpointer user_data) + gboolean + services_alert_async(svc_action_t *action, void (*cb)(svc_action_t *op)) + { +- gboolean responsible; +- + action->synchronous = false; + action->opaque->callback = cb; +- if (action->params) { +- g_hash_table_foreach(action->params, set_alert_env, NULL); +- } +- responsible = services_os_action_execute(action); +- if (action->params) { +- g_hash_table_foreach(action->params, unset_alert_env, NULL); +- } +- return responsible; ++ return services_os_action_execute(action); + } + + #if SUPPORT_DBUS +diff --git a/lib/services/services_linux.c b/lib/services/services_linux.c +index a04a8f9..90c1f44 100644 +--- a/lib/services/services_linux.c ++++ b/lib/services/services_linux.c +@@ -160,6 +160,25 @@ set_ocf_env_with_prefix(gpointer key, gpointer value, gpointer user_data) + set_ocf_env(buffer, value, user_data); + } + ++static void ++set_alert_env(gpointer key, gpointer value, gpointer user_data) ++{ ++ int rc; ++ ++ if (value != NULL) { ++ rc = setenv(key, value, 1); ++ } else { ++ rc = unsetenv(key); ++ } ++ ++ if (rc < 0) { ++ crm_perror(LOG_ERR, "setenv %s=%s", ++ (char*)key, (value? (char*)value : "")); ++ } else { ++ crm_trace("setenv %s=%s", (char*)key, (value? (char*)value : "")); ++ } ++} ++ + /*! + * \internal + * \brief Add environment variables suitable for an action +@@ -169,12 +188,20 @@ set_ocf_env_with_prefix(gpointer key, gpointer value, gpointer user_data) + static void + add_action_env_vars(const svc_action_t *op) + { +- if (safe_str_eq(op->standard, PCMK_RESOURCE_CLASS_OCF) == FALSE) { +- return; ++ void (*env_setter)(gpointer, gpointer, gpointer) = NULL; ++ if (op->agent == NULL) { ++ env_setter = set_alert_env; /* we deal with alert handler */ ++ ++ } else if (safe_str_eq(op->standard, PCMK_RESOURCE_CLASS_OCF)) { ++ env_setter = set_ocf_env_with_prefix; + } + +- if (op->params) { +- g_hash_table_foreach(op->params, set_ocf_env_with_prefix, NULL); ++ if (env_setter != NULL && op->params != NULL) { ++ g_hash_table_foreach(op->params, env_setter, NULL); ++ } ++ ++ if (env_setter == NULL || env_setter == set_alert_env) { ++ return; + } + + set_ocf_env("OCF_RA_VERSION_MAJOR", "1", NULL); +-- +1.8.3.1 + + +From 912f5d9ce983339e939e4cc55f27791f8c9baa18 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Jan=20Pokorn=C3=BD?= +Date: Mon, 15 Apr 2019 17:09:50 +0200 +Subject: [PATCH 2/7] High: pacemakerd vs. IPC/procfs confused deputy + authenticity issue (0/4) + +[0/4: make crm_pid_active more precise as to when detections fail] + +It would be bad if the function claimed the process is not active +when the only obstacle in the detection process was that none of the +detection methods worked for a plain lack of permissions to apply +them. Also, do some other minor cleanup of the function and add its +documentation. As an additional measure, log spamming is kept at +minimum for repeated queries about the same PID. +--- + include/crm/common/internal.h | 21 +++++++++++++ + lib/common/pid.c | 68 +++++++++++++++++++++++++++---------------- + 2 files changed, 64 insertions(+), 25 deletions(-) + +diff --git a/include/crm/common/internal.h b/include/crm/common/internal.h +index f2944f5..6775d14 100644 +--- a/include/crm/common/internal.h ++++ b/include/crm/common/internal.h +@@ -49,7 +49,28 @@ void crm_schema_cleanup(void); + + /* internal functions related to process IDs (from pid.c) */ + ++/*! ++ * \internal ++ * \brief Detect if process per PID and optionally exe path (component) exists ++ * ++ * \param[in] pid PID of process assumed alive, disproving of which to try ++ * \param[in] daemon exe path (component) to possibly match with procfs entry ++ * ++ * \return -1 on invalid PID specification, -2 when the calling process has no ++ * (is refused an) ability to (dis)prove the predicate, ++ * 0 if the negation of the predicate is confirmed (check-through-kill ++ * indicates so, or the subsequent check-through-procfs-match on ++ * \p daemon when provided and procfs available at the standard path), ++ * 1 if it cannot be disproved (reliably [modulo race conditions] ++ * when \p daemon provided, procfs available at the standard path ++ * and the calling process has permissions to access the respective ++ * procfs location, less so otherwise, since mere check-through-kill ++ * is exercised without powers to exclude PID recycled in the interim). ++ * ++ * \note This function cannot be used to verify \e authenticity of the process. ++ */ + int crm_pid_active(long pid, const char *daemon); ++ + long crm_pidfile_inuse(const char *filename, long mypid, const char *daemon); + long crm_read_pidfile(const char *filename); + int crm_lock_pidfile(const char *filename, const char *name); +diff --git a/lib/common/pid.c b/lib/common/pid.c +index 803799e..2439680 100644 +--- a/lib/common/pid.c ++++ b/lib/common/pid.c +@@ -1,5 +1,7 @@ + /* +- * Copyright 2004-2018 Andrew Beekhof ++ * Copyright 2004-2019 the Pacemaker project contributors ++ * ++ * The version control history for this file may have further details. + * + * This source code is licensed under the GNU Lesser General Public License + * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. +@@ -20,17 +22,21 @@ + int + crm_pid_active(long pid, const char *daemon) + { ++ static int last_asked_pid = 0; /* log spam prevention */ ++#if SUPPORT_PROCFS + static int have_proc_pid = 0; ++#else ++ static int have_proc_pid = -1; ++#endif ++ int rc = 0; + + if (have_proc_pid == 0) { ++ /* evaluation of /proc/PID/exe applicability via self-introspection */ + char proc_path[PATH_MAX], exe_path[PATH_MAX]; +- +- // Make sure pid hasn't been reused by another process + snprintf(proc_path, sizeof(proc_path), "/proc/%lu/exe", +- (long unsigned int)getpid()); +- ++ (long unsigned int) getpid()); + have_proc_pid = 1; +- if (readlink(proc_path, exe_path, PATH_MAX - 1) < 0) { ++ if (readlink(proc_path, exe_path, sizeof(exe_path) - 1) < 0) { + have_proc_pid = -1; + } + } +@@ -38,40 +44,52 @@ crm_pid_active(long pid, const char *daemon) + if (pid <= 0) { + return -1; + +- } else if ((kill(pid, 0) < 0) && (errno == ESRCH)) { +- return 0; ++ } else if ((rc = kill(pid, 0)) < 0 && errno == ESRCH) { ++ return 0; /* no such PID detected */ + +- } else if ((daemon == NULL) || (have_proc_pid == -1)) { +- return 1; ++ } else if (rc < 0 && have_proc_pid == -1) { ++ if (last_asked_pid != pid) { ++ crm_info("Cannot examine PID %ld: %s", pid, strerror(errno)); ++ last_asked_pid = pid; ++ } ++ return -2; /* errno != ESRCH */ ++ ++ } else if (rc == 0 && (daemon == NULL || have_proc_pid == -1)) { ++ return 1; /* kill as the only indicator, cannot double check */ + + } else { +- int rc = 0; ++ /* make sure PID hasn't been reused by another process ++ XXX: might still be just a zombie, which could confuse decisions */ ++ bool checked_through_kill = (rc == 0); + char proc_path[PATH_MAX], exe_path[PATH_MAX], myexe_path[PATH_MAX]; +- +- // Make sure pid hasn't been reused by another process + snprintf(proc_path, sizeof(proc_path), "/proc/%ld/exe", pid); + +- rc = readlink(proc_path, exe_path, PATH_MAX - 1); ++ rc = readlink(proc_path, exe_path, sizeof(exe_path) - 1); + if ((rc < 0) && (errno == EACCES)) { +- crm_perror(LOG_INFO, "Could not read from %s", proc_path); +- return 1; ++ if (last_asked_pid != pid) { ++ crm_info("Could not read from %s: %s", proc_path, ++ strerror(errno)); ++ last_asked_pid = pid; ++ } ++ return checked_through_kill ? 1 : -2; + } else if (rc < 0) { +- crm_perror(LOG_ERR, "Could not read from %s", proc_path); +- return 0; ++ if (last_asked_pid != pid) { ++ crm_err("Could not read from %s: %s (%d)", proc_path, ++ strerror(errno), errno); ++ last_asked_pid = pid; ++ } ++ return 0; /* most likely errno == ENOENT */ + } +- +- exe_path[rc] = 0; ++ exe_path[rc] = '\0'; + + if (daemon[0] != '/') { +- rc = snprintf(myexe_path, sizeof(proc_path), CRM_DAEMON_DIR"/%s", ++ rc = snprintf(myexe_path, sizeof(myexe_path), CRM_DAEMON_DIR"/%s", + daemon); +- myexe_path[rc] = 0; + } else { +- rc = snprintf(myexe_path, sizeof(proc_path), "%s", daemon); +- myexe_path[rc] = 0; ++ rc = snprintf(myexe_path, sizeof(myexe_path), "%s", daemon); + } + +- if (strcmp(exe_path, myexe_path) == 0) { ++ if (rc > 0 && rc < sizeof(myexe_path) && !strcmp(exe_path, myexe_path)) { + return 1; + } + } +-- +1.8.3.1 + + +From 1148f45da977113dff588cdd1cfebb7a47760b32 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Jan=20Pokorn=C3=BD?= +Date: Mon, 15 Apr 2019 17:10:07 +0200 +Subject: [PATCH 3/7] High: pacemakerd vs. IPC/procfs confused deputy + authenticity issue (1/4) + +[1/4: new helpers to allow IPC client side to authenticate the server] + +The title problem here could possibly lead to local privilege escalation +up to the root's level (and implicitly unguarded by some additional +protection layers like SELinux unless the defaults constrained further). + +Main problem is that the authenticity assumptions were built on two, +seemingly mutually supporting legs leading to two CVEs assigned: + +* procfs (mere process existence and right path to binary check) + used to verify (this part was assigned CVE-2018-16878), and + +* one-way only client-server authentication, putting the client + here at the mercy of the server not necessarily cross-verified + per the above point if at all (this part was assigned + CVE-2018-16877) + +whereas these two were in fact orthogonal, tearing security +assumptions about the "passive influencers" in the pacemaker's daemon +resilience-friendly constellation (orchestrated by the main of them, +pacemakerd) apart. Moreover, procfs-based approach is discouraged +for other reasons. + +The layout of the basic fix is as follows: +* 1/4: new helpers to allow IPC client side to authenticate the server + (this commit, along with unifying subsequent solution for + both CVEs) +* 2/4: pacemakerd to trust pre-existing processes via new checks instead + (along with unifying solution for both CVEs) +* 3/4: other daemons to authenticate IPC servers of fellow processes + (along with addressing CVE-2018-16877 alone, for parts of + pacemaker not covered earlier) +* 4/4: CPG users to be careful about now-more-probable rival processes + (this is merely to mitigate corner case fallout from the new + approaches taken to face CVE-2018-16878 in particular; + courtesy of Yan Gao of SUSE for the reporting this) + +With "basic", it is meant that it constitutes a self-contained best +effort solution with some compromises that can only be overcome with the +assistance of IPC library, libqb, as is also elaborated in messages of +remaining "fix" commits. Beside that, also conventional encapsulation +of server-by-client authentication would be useful, but lack thereof +is not an obstacle (more so should there by any security related +neglectations on the IPC client side and its connection initiating +arrangement within libqb that has a potential to strike as early as +when the authenticity of the server side is yet to be examined). + +One extra kludge that's introduced for FreeBSD lacking Unix socket to +remote peer PID mapping is masquerading such an unspecified PID with +value of 1, since that shall always be around as "init" task and, +deferring to proof by contradiction, cannot be pacemakerd-spawned +child either even if PID 1 was pacemakerd (and running such a child +alone is rather nonsensical). The code making decisions based on that +value must acknowledge this craze and refrain from killing/signalling +the underlying process on this platform (but shall in general follow +the same elsewhere, keep in mind systemd socket-based activation for +instance, which would end up in such a situation easily!). +--- + configure.ac | 43 +++++++++++ + include/crm/common/Makefile.am | 3 +- + include/crm/common/ipc.h | 40 ++++++++++- + include/crm/common/ipc_internal.h | 69 ++++++++++++++++++ + lib/common/ipc.c | 145 +++++++++++++++++++++++++++++++++++++- + 5 files changed, 296 insertions(+), 4 deletions(-) + create mode 100644 include/crm/common/ipc_internal.h + +diff --git a/configure.ac b/configure.ac +index f272587..7228989 100644 +--- a/configure.ac ++++ b/configure.ac +@@ -425,6 +425,48 @@ do + fi + done + ++us_auth= ++AC_CHECK_HEADER([sys/socket.h], [ ++ AC_CHECK_DECL([SO_PEERCRED], [ ++ # Linux ++ AC_CHECK_TYPE([struct ucred], [ ++ us_auth=peercred_ucred; ++ AC_DEFINE([US_AUTH_PEERCRED_UCRED], [1], ++ [Define if Unix socket auth method is ++ getsockopt(s, SO_PEERCRED, &ucred, ...)]) ++ ], [ ++ # OpenBSD ++ AC_CHECK_TYPE([struct sockpeercred], [ ++ us_auth=localpeercred_sockepeercred; ++ AC_DEFINE([US_AUTH_PEERCRED_SOCKPEERCRED], [1], ++ [Define if Unix socket auth method is ++ getsockopt(s, SO_PEERCRED, &sockpeercred, ...)]) ++ ], [], [[#include ]]) ++ ], [[#define _GNU_SOURCE ++ #include ]]) ++ ], [], [[#include ]]) ++]) ++ ++if test -z "${us_auth}"; then ++ # FreeBSD ++ AC_CHECK_DECL([getpeereid], [ ++ us_auth=getpeereid; ++ AC_DEFINE([US_AUTH_GETPEEREID], [1], ++ [Define if Unix socket auth method is ++ getpeereid(s, &uid, &gid)]) ++ ], [ ++ # Solaris/OpenIndiana ++ AC_CHECK_DECL([getpeerucred], [ ++ us_auth=getpeerucred; ++ AC_DEFINE([US_AUTH_GETPEERUCRED], [1], ++ [Define if Unix socket auth method is ++ getpeercred(s, &ucred)]) ++ ], [ ++ AC_MSG_ERROR([No way to authenticate a Unix socket peer]) ++ ], [[#include ]]) ++ ]) ++fi ++ + dnl This OS-based decision-making is poor autotools practice; + dnl feature-based mechanisms are strongly preferred. + dnl +@@ -1845,3 +1887,4 @@ AC_MSG_RESULT([ LDFLAGS_HARDENED_EXE = ${LDFLAGS_HARDENED_EXE}]) + AC_MSG_RESULT([ LDFLAGS_HARDENED_LIB = ${LDFLAGS_HARDENED_LIB}]) + AC_MSG_RESULT([ Libraries = ${LIBS}]) + AC_MSG_RESULT([ Stack Libraries = ${CLUSTERLIBS}]) ++AC_MSG_RESULT([ Unix socket auth method = ${us_auth}]) +diff --git a/include/crm/common/Makefile.am b/include/crm/common/Makefile.am +index 7400152..be19e5e 100644 +--- a/include/crm/common/Makefile.am ++++ b/include/crm/common/Makefile.am +@@ -14,3 +14,4 @@ headerdir=$(pkgincludedir)/crm/common + header_HEADERS = xml.h ipc.h util.h iso8601.h mainloop.h logging.h results.h + noinst_HEADERS = cib_secrets.h ipcs.h internal.h alerts_internal.h \ +- iso8601_internal.h remote_internal.h xml_internal.h ++ iso8601_internal.h remote_internal.h xml_internal.h \ ++ ipc_internal.h +diff --git a/include/crm/common/ipc.h b/include/crm/common/ipc.h +index 84a3a71..3249662 100644 +--- a/include/crm/common/ipc.h ++++ b/include/crm/common/ipc.h +@@ -1,5 +1,7 @@ + /* +- * Copyright 2013-2018 Andrew Beekhof ++ * Copyright 2004-2019 the Pacemaker project contributors ++ * ++ * The version control history for this file may have further details. + * + * This source code is licensed under the GNU Lesser General Public License + * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. +@@ -75,6 +75,44 @@ uint32_t crm_ipc_buffer_flags(crm_ipc_t * client); + const char *crm_ipc_name(crm_ipc_t * client); + unsigned int crm_ipc_default_buffer_size(void); + ++/*! ++ * \brief Check the authenticity of the IPC socket peer process ++ * ++ * If everything goes well, peer's authenticity is verified by the means ++ * of comparing against provided referential UID and GID (either satisfies), ++ * and the result of this check can be deduced from the return value. ++ * As an exception, detected UID of 0 ("root") satisfies arbitrary ++ * provided referential daemon's credentials. ++ * ++ * \param[in] sock IPC related, connected Unix socket to check peer of ++ * \param[in] refuid referential UID to check against ++ * \param[in] refgid referential GID to check against ++ * \param[out] gotpid to optionally store obtained PID of the peer ++ * (not available on FreeBSD, special value of 1 ++ * used instead, and the caller is required to ++ * special case this value respectively) ++ * \param[out] gotuid to optionally store obtained UID of the peer ++ * \param[out] gotgid to optionally store obtained GID of the peer ++ * ++ * \return 0 if IPC related socket's peer is not authentic given the ++ * referential credentials (see above), 1 if it is, ++ * negative value on error (generally expressing -errno unless ++ * it was zero even on nonhappy path, -pcmk_err_generic is ++ * returned then; no message is directly emitted) ++ * ++ * \note While this function is tolerant on what constitutes authorized ++ * IPC daemon process (its effective user matches UID=0 or \p refuid, ++ * or at least its group matches \p refroup), either or both (in case ++ * of UID=0) mismatches on the expected credentials of such peer ++ * process \e shall be investigated at the caller when value of 1 ++ * gets returned there, since higher-than-expected privileges in ++ * respect to the expected/intended credentials possibly violate ++ * the least privilege principle and may pose an additional risk ++ * (i.e. such accidental inconsistency shall be eventually fixed). ++ */ ++int crm_ipc_is_authentic_process(int sock, uid_t refuid, gid_t refgid, ++ pid_t *gotpid, uid_t *gotuid, gid_t *gotgid); ++ + /* Utils */ + xmlNode *create_hello_message(const char *uuid, const char *client_name, + const char *major_version, const char *minor_version); +diff --git a/include/crm/common/ipc_internal.h b/include/crm/common/ipc_internal.h +new file mode 100644 +index 0000000..41a6653 +--- /dev/null ++++ b/include/crm/common/ipc_internal.h +@@ -0,0 +1,69 @@ ++/* ++ * Copyright 2019 the Pacemaker project contributors ++ * ++ * The version control history for this file may have further details. ++ * ++ * This source code is licensed under the GNU Lesser General Public License ++ * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. ++ */ ++ ++#ifndef PCMK__IPC_INTERNAL_H ++#define PCMK__IPC_INTERNAL_H ++ ++#include ++ ++#include /* US_AUTH_GETPEEREID */ ++ ++ ++/* denotes "non yieldable PID" on FreeBSD, or actual PID1 in scenarios that ++ require a delicate handling anyway (socket-based activation with systemd); ++ we can be reasonably sure that this PID is never possessed by the actual ++ child daemon, as it gets taken either by the proper init, or by pacemakerd ++ itself (i.e. this precludes anything else); note that value of zero ++ is meant to carry "unset" meaning, and better not to bet on/conditionalize ++ over signedness of pid_t */ ++#define PCMK__SPECIAL_PID 1 ++ ++#if defined(US_AUTH_GETPEEREID) ++/* on FreeBSD, we don't want to expose "non-yieldable PID" (leading to ++ "IPC liveness check only") as its nominal representation, which could ++ cause confusion -- this is unambiguous as long as there's no ++ socket-based activation like with systemd (very improbable) */ ++#define PCMK__SPECIAL_PID_AS_0(p) (((p) == PCMK__SPECIAL_PID) ? 0 : (p)) ++#else ++#define PCMK__SPECIAL_PID_AS_0(p) (p) ++#endif ++ ++/*! ++ * \internal ++ * \brief Check the authenticity and liveness of the process via IPC end-point ++ * ++ * When IPC daemon under given IPC end-point (name) detected, its authenticity ++ * is verified by the means of comparing against provided referential UID and ++ * GID, and the result of this check can be deduced from the return value. ++ * As an exception, referential UID of 0 (~ root) satisfies arbitrary ++ * detected daemon's credentials. ++ * ++ * \param[in] name IPC name to base the search on ++ * \param[in] refuid referential UID to check against ++ * \param[in] refgid referential GID to check against ++ * \param[out] gotpid to optionally store obtained PID of the found process ++ * upon returning 1 or -2 ++ * (not available on FreeBSD, special value of 1, ++ * see PCMK__SPECIAL_PID, used instead, and the caller ++ * is required to special case this value respectively) ++ * ++ * \return 0 if no trace of IPC peer's liveness detected, 1 if it was, ++ * -1 on error, and -2 when the IPC blocked with unauthorized ++ * process (log message emitted in both latter cases) ++ * ++ * \note This function emits a log message also in case there isn't a perfect ++ * match in respect to \p reguid and/or \p refgid, for a possible ++ * least privilege principle violation. ++ * ++ * \see crm_ipc_is_authentic_process ++ */ ++int pcmk__ipc_is_authentic_process_active(const char *name, uid_t refuid, ++ gid_t refgid, pid_t *gotpid); ++ ++#endif +diff --git a/lib/common/ipc.c b/lib/common/ipc.c +index c582ccf..aa055d3 100644 +--- a/lib/common/ipc.c ++++ b/lib/common/ipc.c +@@ -1,5 +1,7 @@ + /* +- * Copyright 2004-2018 Andrew Beekhof ++ * Copyright 2004-2019 the Pacemaker project contributors ++ * ++ * The version control history for this file may have further details. + * + * This source code is licensed under the GNU Lesser General Public License + * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. +@@ -7,6 +9,17 @@ + + #include + ++#if defined(US_AUTH_PEERCRED_UCRED) || defined(US_AUTH_PEERCRED_SOCKPEERCRED) ++# ifdef US_AUTH_PEERCRED_UCRED ++# ifndef _GNU_SOURCE ++# define _GNU_SOURCE ++# endif ++# endif ++# include ++#elif defined(US_AUTH_GETPEERUCRED) ++# include ++#endif ++ + #include + + #include +@@ -19,11 +32,13 @@ + #include + #include + +-#include ++#include /* indirectly: pcmk_err_generic */ + #include + #include + #include + ++#include /* PCMK__SPECIAL_PID* */ ++ + #define PCMK_IPC_VERSION 1 + + /* Evict clients whose event queue grows this large (by default) */ +@@ -1394,6 +1409,132 @@ crm_ipc_send(crm_ipc_t * client, xmlNode * message, enum crm_ipc_flags flags, in + return rc; + } + ++int ++crm_ipc_is_authentic_process(int sock, uid_t refuid, gid_t refgid, ++ pid_t *gotpid, uid_t *gotuid, gid_t *gotgid) { ++ int ret = 0; ++ pid_t found_pid = 0; uid_t found_uid = 0; gid_t found_gid = 0; ++#if defined(US_AUTH_PEERCRED_UCRED) ++ struct ucred ucred; ++ socklen_t ucred_len = sizeof(ucred); ++ ++ if (!getsockopt(sock, SOL_SOCKET, SO_PEERCRED, ++ &ucred, &ucred_len) ++ && ucred_len == sizeof(ucred)) { ++ found_pid = ucred.pid; found_uid = ucred.uid; found_gid = ucred.gid; ++ ++#elif defined(US_AUTH_PEERCRED_SOCKPEERCRED) ++ struct sockpeercred sockpeercred; ++ socklen_t sockpeercred_len = sizeof(sockpeercred); ++ ++ if (!getsockopt(sock, SOL_SOCKET, SO_PEERCRED, ++ &sockpeercred, &sockpeercred_len) ++ && sockpeercred_len == sizeof(sockpeercred_len)) { ++ found_pid = sockpeercred.pid; ++ found_uid = sockpeercred.uid; found_gid = sockpeercred.gid; ++ ++#elif defined(US_AUTH_GETPEEREID) ++ if (!getpeereid(sock, &found_uid, &found_gid)) { ++ found_pid = PCMK__SPECIAL_PID; /* cannot obtain PID (FreeBSD) */ ++ ++#elif defined(US_AUTH_GETPEERUCRED) ++ ucred_t *ucred; ++ if (!getpeerucred(sock, &ucred)) { ++ errno = 0; ++ found_pid = ucred_getpid(ucred); ++ found_uid = ucred_geteuid(ucred); found_gid = ucred_getegid(ucred); ++ ret = -errno; ++ ucred_free(ucred); ++ if (ret) { ++ return (ret < 0) ? ret : -pcmk_err_generic; ++ } ++ ++#else ++# error "No way to authenticate a Unix socket peer" ++ errno = 0; ++ if (0) { ++#endif ++ if (gotpid != NULL) { ++ *gotpid = found_pid; ++ } ++ if (gotuid != NULL) { ++ *gotuid = found_uid; ++ } ++ if (gotgid != NULL) { ++ *gotgid = found_gid; ++ } ++ ret = (found_uid == 0 || found_uid == refuid || found_gid == refgid); ++ } else { ++ ret = (errno > 0) ? -errno : -pcmk_err_generic; ++ } ++ ++ return ret; ++} ++ ++int ++pcmk__ipc_is_authentic_process_active(const char *name, uid_t refuid, ++ gid_t refgid, pid_t *gotpid) { ++ static char last_asked_name[PATH_MAX / 2] = ""; /* log spam prevention */ ++ int fd, ret = 0; ++ pid_t found_pid = 0; uid_t found_uid = 0; gid_t found_gid = 0; ++ qb_ipcc_connection_t *c; ++ ++ if ((c = qb_ipcc_connect(name, 0)) == NULL) { ++ crm_info("Could not connect to %s IPC: %s", name, strerror(errno)); ++ ++ } else if ((ret = qb_ipcc_fd_get(c, &fd))) { ++ crm_err("Could not get fd from %s IPC: %s (%d)", name, ++ strerror(-ret), -ret); ++ ret = -1; ++ ++ } else if ((ret = crm_ipc_is_authentic_process(fd, refuid, refgid, ++ &found_pid, &found_uid, ++ &found_gid)) < 0) { ++ if (ret == -pcmk_err_generic) { ++ crm_err("Could not get peer credentials from %s IPC", name); ++ } else { ++ crm_err("Could not get peer credentials from %s IPC: %s (%d)", ++ name, strerror(-ret), -ret); ++ } ++ ret = -1; ++ ++ } else { ++ if (gotpid != NULL) { ++ *gotpid = found_pid; ++ } ++ ++ if (!ret) { ++ crm_err("Daemon (IPC %s) effectively blocked with unauthorized" ++ " process %lld (uid: %lld, gid: %lld)", ++ name, (long long) PCMK__SPECIAL_PID_AS_0(found_pid), ++ (long long) found_uid, (long long) found_gid); ++ ret = -2; ++ } else if ((found_uid != refuid || found_gid != refgid) ++ && strncmp(last_asked_name, name, sizeof(last_asked_name))) { ++ if (!found_uid && refuid) { ++ crm_warn("Daemon (IPC %s) runs as root, whereas the expected" ++ " credentials are %lld:%lld, hazard of violating" ++ " the least privilege principle", ++ name, (long long) refuid, (long long) refgid); ++ } else { ++ crm_notice("Daemon (IPC %s) runs as %lld:%lld, whereas the" ++ " expected credentials are %lld:%lld, which may" ++ " mean a different set of privileges than expected", ++ name, (long long) found_uid, (long long) found_gid, ++ (long long) refuid, (long long) refgid); ++ } ++ memccpy(last_asked_name, name, '\0', sizeof(last_asked_name)); ++ } ++ } ++ ++ if (ret) { /* here, !ret only when we could not initially connect */ ++ qb_ipcc_disconnect(c); ++ } ++ ++ return ret; ++} ++ ++ + /* Utils */ + + xmlNode * +-- +1.8.3.1 + + +From 970736b1c7ad5c78cc5295a4231e546104d55893 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Jan=20Pokorn=C3=BD?= +Date: Tue, 16 Apr 2019 00:13:06 +0200 +Subject: [PATCH 4/7] High: pacemakerd vs. IPC/procfs confused deputy + authenticity issue (2/4) + +[2/4: pacemakerd to trust pre-existing processes via new checks instead] + +In pacemakerd in the context of entrusting pre-existing processes, +we now resort to procfs-based solution only in boundary, fouled cases, +and primarily examine the credentials of the processes already +occupying known IPC end-points before adopting them. + +The commit applies the new helpers from 1/1 so as to close the both +related sensitive problems, CVE-2018-16877 and CVE-2018-16878, in +a unified manner, this time limited to the main daemon of pacemaker +(pacemakerd). + +To be noted that it is clearly not 100% for this purpose for still +allowing for TOCTTOU, but that's what commit (3/3) is meant to solve +for the most part, plus there may be optimizations solving this concern +as a side effect, but that requires an active assistance on the libqb +side (https://github.com/ClusterLabs/libqb/issues/325) since any +improvement on pacemaker side in isolation would be very +cumbersome if generally possible at all, but either way +means a new, soft compatibility encumberment. + +As a follow-up to what was put in preceding 1/3 commit, PID of 1 tracked +as child's identification on FreeBSD (or when socket-based activation is +used with systemd) is treated specially, incl. special precaution with +child's PID discovered as 1 elsewhere. + +v2: courtesy of Yan Gao of SUSE for early discovery and report for + what's primarily solved with 4/4 commit, in extension, child + daemons in the initialization phase coinciding with IPC-feasibility + based process scan in pacemakerd in a way that those are missed + (although they are to come up fully just moments later only + to interfere with naturally spawned ones) are now considered so + that if any native children later fail for said clash, the + pre-existing counterpart may get adopted instead of ending up + with repeated spawn-bury loop ad nauseam without real progress + (note that PCMK_fail_fast=true could possibly help, but that's + rather a big hammer not suitable for all the use cases, not + the ones we try to deal with gracefully here) +--- + daemons/pacemakerd/pacemakerd.c | 406 ++++++++++++++++++++++++++++++++++------ + 1 file changed, 345 insertions(+), 61 deletions(-) + +diff --git a/daemons/pacemakerd/pacemakerd.c b/daemons/pacemakerd/pacemakerd.c +index eca7f4d..bebf938 100644 +--- a/daemons/pacemakerd/pacemakerd.c ++++ b/daemons/pacemakerd/pacemakerd.c +@@ -12,17 +12,23 @@ + + #include + #include ++#include + #include + #include + #include + #include + #include + ++#include /* indirectly: CRM_EX_* */ ++#include /* cib_channel_ro */ + #include + #include + #include + #include + #include ++ ++#include /* PCMK__SPECIAL_PID*, ... */ ++ + #ifdef SUPPORT_COROSYNC + #include + #endif +@@ -33,6 +39,7 @@ + static gboolean pcmk_quorate = FALSE; + static gboolean fatal_error = FALSE; + static GMainLoop *mainloop = NULL; ++static bool global_keep_tracking = false; + + #define PCMK_PROCESS_CHECK_INTERVAL 5 + +@@ -50,6 +57,7 @@ typedef struct pcmk_child_s { + const char *name; + const char *uid; + const char *command; ++ const char *endpoint; /* IPC server name */ + + gboolean active_before_startup; + } pcmk_child_t; +@@ -64,30 +72,38 @@ static pcmk_child_t pcmk_children[] = { + }, + { + 0, crm_proc_execd, 3, 0, TRUE, "pacemaker-execd", +- NULL, CRM_DAEMON_DIR "/pacemaker-execd" ++ NULL, CRM_DAEMON_DIR "/pacemaker-execd", ++ CRM_SYSTEM_LRMD + }, + { + 0, crm_proc_based, 1, 0, TRUE, "pacemaker-based", +- CRM_DAEMON_USER, CRM_DAEMON_DIR "/pacemaker-based" ++ CRM_DAEMON_USER, CRM_DAEMON_DIR "/pacemaker-based", ++ CIB_CHANNEL_RO + }, + { + 0, crm_proc_controld, 6, 0, TRUE, "pacemaker-controld", +- CRM_DAEMON_USER, CRM_DAEMON_DIR "/pacemaker-controld" ++ CRM_DAEMON_USER, CRM_DAEMON_DIR "/pacemaker-controld", ++ CRM_SYSTEM_CRMD + }, + { + 0, crm_proc_attrd, 4, 0, TRUE, "pacemaker-attrd", +- CRM_DAEMON_USER, CRM_DAEMON_DIR "/pacemaker-attrd" ++ CRM_DAEMON_USER, CRM_DAEMON_DIR "/pacemaker-attrd", ++ T_ATTRD + }, + { + 0, crm_proc_schedulerd, 5, 0, TRUE, "pacemaker-schedulerd", +- CRM_DAEMON_USER, CRM_DAEMON_DIR "/pacemaker-schedulerd" ++ CRM_DAEMON_USER, CRM_DAEMON_DIR "/pacemaker-schedulerd", ++ CRM_SYSTEM_PENGINE + }, + { + 0, crm_proc_fenced, 2, 0, TRUE, "pacemaker-fenced", +- NULL, CRM_DAEMON_DIR "/pacemaker-fenced" ++ NULL, CRM_DAEMON_DIR "/pacemaker-fenced", ++ "stonith-ng" + }, + }; + ++static gboolean check_active_before_startup_processes(gpointer user_data); ++static int pcmk_child_active(pcmk_child_t *child); + static gboolean start_child(pcmk_child_t * child); + static gboolean update_node_processes(uint32_t id, const char *uname, + uint32_t procs); +@@ -130,14 +146,31 @@ pcmk_process_exit(pcmk_child_t * child) + } + + if (shutdown_trigger) { ++ /* resume step-wise shutdown (returned TRUE yields no parallelizing) */ + mainloop_set_trigger(shutdown_trigger); ++ /* intended to speed up propagating expected lay-off of the daemons? */ + update_node_processes(local_nodeid, NULL, get_process_list()); + +- } else if (child->respawn && crm_is_true(getenv("PCMK_fail_fast"))) { ++ } else if (!child->respawn) { ++ /* nothing to do */ ++ ++ } else if (crm_is_true(getenv("PCMK_fail_fast"))) { + crm_err("Rebooting system because of %s", child->name); + pcmk_panic(__FUNCTION__); + +- } else if (child->respawn) { ++ } else if (pcmk_child_active(child) == 1) { ++ crm_warn("One-off suppressing strict respawning of a child process %s," ++ " appears alright per %s IPC end-point", ++ child->name, child->endpoint); ++ /* need to monitor how it evolves, and start new process if badly */ ++ child->active_before_startup = TRUE; ++ if (!global_keep_tracking) { ++ global_keep_tracking = true; ++ g_timeout_add_seconds(PCMK_PROCESS_CHECK_INTERVAL, ++ check_active_before_startup_processes, NULL); ++ } ++ ++ } else { + crm_notice("Respawning failed child process: %s", child->name); + start_child(child); + } +@@ -218,8 +251,13 @@ stop_child(pcmk_child_t * child, int signal) + signal = SIGTERM; + } + +- if (child->command == NULL) { +- crm_debug("Nothing to do for child \"%s\"", child->name); ++ /* why to skip PID of 1? ++ - FreeBSD ~ how untrackable process behind IPC is masqueraded as ++ - elsewhere: how "init" task is designated; in particular, in systemd ++ arrangement of socket-based activation, this is pretty real */ ++ if (child->command == NULL || child->pid == PCMK__SPECIAL_PID) { ++ crm_debug("Nothing to do for child \"%s\" (process %lld)", ++ child->name, (long long) PCMK__SPECIAL_PID_AS_0(child->pid)); + return TRUE; + } + +@@ -244,6 +282,11 @@ stop_child(pcmk_child_t * child, int signal) + static char *opts_default[] = { NULL, NULL }; + static char *opts_vgrind[] = { NULL, NULL, NULL, NULL, NULL }; + ++/* TODO once libqb is taught to juggle with IPC end-points carried over as ++ bare file descriptor (https://github.com/ClusterLabs/libqb/issues/325) ++ it shall hand over these descriptors here if/once they are successfully ++ pre-opened in (presumably) pcmk_child_active, to avoid any remaining ++ room for races */ + static gboolean + start_child(pcmk_child_t * child) + { +@@ -380,7 +423,10 @@ escalate_shutdown(gpointer data) + + pcmk_child_t *child = data; + +- if (child->pid) { ++ if (child->pid == PCMK__SPECIAL_PID) { ++ pcmk_process_exit(child); ++ ++ } else if (child->pid) { + /* Use SIGSEGV instead of SIGKILL to create a core so we can see what it was up to */ + crm_err("Child %s not terminating in a timely manner, forcing", child->name); + stop_child(child, SIGSEGV); +@@ -388,6 +434,8 @@ escalate_shutdown(gpointer data) + return FALSE; + } + ++#define SHUTDOWN_ESCALATION_PERIOD 180000 /* 3m */ ++ + static gboolean + pcmk_shutdown_worker(gpointer user_data) + { +@@ -416,11 +464,24 @@ pcmk_shutdown_worker(gpointer user_data) + time_t now = time(NULL); + + if (child->respawn) { ++ if (child->pid == PCMK__SPECIAL_PID) { ++ crm_warn("The process behind %s IPC cannot be" ++ " terminated, so either wait the graceful" ++ " period of %ld s for its native termination" ++ " if it vitally depends on some other daemons" ++ " going down in a controlled way already," ++ " or locate and kill the correct %s process" ++ " on your own; set PCMK_fail_fast=1 to avoid" ++ " this altogether next time around", ++ child->name, (long) SHUTDOWN_ESCALATION_PERIOD, ++ child->command); ++ } + next_log = now + 30; + child->respawn = FALSE; + stop_child(child, SIGTERM); + if (phase < pcmk_children[PCMK_CHILD_CONTROLD].start_seq) { +- g_timeout_add(180000 /* 3m */ , escalate_shutdown, child); ++ g_timeout_add(SHUTDOWN_ESCALATION_PERIOD, ++ escalate_shutdown, child); + } + + } else if (now >= next_log) { +@@ -702,7 +763,102 @@ mcp_chown(const char *path, uid_t uid, gid_t gid) + } + } + +-#if SUPPORT_PROCFS ++/*! ++ * \internal ++ * \brief Check the liveness of the child based on IPC name and PID if tracked ++ * ++ * \param[inout] child Child tracked data ++ * ++ * \return 0 if no trace of child's liveness detected (while it is detectable ++ * to begin with, at least according to one of the two properties), ++ * 1 if everything is fine, 2 if it's up per PID, but not per IPC ++ * end-point (still starting?), -1 on error, and -2 when the child ++ * (its IPC) blocked with an unauthorized process (log message ++ * emitted in both latter cases) ++ * ++ * \note This function doesn't modify any of \p child members but \c pid, ++ * and is not actively toying with processes as such but invoking ++ * \c stop_child in one particular case (there's for some reason ++ * a different authentic holder of the IPC end-point). ++ */ ++static int ++pcmk_child_active(pcmk_child_t *child) { ++ static uid_t cl_uid = 0; ++ static gid_t cl_gid = 0; ++ const uid_t root_uid = 0; ++ const gid_t root_gid = 0; ++ const uid_t *ref_uid; ++ const gid_t *ref_gid; ++ int ret = 0; ++ pid_t ipc_pid = 0; ++ ++ if (child->endpoint == NULL ++ && (child->pid <= 0 || child->pid == PCMK__SPECIAL_PID)) { ++ crm_err("Cannot track child %s for missing both API end-point and PID", ++ child->name); ++ ret = -1; /* misuse of the function when child is not trackable */ ++ ++ } else if (child->endpoint != NULL) { ++ ++ ref_uid = (child->uid != NULL) ? &cl_uid : &root_uid; ++ ref_gid = (child->uid != NULL) ? &cl_gid : &root_gid; ++ ++ if (child->uid != NULL && !cl_uid && !cl_gid ++ && crm_user_lookup(CRM_DAEMON_USER, &cl_uid, &cl_gid) < 0) { ++ crm_err("Could not find user and group IDs for user %s", ++ CRM_DAEMON_USER); ++ ret = -1; ++ } else if ((ret = pcmk__ipc_is_authentic_process_active(child->endpoint, ++ *ref_uid, *ref_gid, ++ &ipc_pid)) < 0) { ++ /* game over */ ++ } else if (child->pid <= 0) { ++ /* hit new child to be initialized, or reset to zero ++ and investigate further for ret == 0 */ ++ child->pid = ipc_pid; ++ } else if (ipc_pid && child->pid != ipc_pid) { ++ /* ultimately strange for ret == 1; either way, investigate */ ++ ret = 0; ++ } ++ } ++ ++ if (!ret) { ++ /* when no IPC based liveness detected (incl. if ever a child without ++ IPC is tracked), or detected for a different _authentic_ process; ++ safe on FreeBSD since the only change possible from a proper child's ++ PID into "special" PID of 1 behind more loosely related process */ ++ ret = crm_pid_active(child->pid, child->name); ++ if (ipc_pid && (ret != 1 ++ || ipc_pid == PCMK__SPECIAL_PID ++ || crm_pid_active(ipc_pid, child->name) == 1)) { ++ if (ret == 1) { ++ /* assume there's no forking-while-retaining-IPC-socket ++ involved in the "children's" lifecycle, hence that the ++ tracking got out of sync purely because of some external ++ (esotheric?) forces (user initiated process "refresh" by ++ force? or intentionally racing on start-up, even?), and ++ that switching over to this other detected, authentic ++ instance with an IPC already in possession is a better ++ trade-off than "neutralizing" it first so as to give ++ either the original or possibly a new to-be-spawned ++ daemon process a leeway for operation, which would ++ otherwise have to be carried out */ ++ /* not possessing IPC, afterall (what about corosync CPG?) */ ++ stop_child(child, SIGKILL); ++ } else { ++ ret = 1; ++ } ++ child->pid = ipc_pid; ++ } else if (ret == 1) { ++ ret = 2; /* up per PID, but not per IPC (still starting?) */ ++ } else if (!child->pid && ret == -1) { ++ ret = 0; /* correct -1 on FreeBSD from above back to 0 */ ++ } ++ } ++ ++ return ret; ++} ++ + static gboolean + check_active_before_startup_processes(gpointer user_data) + { +@@ -719,12 +875,41 @@ check_active_before_startup_processes(gpointer user_data) + continue; + } else { + const char *name = pcmk_children[lpc].name; +- +- if (crm_pid_active(pcmk_children[lpc].pid, name) != 1) { +- crm_notice("Process %s terminated (pid=%d)", +- name, pcmk_children[lpc].pid); +- pcmk_process_exit(&(pcmk_children[lpc])); +- continue; ++ int ret; ++ ++ switch ((ret = pcmk_child_active(&pcmk_children[lpc]))) { ++ case 1: ++ break; ++ case 0: ++ case 2: /* this very case: it was OK once already */ ++ if (pcmk_children[lpc].respawn == TRUE) { ++ /* presumably after crash, hence critical */ ++ crm_crit("Process %s terminated (pid=%lld)%s", \ ++ name, (long long) ++ PCMK__SPECIAL_PID_AS_0(pcmk_children[lpc].pid), ++ ret ? ", at least per IPC end-point that went AWOL" ++ : ""); ++ } else { ++ /* orderly shutdown */ ++ crm_notice("Process %s terminated (pid=%lld)%s", \ ++ name, (long long) ++ PCMK__SPECIAL_PID_AS_0(pcmk_children[lpc].pid), ++ ret ? ", at least per IPC end-point that went AWOL" ++ : ""); ++ } ++ pcmk_process_exit(&(pcmk_children[lpc])); ++ continue; ++ default: ++ crm_crit("Unexpected value from pcmk_child_active:" ++ " %d (pid=%lld)", ret, ++ (long long) PCMK__SPECIAL_PID_AS_0( ++ pcmk_children[lpc].pid)); ++ /* fall through */ ++ case -1: ++ case -2: ++ /* message(s) already emitted */ ++ crm_exit(CRM_EX_FATAL); ++ break; /* static analysis/noreturn */ + } + } + /* at least one of the processes found at startup +@@ -733,57 +918,147 @@ check_active_before_startup_processes(gpointer user_data) + } + } + ++ global_keep_tracking = keep_tracking; + return keep_tracking; + } +-#endif // SUPPORT_PROCFS + +-static void ++/*! ++ * \internal ++ * \brief Initial one-off check of the pre-existing "child" processes ++ * ++ * With "child" process, we mean the subdaemon that defines an API end-point ++ * (all of them do as of the comment) -- the possible complement is skipped ++ * as it is deemed it has no such shared resources to cause conflicts about, ++ * hence it can presumably be started anew without hesitation. ++ * If that won't hold true in the future, the concept of a shared resource ++ * will have to be generalized beyond the API end-point. ++ * ++ * For boundary cases that the "child" is still starting (IPC end-point is yet ++ * to be witnessed), or more rarely (practically FreeBSD only), when there's ++ * a pre-existing "untrackable" authentic process, we give the situation some ++ * time to possibly unfold in the right direction, meaning that said socket ++ * will appear or the unattainable process will disappear per the observable ++ * IPC, respectively. ++ * ++ * \return 0 if no such "child" process found, positive number X when X ++ * "children" detected, -1 on an internal error, -2 when any ++ * would-be-used IPC is blocked with an unauthorized process ++ * ++ * \note Since this gets run at the very start, \c respawn_count fields ++ * for particular children get temporarily overloaded with "rounds ++ * of waiting" tracking, restored once we are about to finish with ++ * success (i.e. returning value >=0) and will remain unrestored ++ * otherwise. One way to suppress liveness detection logic for ++ * particular child is to set the said value to a negative number. ++ */ ++#define WAIT_TRIES 4 /* together with interleaved sleeps, worst case ~ 1s */ ++static int + find_and_track_existing_processes(void) + { +-#if SUPPORT_PROCFS +- DIR *dp; +- struct dirent *entry; +- bool start_tracker = FALSE; +- char entry_name[16]; +- +- dp = opendir("/proc"); +- if (!dp) { +- /* no proc directory to search through */ +- crm_notice("Can not read /proc directory to track existing components"); +- return; +- } +- +- while ((entry = readdir(dp)) != NULL) { +- int pid; +- int max = SIZEOF(pcmk_children); +- int i; +- +- if (crm_procfs_process_info(entry, entry_name, &pid) < 0) { +- continue; +- } +- for (i = 0; i < max; i++) { +- if ((pcmk_children[i].start_seq != 0) +- && !strncmp(entry_name, pcmk_children[i].name, 15) +- && (crm_pid_active(pid, NULL) == 1)) { +- +- crm_notice("Tracking existing %s process (pid=%d)", +- pcmk_children[i].name, pid); +- pcmk_children[i].pid = pid; +- pcmk_children[i].active_before_startup = TRUE; +- start_tracker = TRUE; +- break; ++ unsigned tracking = 0U; ++ bool wait_in_progress; ++ int cur; ++ size_t i, rounds; ++ ++ for (rounds = 1; rounds <= WAIT_TRIES; rounds++) { ++ wait_in_progress = false; ++ for (i = 0; i < SIZEOF(pcmk_children); i++) { ++ if (!pcmk_children[i].endpoint ++ || pcmk_children[i].respawn_count < 0 ++ || !(cur = pcmk_child_active(&pcmk_children[i]))) { ++ /* as a speculation, don't give up in the context of ++ pcmk_child_active check if there are more rounds to ++ come for other reasons, but don't artificially wait just ++ because of this, since we would preferably start ASAP */ ++ continue; ++ } ++ pcmk_children[i].respawn_count = rounds; ++ switch (cur) { ++ case 1: ++ if (pcmk_children[i].pid == PCMK__SPECIAL_PID) { ++ if (crm_is_true(getenv("PCMK_fail_fast"))) { ++ crm_crit("Cannot reliably track pre-existing" ++ " authentic process behind %s IPC on this" ++ " platform and PCMK_fail_fast requested", ++ pcmk_children[i].endpoint); ++ return -1; ++ } else if (pcmk_children[i].respawn_count == WAIT_TRIES) { ++ crm_notice("Assuming pre-existing authentic, though" ++ " on this platform untrackable, process" ++ " behind %s IPC is stable (was in %d" ++ " previous samples) so rather than" ++ " bailing out (PCMK_fail_fast not" ++ " requested), we just switch to a less" ++ " optimal IPC liveness monitoring" ++ " (not very suitable for heavy load)", ++ pcmk_children[i].name, WAIT_TRIES - 1); ++ crm_warn("The process behind %s IPC cannot be" ++ " terminated, so the overall shutdown" ++ " will get delayed implicitly (%ld s)," ++ " which serves as a graceful period for" ++ " its native termination if it vitally" ++ " depends on some other daemons going" ++ " down in a controlled way already", ++ pcmk_children[i].name, ++ (long) SHUTDOWN_ESCALATION_PERIOD); ++ } else { ++ wait_in_progress = true; ++ crm_warn("Cannot reliably track pre-existing" ++ " authentic process behind %s IPC on this" ++ " platform, can still disappear in %d" ++ " attempt(s)", pcmk_children[i].endpoint, ++ WAIT_TRIES - pcmk_children[i].respawn_count); ++ continue; ++ } ++ } ++ crm_notice("Tracking existing %s process (pid=%lld)", ++ pcmk_children[i].name, ++ (long long) PCMK__SPECIAL_PID_AS_0( ++ pcmk_children[i].pid)); ++ pcmk_children[i].respawn_count = -1; /* 0~keep watching */ ++ pcmk_children[i].active_before_startup = TRUE; ++ tracking++; ++ break; ++ case 2: ++ if (pcmk_children[i].respawn_count == WAIT_TRIES) { ++ crm_crit("%s IPC end-point for existing authentic" ++ " process %lld did not (re)appear", ++ pcmk_children[i].endpoint, ++ (long long) PCMK__SPECIAL_PID_AS_0( ++ pcmk_children[i].pid)); ++ return -1; ++ } ++ wait_in_progress = true; ++ crm_warn("Cannot find %s IPC end-point for existing" ++ " authentic process %lld, can still (re)appear" ++ " in %d attempts (?)", ++ pcmk_children[i].endpoint, ++ (long long) PCMK__SPECIAL_PID_AS_0( ++ pcmk_children[i].pid), ++ WAIT_TRIES - pcmk_children[i].respawn_count); ++ continue; ++ case -1: ++ case -2: ++ return cur; /* messages already emitted */ ++ default: ++ crm_crit("Unexpected condition"CRM_XS"cur=%d", cur); ++ return -1; /* unexpected condition */ + } + } ++ if (!wait_in_progress) { ++ break; ++ } ++ (void) poll(NULL, 0, 250); /* a bit for changes to possibly happen */ ++ } ++ for (i = 0; i < SIZEOF(pcmk_children); i++) { ++ pcmk_children[i].respawn_count = 0; /* restore pristine state */ + } + +- if (start_tracker) { +- g_timeout_add_seconds(PCMK_PROCESS_CHECK_INTERVAL, check_active_before_startup_processes, +- NULL); ++ if (tracking) { ++ g_timeout_add_seconds(PCMK_PROCESS_CHECK_INTERVAL, ++ check_active_before_startup_processes, NULL); + } +- closedir(dp); +-#else +- crm_notice("No procfs support, so skipping check for existing components"); +-#endif // SUPPORT_PROCFS ++ return (tracking > INT_MAX) ? INT_MAX : tracking; + } + + static void +@@ -1091,7 +1366,16 @@ main(int argc, char **argv) + setenv("PCMK_watchdog", "false", 1); + } + +- find_and_track_existing_processes(); ++ switch (find_and_track_existing_processes()) { ++ case -1: ++ crm_crit("Internal fatality, see the log"); ++ crm_exit(CRM_EX_FATAL); ++ case -2: ++ crm_crit("Blocked by foreign process, kill the offender"); ++ crm_exit(CRM_EX_CANTCREAT); ++ default: ++ break; ++ }; + + cluster.destroy = mcp_cpg_destroy; + cluster.cpg.cpg_deliver_fn = mcp_cpg_deliver; +-- +1.8.3.1 + + +From 052e6045eea77685aabeed12c519c7c9eb9b5287 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Jan=20Pokorn=C3=BD?= +Date: Tue, 16 Apr 2019 00:13:31 +0200 +Subject: [PATCH 5/7] High: pacemakerd vs. IPC/procfs confused deputy + authenticity issue (3/4) + +[3/4: other daemons to authenticate IPC servers of fellow processes] + +Now that CVE-2018-16877 issue alone is still only partially covered +based on the preceding commits in the set, put the server-by-client +authentication (enabled and 1/3 and partially sported in 2/3) into +practice widely amongst the communicating pacemaker child daemons and +towards CPG API provided by 3rd party but principally using the same +underlying IPC mechanism facilitated by libqb, and consequently close +the remaining "big gap". + +As a small justification to introducing yet another "return +value" int variable, type-correctness is restored for those +that shall be cs_error_t to begin with. +--- + daemons/pacemakerd/pcmkd_corosync.c | 61 +++++++++++- + lib/cluster/corosync.c | 178 ++++++++++++++++++++++++++++++------ + lib/cluster/cpg.c | 81 +++++++++++++--- + lib/common/ipc.c | 43 ++++++++- + 4 files changed, 317 insertions(+), 46 deletions(-) + +diff --git a/daemons/pacemakerd/pcmkd_corosync.c b/daemons/pacemakerd/pcmkd_corosync.c +index c73a1f5..65595fa 100644 +--- a/daemons/pacemakerd/pcmkd_corosync.c ++++ b/daemons/pacemakerd/pcmkd_corosync.c +@@ -1,5 +1,7 @@ + /* +- * Copyright 2010-2018 Andrew Beekhof ++ * Copyright 2010-2019 the Pacemaker project contributors ++ * ++ * The version control history for this file may have further details. + * + * This source code is licensed under the GNU General Public License version 2 + * or later (GPLv2+) WITHOUT ANY WARRANTY. +@@ -21,8 +23,11 @@ + #include + + #include ++#include /* for crm_ipc_is_authentic_process */ + #include + ++#include /* PCMK__SPECIAL_PID* */ ++ + enum cluster_type_e stack = pcmk_cluster_unknown; + static corosync_cfg_handle_t cfg_handle; + +@@ -91,7 +96,10 @@ gboolean + cluster_connect_cfg(uint32_t * nodeid) + { + cs_error_t rc; +- int fd = 0, retries = 0; ++ int fd = -1, retries = 0, rv; ++ uid_t found_uid = 0; ++ gid_t found_gid = 0; ++ pid_t found_pid = 0; + + static struct mainloop_fd_callbacks cfg_fd_callbacks = { + .dispatch = pcmk_cfg_dispatch, +@@ -101,13 +109,27 @@ cluster_connect_cfg(uint32_t * nodeid) + cs_repeat(retries, 30, rc = corosync_cfg_initialize(&cfg_handle, &cfg_callbacks)); + + if (rc != CS_OK) { +- crm_err("corosync cfg init error %d", rc); ++ crm_err("corosync cfg init: %s (%d)", cs_strerror(rc), rc); + return FALSE; + } + + rc = corosync_cfg_fd_get(cfg_handle, &fd); + if (rc != CS_OK) { +- crm_err("corosync cfg fd_get error %d", rc); ++ crm_err("corosync cfg fd_get: %s (%d)", cs_strerror(rc), rc); ++ goto bail; ++ } ++ ++ /* CFG provider run as root (in given user namespace, anyway)? */ ++ if (!(rv = crm_ipc_is_authentic_process(fd, (uid_t) 0,(gid_t) 0, &found_pid, ++ &found_uid, &found_gid))) { ++ crm_err("CFG provider is not authentic:" ++ " process %lld (uid: %lld, gid: %lld)", ++ (long long) PCMK__SPECIAL_PID_AS_0(found_pid), ++ (long long) found_uid, (long long) found_gid); ++ goto bail; ++ } else if (rv < 0) { ++ crm_err("Could not verify authenticity of CFG provider: %s (%d)", ++ strerror(-rv), -rv); + goto bail; + } + +@@ -152,10 +174,15 @@ get_config_opt(uint64_t unused, cmap_handle_t object_handle, const char *key, ch + gboolean + mcp_read_config(void) + { +- int rc = CS_OK; ++ cs_error_t rc = CS_OK; + int retries = 0; + cmap_handle_t local_handle; + uint64_t config = 0; ++ int fd = -1; ++ uid_t found_uid = 0; ++ gid_t found_gid = 0; ++ pid_t found_pid = 0; ++ int rv; + + // There can be only one possibility + do { +@@ -178,6 +205,30 @@ mcp_read_config(void) + return FALSE; + } + ++ rc = cmap_fd_get(local_handle, &fd); ++ if (rc != CS_OK) { ++ crm_err("Could not obtain the CMAP API connection: %s (%d)", ++ cs_strerror(rc), rc); ++ cmap_finalize(local_handle); ++ return FALSE; ++ } ++ ++ /* CMAP provider run as root (in given user namespace, anyway)? */ ++ if (!(rv = crm_ipc_is_authentic_process(fd, (uid_t) 0,(gid_t) 0, &found_pid, ++ &found_uid, &found_gid))) { ++ crm_err("CMAP provider is not authentic:" ++ " process %lld (uid: %lld, gid: %lld)", ++ (long long) PCMK__SPECIAL_PID_AS_0(found_pid), ++ (long long) found_uid, (long long) found_gid); ++ cmap_finalize(local_handle); ++ return FALSE; ++ } else if (rv < 0) { ++ crm_err("Could not verify authenticity of CMAP provider: %s (%d)", ++ strerror(-rv), -rv); ++ cmap_finalize(local_handle); ++ return FALSE; ++ } ++ + stack = get_cluster_type(); + crm_info("Reading configure for stack: %s", name_for_cluster_type(stack)); + +diff --git a/lib/cluster/corosync.c b/lib/cluster/corosync.c +index fcdfc0e..648c0d5 100644 +--- a/lib/cluster/corosync.c ++++ b/lib/cluster/corosync.c +@@ -1,19 +1,10 @@ + /* +- * Copyright (C) 2004 Andrew Beekhof ++ * Copyright 2004-2019 the Pacemaker project contributors + * +- * This library is free software; you can redistribute it and/or +- * modify it under the terms of the GNU Lesser General Public +- * License as published by the Free Software Foundation; either +- * version 2.1 of the License, or (at your option) any later version. ++ * The version control history for this file may have further details. + * +- * This library is distributed in the hope that it will be useful, +- * but WITHOUT ANY WARRANTY; without even the implied warranty of +- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +- * Lesser General Public License for more details. +- * +- * You should have received a copy of the GNU Lesser General Public +- * License along with this library; if not, write to the Free Software +- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ * This source code is licensed under the GNU Lesser General Public License ++ * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. + */ + + #include +@@ -43,6 +34,8 @@ + + #include + ++#include /* PCMK__SPECIAL_PID* */ ++ + quorum_handle_t pcmk_quorum_handle = 0; + + gboolean(*quorum_app_callback) (unsigned long long seq, gboolean quorate) = NULL; +@@ -68,10 +61,15 @@ char * + corosync_node_name(uint64_t /*cmap_handle_t */ cmap_handle, uint32_t nodeid) + { + int lpc = 0; +- int rc = CS_OK; ++ cs_error_t rc = CS_OK; + int retries = 0; + char *name = NULL; + cmap_handle_t local_handle = 0; ++ int fd = -1; ++ uid_t found_uid = 0; ++ gid_t found_gid = 0; ++ pid_t found_pid = 0; ++ int rv; + + if (nodeid == 0) { + nodeid = get_local_nodeid(0); +@@ -100,6 +98,27 @@ corosync_node_name(uint64_t /*cmap_handle_t */ cmap_handle, uint32_t nodeid) + + if (cmap_handle == 0) { + cmap_handle = local_handle; ++ ++ rc = cmap_fd_get(cmap_handle, &fd); ++ if (rc != CS_OK) { ++ crm_err("Could not obtain the CMAP API connection: %s (%d)", ++ cs_strerror(rc), rc); ++ goto bail; ++ } ++ ++ /* CMAP provider run as root (in given user namespace, anyway)? */ ++ if (!(rv = crm_ipc_is_authentic_process(fd, (uid_t) 0,(gid_t) 0, &found_pid, ++ &found_uid, &found_gid))) { ++ crm_err("CMAP provider is not authentic:" ++ " process %lld (uid: %lld, gid: %lld)", ++ (long long) PCMK__SPECIAL_PID_AS_0(found_pid), ++ (long long) found_uid, (long long) found_gid); ++ goto bail; ++ } else if (rv < 0) { ++ crm_err("Could not verify authenticity of CMAP provider: %s (%d)", ++ strerror(-rv), -rv); ++ goto bail; ++ } + } + + while (name == NULL && cmap_handle != 0) { +@@ -140,6 +159,7 @@ corosync_node_name(uint64_t /*cmap_handle_t */ cmap_handle, uint32_t nodeid) + lpc++; + } + ++bail: + if(local_handle) { + cmap_finalize(local_handle); + } +@@ -251,11 +271,15 @@ gboolean + cluster_connect_quorum(gboolean(*dispatch) (unsigned long long, gboolean), + void (*destroy) (gpointer)) + { +- int rc = -1; ++ cs_error_t rc; + int fd = 0; + int quorate = 0; + uint32_t quorum_type = 0; + struct mainloop_fd_callbacks quorum_fd_callbacks; ++ uid_t found_uid = 0; ++ gid_t found_gid = 0; ++ pid_t found_pid = 0; ++ int rv; + + quorum_fd_callbacks.dispatch = pcmk_quorum_dispatch; + quorum_fd_callbacks.destroy = destroy; +@@ -264,7 +288,8 @@ cluster_connect_quorum(gboolean(*dispatch) (unsigned long long, gboolean), + + rc = quorum_initialize(&pcmk_quorum_handle, &quorum_callbacks, &quorum_type); + if (rc != CS_OK) { +- crm_err("Could not connect to the Quorum API: %d", rc); ++ crm_err("Could not connect to the Quorum API: %s (%d)", ++ cs_strerror(rc), rc); + goto bail; + + } else if (quorum_type != QUORUM_SET) { +@@ -272,6 +297,29 @@ cluster_connect_quorum(gboolean(*dispatch) (unsigned long long, gboolean), + goto bail; + } + ++ rc = quorum_fd_get(pcmk_quorum_handle, &fd); ++ if (rc != CS_OK) { ++ crm_err("Could not obtain the Quorum API connection: %s (%d)", ++ strerror(rc), rc); ++ goto bail; ++ } ++ ++ /* Quorum provider run as root (in given user namespace, anyway)? */ ++ if (!(rv = crm_ipc_is_authentic_process(fd, (uid_t) 0,(gid_t) 0, &found_pid, ++ &found_uid, &found_gid))) { ++ crm_err("Quorum provider is not authentic:" ++ " process %lld (uid: %lld, gid: %lld)", ++ (long long) PCMK__SPECIAL_PID_AS_0(found_pid), ++ (long long) found_uid, (long long) found_gid); ++ rc = CS_ERR_ACCESS; ++ goto bail; ++ } else if (rv < 0) { ++ crm_err("Could not verify authenticity of Quorum provider: %s (%d)", ++ strerror(-rv), -rv); ++ rc = CS_ERR_ACCESS; ++ goto bail; ++ } ++ + rc = quorum_getquorate(pcmk_quorum_handle, &quorate); + if (rc != CS_OK) { + crm_err("Could not obtain the current Quorum API state: %d", rc); +@@ -292,12 +340,6 @@ cluster_connect_quorum(gboolean(*dispatch) (unsigned long long, gboolean), + goto bail; + } + +- rc = quorum_fd_get(pcmk_quorum_handle, &fd); +- if (rc != CS_OK) { +- crm_err("Could not obtain the Quorum API connection: %d", rc); +- goto bail; +- } +- + mainloop_add_fd("quorum", G_PRIORITY_HIGH, fd, dispatch, &quorum_fd_callbacks); + + corosync_initialize_nodelist(NULL, FALSE, NULL); +@@ -488,10 +530,15 @@ gboolean + corosync_initialize_nodelist(void *cluster, gboolean force_member, xmlNode * xml_parent) + { + int lpc = 0; +- int rc = CS_OK; ++ cs_error_t rc = CS_OK; + int retries = 0; + gboolean any = FALSE; + cmap_handle_t cmap_handle; ++ int fd = -1; ++ uid_t found_uid = 0; ++ gid_t found_gid = 0; ++ pid_t found_pid = 0; ++ int rv; + + do { + rc = cmap_initialize(&cmap_handle); +@@ -509,6 +556,27 @@ corosync_initialize_nodelist(void *cluster, gboolean force_member, xmlNode * xml + return FALSE; + } + ++ rc = cmap_fd_get(cmap_handle, &fd); ++ if (rc != CS_OK) { ++ crm_err("Could not obtain the CMAP API connection: %s (%d)", ++ cs_strerror(rc), rc); ++ goto bail; ++ } ++ ++ /* CMAP provider run as root (in given user namespace, anyway)? */ ++ if (!(rv = crm_ipc_is_authentic_process(fd, (uid_t) 0,(gid_t) 0, &found_pid, ++ &found_uid, &found_gid))) { ++ crm_err("CMAP provider is not authentic:" ++ " process %lld (uid: %lld, gid: %lld)", ++ (long long) PCMK__SPECIAL_PID_AS_0(found_pid), ++ (long long) found_uid, (long long) found_gid); ++ goto bail; ++ } else if (rv < 0) { ++ crm_err("Could not verify authenticity of CMAP provider: %s (%d)", ++ strerror(-rv), -rv); ++ goto bail; ++ } ++ + crm_peer_init(); + crm_trace("Initializing corosync nodelist"); + for (lpc = 0; TRUE; lpc++) { +@@ -562,6 +630,7 @@ corosync_initialize_nodelist(void *cluster, gboolean force_member, xmlNode * xml + + free(name); + } ++bail: + cmap_finalize(cmap_handle); + return any; + } +@@ -571,36 +640,68 @@ corosync_cluster_name(void) + { + cmap_handle_t handle; + char *cluster_name = NULL; +- int rc = CS_OK; ++ cs_error_t rc = CS_OK; ++ int fd = -1; ++ uid_t found_uid = 0; ++ gid_t found_gid = 0; ++ pid_t found_pid = 0; ++ int rv; + + rc = cmap_initialize(&handle); + if (rc != CS_OK) { +- crm_info("Failed to initialize the cmap API: %s (%d)", ais_error2text(rc), rc); ++ crm_info("Failed to initialize the cmap API: %s (%d)", ++ cs_strerror(rc), rc); + return NULL; + } + ++ rc = cmap_fd_get(handle, &fd); ++ if (rc != CS_OK) { ++ crm_err("Could not obtain the CMAP API connection: %s (%d)", ++ cs_strerror(rc), rc); ++ goto bail; ++ } ++ ++ /* CMAP provider run as root (in given user namespace, anyway)? */ ++ if (!(rv = crm_ipc_is_authentic_process(fd, (uid_t) 0,(gid_t) 0, &found_pid, ++ &found_uid, &found_gid))) { ++ crm_err("CMAP provider is not authentic:" ++ " process %lld (uid: %lld, gid: %lld)", ++ (long long) PCMK__SPECIAL_PID_AS_0(found_pid), ++ (long long) found_uid, (long long) found_gid); ++ goto bail; ++ } else if (rv < 0) { ++ crm_err("Could not verify authenticity of CMAP provider: %s (%d)", ++ strerror(-rv), -rv); ++ goto bail; ++ } ++ + rc = cmap_get_string(handle, "totem.cluster_name", &cluster_name); + if (rc != CS_OK) { +- crm_info("Cannot get totem.cluster_name: %s (%d)", ais_error2text(rc), rc); ++ crm_info("Cannot get totem.cluster_name: %s (%d)", cs_strerror(rc), rc); + + } else { + crm_debug("cmap totem.cluster_name = '%s'", cluster_name); + } + ++bail: + cmap_finalize(handle); +- + return cluster_name; + } + + int + corosync_cmap_has_config(const char *prefix) + { +- int rc = CS_OK; ++ cs_error_t rc = CS_OK; + int retries = 0; + static int found = -1; + cmap_handle_t cmap_handle; + cmap_iter_handle_t iter_handle; + char key_name[CMAP_KEYNAME_MAXLEN + 1]; ++ int fd = -1; ++ uid_t found_uid = 0; ++ gid_t found_gid = 0; ++ pid_t found_pid = 0; ++ int rv; + + if(found != -1) { + return found; +@@ -623,6 +724,27 @@ corosync_cmap_has_config(const char *prefix) + return -1; + } + ++ rc = cmap_fd_get(cmap_handle, &fd); ++ if (rc != CS_OK) { ++ crm_err("Could not obtain the CMAP API connection: %s (%d)", ++ cs_strerror(rc), rc); ++ goto bail; ++ } ++ ++ /* CMAP provider run as root (in given user namespace, anyway)? */ ++ if (!(rv = crm_ipc_is_authentic_process(fd, (uid_t) 0,(gid_t) 0, &found_pid, ++ &found_uid, &found_gid))) { ++ crm_err("CMAP provider is not authentic:" ++ " process %lld (uid: %lld, gid: %lld)", ++ (long long) PCMK__SPECIAL_PID_AS_0(found_pid), ++ (long long) found_uid, (long long) found_gid); ++ goto bail; ++ } else if (rv < 0) { ++ crm_err("Could not verify authenticity of CMAP provider: %s (%d)", ++ strerror(-rv), -rv); ++ goto bail; ++ } ++ + rc = cmap_iter_init(cmap_handle, prefix, &iter_handle); + if (rc != CS_OK) { + crm_warn("Failed to initialize iteration for corosync cmap '%s': %s (rc=%d)", +diff --git a/lib/cluster/cpg.c b/lib/cluster/cpg.c +index 75af131..430195c 100644 +--- a/lib/cluster/cpg.c ++++ b/lib/cluster/cpg.c +@@ -1,5 +1,7 @@ + /* +- * Copyright 2004-2018 Andrew Beekhof ++ * Copyright 2004-2019 the Pacemaker project contributors ++ * ++ * The version control history for this file may have further details. + * + * This source code is licensed under the GNU Lesser General Public License + * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. +@@ -27,6 +29,8 @@ + + #include + ++#include /* PCMK__SPECIAL_PID* */ ++ + cpg_handle_t pcmk_cpg_handle = 0; /* TODO: Remove, use cluster.cpg_handle */ + + static bool cpg_evicted = FALSE; +@@ -60,11 +64,16 @@ cluster_disconnect_cpg(crm_cluster_t *cluster) + + uint32_t get_local_nodeid(cpg_handle_t handle) + { +- int rc = CS_OK; ++ cs_error_t rc = CS_OK; + int retries = 0; + static uint32_t local_nodeid = 0; + cpg_handle_t local_handle = handle; + cpg_callbacks_t cb = { }; ++ int fd = -1; ++ uid_t found_uid = 0; ++ gid_t found_gid = 0; ++ pid_t found_pid = 0; ++ int rv; + + if(local_nodeid != 0) { + return local_nodeid; +@@ -73,6 +82,32 @@ uint32_t get_local_nodeid(cpg_handle_t handle) + if(handle == 0) { + crm_trace("Creating connection"); + cs_repeat(retries, 5, rc = cpg_initialize(&local_handle, &cb)); ++ if (rc != CS_OK) { ++ crm_err("Could not connect to the CPG API: %s (%d)", ++ cs_strerror(rc), rc); ++ return 0; ++ } ++ ++ rc = cpg_fd_get(local_handle, &fd); ++ if (rc != CS_OK) { ++ crm_err("Could not obtain the CPG API connection: %s (%d)", ++ cs_strerror(rc), rc); ++ goto bail; ++ } ++ ++ /* CPG provider run as root (in given user namespace, anyway)? */ ++ if (!(rv = crm_ipc_is_authentic_process(fd, (uid_t) 0,(gid_t) 0, &found_pid, ++ &found_uid, &found_gid))) { ++ crm_err("CPG provider is not authentic:" ++ " process %lld (uid: %lld, gid: %lld)", ++ (long long) PCMK__SPECIAL_PID_AS_0(found_pid), ++ (long long) found_uid, (long long) found_gid); ++ goto bail; ++ } else if (rv < 0) { ++ crm_err("Could not verify authenticity of CPG provider: %s (%d)", ++ strerror(-rv), -rv); ++ goto bail; ++ } + } + + if (rc == CS_OK) { +@@ -84,6 +119,8 @@ uint32_t get_local_nodeid(cpg_handle_t handle) + if (rc != CS_OK) { + crm_err("Could not get local node id from the CPG API: %s (%d)", ais_error2text(rc), rc); + } ++ ++bail: + if(handle == 0) { + crm_trace("Closing connection"); + cpg_finalize(local_handle); +@@ -403,13 +440,17 @@ pcmk_cpg_membership(cpg_handle_t handle, + gboolean + cluster_connect_cpg(crm_cluster_t *cluster) + { +- int rc = -1; +- int fd = 0; ++ cs_error_t rc; ++ int fd = -1; + int retries = 0; + uint32_t id = 0; + crm_node_t *peer = NULL; + cpg_handle_t handle = 0; + const char *message_name = pcmk_message_name(crm_system_name); ++ uid_t found_uid = 0; ++ gid_t found_gid = 0; ++ pid_t found_pid = 0; ++ int rv; + + struct mainloop_fd_callbacks cpg_fd_callbacks = { + .dispatch = pcmk_cpg_dispatch, +@@ -434,7 +475,31 @@ cluster_connect_cpg(crm_cluster_t *cluster) + + cs_repeat(retries, 30, rc = cpg_initialize(&handle, &cpg_callbacks)); + if (rc != CS_OK) { +- crm_err("Could not connect to the Cluster Process Group API: %d", rc); ++ crm_err("Could not connect to the CPG API: %s (%d)", ++ cs_strerror(rc), rc); ++ goto bail; ++ } ++ ++ rc = cpg_fd_get(handle, &fd); ++ if (rc != CS_OK) { ++ crm_err("Could not obtain the CPG API connection: %s (%d)", ++ cs_strerror(rc), rc); ++ goto bail; ++ } ++ ++ /* CPG provider run as root (in given user namespace, anyway)? */ ++ if (!(rv = crm_ipc_is_authentic_process(fd, (uid_t) 0,(gid_t) 0, &found_pid, ++ &found_uid, &found_gid))) { ++ crm_err("CPG provider is not authentic:" ++ " process %lld (uid: %lld, gid: %lld)", ++ (long long) PCMK__SPECIAL_PID_AS_0(found_pid), ++ (long long) found_uid, (long long) found_gid); ++ rc = CS_ERR_ACCESS; ++ goto bail; ++ } else if (rv < 0) { ++ crm_err("Could not verify authenticity of CPG provider: %s (%d)", ++ strerror(-rv), -rv); ++ rc = CS_ERR_ACCESS; + goto bail; + } + +@@ -453,12 +518,6 @@ cluster_connect_cpg(crm_cluster_t *cluster) + goto bail; + } + +- rc = cpg_fd_get(handle, &fd); +- if (rc != CS_OK) { +- crm_err("Could not obtain the CPG API connection: %d", rc); +- goto bail; +- } +- + pcmk_cpg_handle = handle; + cluster->cpg_handle = handle; + mainloop_add_fd("corosync-cpg", G_PRIORITY_MEDIUM, fd, cluster, &cpg_fd_callbacks); +diff --git a/lib/common/ipc.c b/lib/common/ipc.c +index aa055d3..84d7cb9 100644 +--- a/lib/common/ipc.c ++++ b/lib/common/ipc.c +@@ -947,11 +947,18 @@ crm_ipc_new(const char *name, size_t max_size) + * + * \param[in] client Connection instance obtained from crm_ipc_new() + * +- * \return TRUE on success, FALSE otherwise (in which case errno will be set) ++ * \return TRUE on success, FALSE otherwise (in which case errno will be set; ++ * specifically, in case of discovering the remote side is not ++ * authentic, its value is set to ECONNABORTED). + */ + bool + crm_ipc_connect(crm_ipc_t * client) + { ++ static uid_t cl_uid = 0; ++ static gid_t cl_gid = 0; ++ pid_t found_pid = 0; uid_t found_uid = 0; gid_t found_gid = 0; ++ int rv; ++ + client->need_reply = FALSE; + client->ipc = qb_ipcc_connect(client->name, client->buf_size); + +@@ -962,7 +969,39 @@ crm_ipc_connect(crm_ipc_t * client) + + client->pfd.fd = crm_ipc_get_fd(client); + if (client->pfd.fd < 0) { +- crm_debug("Could not obtain file descriptor for %s connection: %s (%d)", client->name, pcmk_strerror(errno), errno); ++ rv = errno; ++ /* message already omitted */ ++ crm_ipc_close(client); ++ errno = rv; ++ return FALSE; ++ } ++ ++ if (!cl_uid && !cl_gid ++ && (rv = crm_user_lookup(CRM_DAEMON_USER, &cl_uid, &cl_gid)) < 0) { ++ errno = -rv; ++ /* message already omitted */ ++ crm_ipc_close(client); ++ errno = -rv; ++ return FALSE; ++ } ++ ++ if (!(rv = crm_ipc_is_authentic_process(client->pfd.fd, cl_uid, cl_gid, ++ &found_pid, &found_uid, ++ &found_gid))) { ++ crm_err("Daemon (IPC %s) is not authentic:" ++ " process %lld (uid: %lld, gid: %lld)", ++ client->name, (long long) PCMK__SPECIAL_PID_AS_0(found_pid), ++ (long long) found_uid, (long long) found_gid); ++ crm_ipc_close(client); ++ errno = ECONNABORTED; ++ return FALSE; ++ ++ } else if (rv < 0) { ++ errno = -rv; ++ crm_perror(LOG_ERR, "Could not verify authenticity of daemon (IPC %s)", ++ client->name); ++ crm_ipc_close(client); ++ errno = -rv; + return FALSE; + } + +-- +1.8.3.1 + + +From d324e407c0e2695f405974d567d79eb91d0ee69a Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Jan=20Pokorn=C3=BD?= +Date: Tue, 16 Apr 2019 00:13:40 +0200 +Subject: [PATCH 6/7] High: pacemakerd vs. IPC/procfs confused deputy + authenticity issue (4/4) + +[4/4: CPG users to be careful about now-more-probable rival processes] + +In essence, this comes down to pacemaker confusing at-node CPG members +with effectively the only plausible to co-exist at particular node, +which doesn't hold and asks for a wider reconciliation of this +reality-check. + +However, in practical terms, since there are two factors lowering the +priority of doing so: + +1/ possibly the only non-self-inflicted scenario is either that + some of the cluster stack processes fail -- this the problem + that shall rather be deferred to arranged node disarming/fencing + to stay on the safe side with 100% certainty, at the cost of + possibly long-lasting failover process at other nodes + (for other possibility, someone running some of these by accident + so they effectively become rival processes, it's like getting + hands cut when playing with a lawnmower in an unintended way) + +2/ for state tracking of the peer nodes, it may possibly cause troubles + in case the process observed as left wasn't the last for the + particular node, even if presumably just temporary, since the + situation may eventually resolve with imposed serialization of + the rival processes via API end-point singleton restriction (this + is also the most likely cause of why such non-final leave gets + observed in the first place), except in one case -- the legitimate + API end-point carrier won't possibly acknowledged as returned + by its peers, at least not immediately, unless it tries to join + anew, which verges on undefined behaviour (at least per corosync + documentation) + +we make do just with a light code change so as to + +* limit 1/ some more with in-daemon self-check for pre-existing + end-point existence (this is to complement the checks already made in + the parent daemon prior to spawning new instances, only some moments + later; note that we don't have any lock file etc. mechanisms to + prevent parallel runs of the same daemons, and people could run these + on their own deliberation), and to + +* guard against the interferences from the rivals at the same node + per 2/ with ignoring their non-final leave messages altogether. + +Note that CPG at this point is already expected to be authenticity-safe. + +Regarding now-more-probable part, we actually traded the inherently racy +procfs scanning for something (exactly that singleton mentioned above) +rather firm (and unfakeable), but we admittedly got lost track of +processes that are after CPG membership (that is, another form of +a shared state) prior to (or in non-deterministic order allowing for +the same) carring about publishing the end-point. + +Big thanks is owed to Yan Gao of SUSE, for early discovery and reporting +this discrepancy arising from the earlier commits in the set. +--- + daemons/attrd/pacemaker-attrd.c | 15 ++++++ + daemons/based/pacemaker-based.c | 18 ++++++- + daemons/controld/pacemaker-controld.c | 18 ++++++- + daemons/fenced/pacemaker-fenced.c | 15 ++++++ + lib/cluster/cpg.c | 91 ++++++++++++++++++++++++++++++++--- + 5 files changed, 146 insertions(+), 11 deletions(-) + +diff --git a/daemons/attrd/pacemaker-attrd.c b/daemons/attrd/pacemaker-attrd.c +index 837459f..9ffe201 100644 +--- a/daemons/attrd/pacemaker-attrd.c ++++ b/daemons/attrd/pacemaker-attrd.c +@@ -344,6 +344,7 @@ main(int argc, char **argv) + int flag = 0; + int index = 0; + int argerr = 0; ++ crm_ipc_t *old_instance = NULL; + + attrd_init_mainloop(); + crm_log_preinit(NULL, argc, argv); +@@ -380,6 +381,20 @@ main(int argc, char **argv) + + crm_log_init(T_ATTRD, LOG_INFO, TRUE, FALSE, argc, argv, FALSE); + crm_info("Starting up"); ++ ++ old_instance = crm_ipc_new(T_ATTRD, 0); ++ if (crm_ipc_connect(old_instance)) { ++ /* IPC end-point already up */ ++ crm_ipc_close(old_instance); ++ crm_ipc_destroy(old_instance); ++ crm_err("pacemaker-attrdd is already active, aborting startup"); ++ crm_exit(CRM_EX_OK); ++ } else { ++ /* not up or not authentic, we'll proceed either way */ ++ crm_ipc_destroy(old_instance); ++ old_instance = NULL; ++ } ++ + attributes = g_hash_table_new_full(crm_str_hash, g_str_equal, NULL, free_attribute); + + /* Connect to the CIB before connecting to the cluster or listening for IPC. +diff --git a/daemons/based/pacemaker-based.c b/daemons/based/pacemaker-based.c +index bd98ca8..f004f671 100644 +--- a/daemons/based/pacemaker-based.c ++++ b/daemons/based/pacemaker-based.c +@@ -90,13 +90,12 @@ main(int argc, char **argv) + int index = 0; + int argerr = 0; + struct passwd *pwentry = NULL; ++ crm_ipc_t *old_instance = NULL; + + crm_log_preinit(NULL, argc, argv); + crm_set_options(NULL, "[options]", + long_options, "Daemon for storing and replicating the cluster configuration"); + +- crm_peer_init(); +- + mainloop_add_signal(SIGTERM, cib_shutdown); + mainloop_add_signal(SIGPIPE, cib_enable_writes); + +@@ -171,6 +170,19 @@ main(int argc, char **argv) + + crm_log_init(NULL, LOG_INFO, TRUE, FALSE, argc, argv, FALSE); + ++ old_instance = crm_ipc_new(CIB_CHANNEL_RO, 0); ++ if (crm_ipc_connect(old_instance)) { ++ /* IPC end-point already up */ ++ crm_ipc_close(old_instance); ++ crm_ipc_destroy(old_instance); ++ crm_err("pacemaker-based is already active, aborting startup"); ++ crm_exit(CRM_EX_OK); ++ } else { ++ /* not up or not authentic, we'll proceed either way */ ++ crm_ipc_destroy(old_instance); ++ old_instance = NULL; ++ } ++ + if (cib_root == NULL) { + cib_root = CRM_CONFIG_DIR; + } else { +@@ -185,6 +197,8 @@ main(int argc, char **argv) + return CRM_EX_FATAL; + } + ++ crm_peer_init(); ++ + /* read local config file */ + cib_init(); + +diff --git a/daemons/controld/pacemaker-controld.c b/daemons/controld/pacemaker-controld.c +index 11f909e..38842fb 100644 +--- a/daemons/controld/pacemaker-controld.c ++++ b/daemons/controld/pacemaker-controld.c +@@ -1,5 +1,7 @@ + /* +- * Copyright 2004-2018 Andrew Beekhof ++ * Copyright 2004-2019 the Pacemaker project contributors ++ * ++ * The version control history for this file may have further details. + * + * This source code is licensed under the GNU General Public License version 2 + * or later (GPLv2+) WITHOUT ANY WARRANTY. +@@ -50,6 +52,7 @@ main(int argc, char **argv) + int flag; + int index = 0; + int argerr = 0; ++ crm_ipc_t *old_instance = NULL; + + crmd_mainloop = g_main_loop_new(NULL, FALSE); + crm_log_preinit(NULL, argc, argv); +@@ -93,6 +96,19 @@ main(int argc, char **argv) + crm_help('?', CRM_EX_USAGE); + } + ++ old_instance = crm_ipc_new(CRM_SYSTEM_CRMD, 0); ++ if (crm_ipc_connect(old_instance)) { ++ /* IPC end-point already up */ ++ crm_ipc_close(old_instance); ++ crm_ipc_destroy(old_instance); ++ crm_err("pacemaker-controld is already active, aborting startup"); ++ crm_exit(CRM_EX_OK); ++ } else { ++ /* not up or not authentic, we'll proceed either way */ ++ crm_ipc_destroy(old_instance); ++ old_instance = NULL; ++ } ++ + if (pcmk__daemon_can_write(PE_STATE_DIR, NULL) == FALSE) { + crm_err("Terminating due to bad permissions on " PE_STATE_DIR); + fprintf(stderr, +diff --git a/daemons/fenced/pacemaker-fenced.c b/daemons/fenced/pacemaker-fenced.c +index 105012b..da0dce9 100644 +--- a/daemons/fenced/pacemaker-fenced.c ++++ b/daemons/fenced/pacemaker-fenced.c +@@ -1262,6 +1262,7 @@ main(int argc, char **argv) + int option_index = 0; + crm_cluster_t cluster; + const char *actions[] = { "reboot", "off", "on", "list", "monitor", "status" }; ++ crm_ipc_t *old_instance = NULL; + + crm_log_preinit(NULL, argc, argv); + crm_set_options(NULL, "mode [options]", long_options, +@@ -1432,6 +1433,20 @@ main(int argc, char **argv) + } + + crm_log_init(NULL, LOG_INFO, TRUE, FALSE, argc, argv, FALSE); ++ ++ old_instance = crm_ipc_new("stonith-ng", 0); ++ if (crm_ipc_connect(old_instance)) { ++ /* IPC end-point already up */ ++ crm_ipc_close(old_instance); ++ crm_ipc_destroy(old_instance); ++ crm_err("pacemaker-fenced is already active, aborting startup"); ++ crm_exit(CRM_EX_OK); ++ } else { ++ /* not up or not authentic, we'll proceed either way */ ++ crm_ipc_destroy(old_instance); ++ old_instance = NULL; ++ } ++ + mainloop_add_signal(SIGTERM, stonith_shutdown); + + crm_peer_init(); +diff --git a/lib/cluster/cpg.c b/lib/cluster/cpg.c +index 430195c..2898c51 100644 +--- a/lib/cluster/cpg.c ++++ b/lib/cluster/cpg.c +@@ -362,6 +362,20 @@ pcmk_message_common_cs(cpg_handle_t handle, uint32_t nodeid, uint32_t pid, void + + #define PEER_NAME(peer) ((peer)? ((peer)->uname? (peer)->uname : "") : "") + ++static int cmp_member_list_nodeid(const void *first, ++ const void *second) ++{ ++ const struct cpg_address *const a = *((const struct cpg_address **) first), ++ *const b = *((const struct cpg_address **) second); ++ if (a->nodeid < b->nodeid) { ++ return -1; ++ } else if (a->nodeid > b->nodeid) { ++ return 1; ++ } ++ /* don't bother with "reason" nor "pid" */ ++ return 0; ++} ++ + void + pcmk_cpg_membership(cpg_handle_t handle, + const struct cpg_name *groupName, +@@ -373,29 +387,89 @@ pcmk_cpg_membership(cpg_handle_t handle, + gboolean found = FALSE; + static int counter = 0; + uint32_t local_nodeid = get_local_nodeid(handle); ++ const struct cpg_address *key, **rival, **sorted; ++ ++ sorted = malloc(member_list_entries * sizeof(const struct cpg_address *)); ++ CRM_ASSERT(sorted != NULL); ++ ++ for (size_t iter = 0; iter < member_list_entries; iter++) { ++ sorted[iter] = member_list + iter; ++ } ++ /* so that the cross-matching multiply-subscribed nodes is then cheap */ ++ qsort(sorted, member_list_entries, sizeof(const struct cpg_address *), ++ cmp_member_list_nodeid); + + for (i = 0; i < left_list_entries; i++) { + crm_node_t *peer = crm_find_peer(left_list[i].nodeid, NULL); + +- crm_info("Group event %s.%d: node %u (%s) left", ++ crm_info("Group event %s.%d: node %u (%s) left: %llu", + groupName->value, counter, left_list[i].nodeid, +- PEER_NAME(peer)); ++ PEER_NAME(peer), (unsigned long long) left_list[i].pid); ++ ++ /* in CPG world, NODE:PROCESS-IN-MEMBERSHIP-OF-G is an 1:N relation ++ and not playing by this rule may go wild in case of multiple ++ residual instances of the same pacemaker daemon at the same node ++ -- we must ensure that the possible local rival(s) won't make us ++ cry out and bail (e.g. when they quit themselves), since all the ++ surrounding logic denies this simple fact that the full membership ++ is discriminated also per the PID of the process beside mere node ++ ID (and implicitly, group ID); practically, this will be sound in ++ terms of not preventing progress, since all the CPG joiners are ++ also API end-point carriers, and that's what matters locally ++ (who's the winner); ++ remotely, we will just compare leave_list and member_list and if ++ the left process has it's node retained in member_list (under some ++ other PID, anyway) we will just ignore it as well ++ XXX: long-term fix is to establish in-out PID-aware tracking? */ + if (peer) { +- crm_update_peer_proc(__FUNCTION__, peer, crm_proc_cpg, OFFLINESTATUS); ++ key = &left_list[i]; ++ rival = bsearch(&key, sorted, member_list_entries, ++ sizeof(const struct cpg_address *), ++ cmp_member_list_nodeid); ++ if (rival == NULL) { ++ crm_update_peer_proc(__FUNCTION__, peer, crm_proc_cpg, ++ OFFLINESTATUS); ++ } else if (left_list[i].nodeid == local_nodeid) { ++ crm_info("Ignoring the above event %s.%d, comes from a local" ++ " rival process (presumably not us): %llu", ++ groupName->value, counter, ++ (unsigned long long) left_list[i].pid); ++ } else { ++ crm_info("Ignoring the above event %s.%d, comes from" ++ " a rival-rich node: %llu (e.g. %llu process" ++ " carries on)", ++ groupName->value, counter, ++ (unsigned long long) left_list[i].pid, ++ (unsigned long long) (*rival)->pid); ++ } + } + } ++ free(sorted); ++ sorted = NULL; + + for (i = 0; i < joined_list_entries; i++) { +- crm_info("Group event %s.%d: node %u joined", +- groupName->value, counter, joined_list[i].nodeid); ++ crm_info("Group event %s.%d: node %u joined: %llu" ++ " (unchecked for rivals)", ++ groupName->value, counter, joined_list[i].nodeid, ++ (unsigned long long) joined_list[i].pid); + } + + for (i = 0; i < member_list_entries; i++) { + crm_node_t *peer = crm_get_peer(member_list[i].nodeid, NULL); + +- crm_info("Group event %s.%d: node %u (%s) is member", ++ crm_info("Group event %s.%d: node %u (%s) is member: %llu" ++ " (at least once)", + groupName->value, counter, member_list[i].nodeid, +- PEER_NAME(peer)); ++ PEER_NAME(peer), member_list[i].pid); ++ ++ if (member_list[i].nodeid == local_nodeid ++ && member_list[i].pid != getpid()) { ++ /* see the note above */ ++ crm_info("Ignoring the above event %s.%d, comes from a local rival" ++ " process: %llu", groupName->value, counter, ++ (unsigned long long) member_list[i].pid); ++ continue; ++ } + + /* If the caller left auto-reaping enabled, this will also update the + * state to member. +@@ -418,7 +492,8 @@ pcmk_cpg_membership(cpg_handle_t handle, + + } else if (now > (peer->when_lost + 60)) { + // If it persists for more than a minute, update the state +- crm_warn("Node %u member of group %s but believed offline", ++ crm_warn("Node %u member of group %s but believed offline" ++ " (unchecked for rivals)", + member_list[i].nodeid, groupName->value); + crm_update_peer_state(__FUNCTION__, peer, CRM_NODE_MEMBER, 0); + } +-- +1.8.3.1 + + +From 3ad7b2509d78f95b5dfc8fffc4d9a91be1da5113 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Jan=20Pokorn=C3=BD?= +Date: Wed, 17 Apr 2019 15:04:21 +0200 +Subject: [PATCH 7/7] Med: controld: fix possible NULL pointer dereference + +This is now more likely triggerable once the problems related to +CVE-2018-16878 are avoided. +--- + daemons/controld/controld_control.c | 15 +++++++++------ + 1 file changed, 9 insertions(+), 6 deletions(-) + +diff --git a/daemons/controld/controld_control.c b/daemons/controld/controld_control.c +index ee95698..0ac358c 100644 +--- a/daemons/controld/controld_control.c ++++ b/daemons/controld/controld_control.c +@@ -77,12 +77,15 @@ do_ha_control(long long action, + registered = crm_connect_corosync(cluster); + #endif + } +- controld_election_init(cluster->uname); +- fsa_our_uname = cluster->uname; +- fsa_our_uuid = cluster->uuid; +- if(cluster->uuid == NULL) { +- crm_err("Could not obtain local uuid"); +- registered = FALSE; ++ ++ if (registered == TRUE) { ++ controld_election_init(cluster->uname); ++ fsa_our_uname = cluster->uname; ++ fsa_our_uuid = cluster->uuid; ++ if(cluster->uuid == NULL) { ++ crm_err("Could not obtain local uuid"); ++ registered = FALSE; ++ } + } + + if (registered == FALSE) { +-- +1.8.3.1 + diff --git a/SOURCES/003-security-log.patch b/SOURCES/003-security-log.patch new file mode 100644 index 0000000..cbb521b --- /dev/null +++ b/SOURCES/003-security-log.patch @@ -0,0 +1,186 @@ +From bccf845261c6e69fc4e6bdb8cf4e630a4a4ec7a8 Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Fri, 12 Apr 2019 09:46:51 -0500 +Subject: [PATCH] Log: libcrmcluster: improve CPG membership messages + +Show CPG event reason when provided by corosync, make messages more readable, +upgrade duplicate pid messages to warnings (and log only one message in those +cases). +--- + lib/cluster/cpg.c | 91 ++++++++++++++++++++++++++++++++++--------------------- + 1 file changed, 56 insertions(+), 35 deletions(-) + +diff --git a/lib/cluster/cpg.c b/lib/cluster/cpg.c +index 2898c51..ef6fa36 100644 +--- a/lib/cluster/cpg.c ++++ b/lib/cluster/cpg.c +@@ -360,8 +360,6 @@ pcmk_message_common_cs(cpg_handle_t handle, uint32_t nodeid, uint32_t pid, void + return NULL; + } + +-#define PEER_NAME(peer) ((peer)? ((peer)->uname? (peer)->uname : "") : "") +- + static int cmp_member_list_nodeid(const void *first, + const void *second) + { +@@ -376,6 +374,32 @@ static int cmp_member_list_nodeid(const void *first, + return 0; + } + ++static const char * ++cpgreason2str(cpg_reason_t reason) ++{ ++ switch (reason) { ++ case CPG_REASON_JOIN: return " via cpg_join"; ++ case CPG_REASON_LEAVE: return " via cpg_leave"; ++ case CPG_REASON_NODEDOWN: return " via cluster exit"; ++ case CPG_REASON_NODEUP: return " via cluster join"; ++ case CPG_REASON_PROCDOWN: return " for unknown reason"; ++ default: break; ++ } ++ return ""; ++} ++ ++static inline const char * ++peer_name(crm_node_t *peer) ++{ ++ if (peer == NULL) { ++ return "unknown node"; ++ } else if (peer->uname == NULL) { ++ return "peer node"; ++ } else { ++ return peer->uname; ++ } ++} ++ + void + pcmk_cpg_membership(cpg_handle_t handle, + const struct cpg_name *groupName, +@@ -387,7 +411,7 @@ pcmk_cpg_membership(cpg_handle_t handle, + gboolean found = FALSE; + static int counter = 0; + uint32_t local_nodeid = get_local_nodeid(handle); +- const struct cpg_address *key, **rival, **sorted; ++ const struct cpg_address *key, **sorted; + + sorted = malloc(member_list_entries * sizeof(const struct cpg_address *)); + CRM_ASSERT(sorted != NULL); +@@ -401,10 +425,7 @@ pcmk_cpg_membership(cpg_handle_t handle, + + for (i = 0; i < left_list_entries; i++) { + crm_node_t *peer = crm_find_peer(left_list[i].nodeid, NULL); +- +- crm_info("Group event %s.%d: node %u (%s) left: %llu", +- groupName->value, counter, left_list[i].nodeid, +- PEER_NAME(peer), (unsigned long long) left_list[i].pid); ++ const struct cpg_address **rival = NULL; + + /* in CPG world, NODE:PROCESS-IN-MEMBERSHIP-OF-G is an 1:N relation + and not playing by this rule may go wild in case of multiple +@@ -418,7 +439,7 @@ pcmk_cpg_membership(cpg_handle_t handle, + also API end-point carriers, and that's what matters locally + (who's the winner); + remotely, we will just compare leave_list and member_list and if +- the left process has it's node retained in member_list (under some ++ the left process has its node retained in member_list (under some + other PID, anyway) we will just ignore it as well + XXX: long-term fix is to establish in-out PID-aware tracking? */ + if (peer) { +@@ -426,50 +447,51 @@ pcmk_cpg_membership(cpg_handle_t handle, + rival = bsearch(&key, sorted, member_list_entries, + sizeof(const struct cpg_address *), + cmp_member_list_nodeid); +- if (rival == NULL) { ++ } ++ ++ if (rival == NULL) { ++ crm_info("Group %s event %d: %s (node %u pid %u) left%s", ++ groupName->value, counter, peer_name(peer), ++ left_list[i].nodeid, left_list[i].pid, ++ cpgreason2str(left_list[i].reason)); ++ if (peer) { + crm_update_peer_proc(__FUNCTION__, peer, crm_proc_cpg, + OFFLINESTATUS); +- } else if (left_list[i].nodeid == local_nodeid) { +- crm_info("Ignoring the above event %s.%d, comes from a local" +- " rival process (presumably not us): %llu", +- groupName->value, counter, +- (unsigned long long) left_list[i].pid); +- } else { +- crm_info("Ignoring the above event %s.%d, comes from" +- " a rival-rich node: %llu (e.g. %llu process" +- " carries on)", +- groupName->value, counter, +- (unsigned long long) left_list[i].pid, +- (unsigned long long) (*rival)->pid); + } ++ } else if (left_list[i].nodeid == local_nodeid) { ++ crm_warn("Group %s event %d: duplicate local pid %u left%s", ++ groupName->value, counter, ++ left_list[i].pid, cpgreason2str(left_list[i].reason)); ++ } else { ++ crm_warn("Group %s event %d: " ++ "%s (node %u) duplicate pid %u left%s (%u remains)", ++ groupName->value, counter, peer_name(peer), ++ left_list[i].nodeid, left_list[i].pid, ++ cpgreason2str(left_list[i].reason), (*rival)->pid); + } + } + free(sorted); + sorted = NULL; + + for (i = 0; i < joined_list_entries; i++) { +- crm_info("Group event %s.%d: node %u joined: %llu" +- " (unchecked for rivals)", ++ crm_info("Group %s event %d: node %u pid %u joined%s", + groupName->value, counter, joined_list[i].nodeid, +- (unsigned long long) joined_list[i].pid); ++ joined_list[i].pid, cpgreason2str(joined_list[i].reason)); + } + + for (i = 0; i < member_list_entries; i++) { + crm_node_t *peer = crm_get_peer(member_list[i].nodeid, NULL); + +- crm_info("Group event %s.%d: node %u (%s) is member: %llu" +- " (at least once)", +- groupName->value, counter, member_list[i].nodeid, +- PEER_NAME(peer), member_list[i].pid); +- + if (member_list[i].nodeid == local_nodeid + && member_list[i].pid != getpid()) { + /* see the note above */ +- crm_info("Ignoring the above event %s.%d, comes from a local rival" +- " process: %llu", groupName->value, counter, +- (unsigned long long) member_list[i].pid); ++ crm_warn("Group %s event %d: detected duplicate local pid %u", ++ groupName->value, counter, member_list[i].pid); + continue; + } ++ crm_info("Group %s event %d: %s (node %u pid %u) is member", ++ groupName->value, counter, peer_name(peer), ++ member_list[i].nodeid, member_list[i].pid); + + /* If the caller left auto-reaping enabled, this will also update the + * state to member. +@@ -492,8 +514,7 @@ pcmk_cpg_membership(cpg_handle_t handle, + + } else if (now > (peer->when_lost + 60)) { + // If it persists for more than a minute, update the state +- crm_warn("Node %u member of group %s but believed offline" +- " (unchecked for rivals)", ++ crm_warn("Node %u is member of group %s but was believed offline", + member_list[i].nodeid, groupName->value); + crm_update_peer_state(__FUNCTION__, peer, CRM_NODE_MEMBER, 0); + } +@@ -505,7 +526,7 @@ pcmk_cpg_membership(cpg_handle_t handle, + } + + if (!found) { +- crm_err("We're not part of CPG group '%s' anymore!", groupName->value); ++ crm_err("Local node was evicted from group %s", groupName->value); + cpg_evicted = TRUE; + } + +-- +1.8.3.1 + diff --git a/SOURCES/004-security-active.patch b/SOURCES/004-security-active.patch new file mode 100644 index 0000000..806dffc --- /dev/null +++ b/SOURCES/004-security-active.patch @@ -0,0 +1,26 @@ +From 7dda20dac25f07eae959ca25cc974ef2fa6daf02 Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Wed, 24 Apr 2019 16:25:46 -0500 +Subject: [PATCH] Fix: libcrmcommon: avoid use-of-NULL when checking whether + process is active + +--- + lib/common/pid.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/lib/common/pid.c b/lib/common/pid.c +index 2439680..4fbf2dd 100644 +--- a/lib/common/pid.c ++++ b/lib/common/pid.c +@@ -57,7 +57,7 @@ crm_pid_active(long pid, const char *daemon) + } else if (rc == 0 && (daemon == NULL || have_proc_pid == -1)) { + return 1; /* kill as the only indicator, cannot double check */ + +- } else { ++ } else if (daemon != NULL) { + /* make sure PID hasn't been reused by another process + XXX: might still be just a zombie, which could confuse decisions */ + bool checked_through_kill = (rc == 0); +-- +1.8.3.1 + diff --git a/SOURCES/005-security-code.patch b/SOURCES/005-security-code.patch new file mode 100644 index 0000000..633a396 --- /dev/null +++ b/SOURCES/005-security-code.patch @@ -0,0 +1,28 @@ +From d9b0269d59a00329feb19b6e65b10a233a3dd414 Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Mon, 29 Apr 2019 14:34:32 -0500 +Subject: [PATCH] Low: libcrmcommon: return proper code if testing pid is + denied + +7dda20d avoided a use-of-NULL in an unlikely corner case, but returned the +wrong code in that case. +--- + lib/common/pid.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/lib/common/pid.c b/lib/common/pid.c +index 4fbf2dd..ccee03f 100644 +--- a/lib/common/pid.c ++++ b/lib/common/pid.c +@@ -47,7 +47,7 @@ crm_pid_active(long pid, const char *daemon) + } else if ((rc = kill(pid, 0)) < 0 && errno == ESRCH) { + return 0; /* no such PID detected */ + +- } else if (rc < 0 && have_proc_pid == -1) { ++ } else if (rc < 0 && (daemon == NULL || have_proc_pid == -1)) { + if (last_asked_pid != pid) { + crm_info("Cannot examine PID %ld: %s", pid, strerror(errno)); + last_asked_pid = pid; +-- +1.8.3.1 + diff --git a/SOURCES/006-migration.patch b/SOURCES/006-migration.patch new file mode 100644 index 0000000..9a87584 --- /dev/null +++ b/SOURCES/006-migration.patch @@ -0,0 +1,55 @@ +From 0dca216a84720e5652b1d87aac8c7e0aa8b4a31c Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Wed, 27 Feb 2019 14:21:26 -0600 +Subject: [PATCH] High: libcrmcommon: complete interrupted live migrations + correctly + +8435966 (2.0.0) got the sense of compare_version() wrong when attempting to +remove the old behavior for recording partial live migrations, instead +re-introducing it. + +This causes a regression in behavior when a transition containing a live +migration is aborted after the migrate_to has been initiated but before the +migrate_from has been initiated. In that case, it would schedule a full start +on the destination node rather than a migrate_from. +--- + lib/common/operations.c | 16 ++-------------- + 1 file changed, 2 insertions(+), 14 deletions(-) + +diff --git a/lib/common/operations.c b/lib/common/operations.c +index 40d4c18..88c4fa5 100644 +--- a/lib/common/operations.c ++++ b/lib/common/operations.c +@@ -396,8 +396,8 @@ create_operation_update(xmlNode * parent, lrmd_event_data_t * op, const char * c + + task = op->op_type; + +- /* Remap the task name under various scenarios, to make life easier for the +- * PE when determining the current state. ++ /* Record a successful reload as a start, and a failed reload as a monitor, ++ * to make life easier for the scheduler when determining the current state. + */ + if (crm_str_eq(task, "reload", TRUE)) { + if (op->op_status == PCMK_LRM_OP_DONE) { +@@ -405,18 +405,6 @@ create_operation_update(xmlNode * parent, lrmd_event_data_t * op, const char * c + } else { + task = CRMD_ACTION_STATUS; + } +- +- } else if (crm_str_eq(task, CRMD_ACTION_MIGRATE, TRUE)) { +- /* if the migrate_from fails it will have enough info to do the right thing */ +- if (op->op_status == PCMK_LRM_OP_DONE) { +- task = CRMD_ACTION_STOP; +- } else { +- task = CRMD_ACTION_STATUS; +- } +- +- } else if ((op->op_status == PCMK_LRM_OP_DONE) +- && crm_str_eq(task, CRMD_ACTION_MIGRATED, TRUE)) { +- task = CRMD_ACTION_START; + } + + key = generate_op_key(op->rsc_id, task, op->interval_ms); +-- +1.8.3.1 + diff --git a/SPECS/pacemaker.spec b/SPECS/pacemaker.spec index 0766f89..f1deaee 100644 --- a/SPECS/pacemaker.spec +++ b/SPECS/pacemaker.spec @@ -195,7 +195,7 @@ Name: pacemaker Summary: Scalable High-Availability cluster resource manager Version: %{pcmkversion} -Release: %{pcmk_release}%{?dist} +Release: %{pcmk_release}%{?dist}.3 %if %{defined _unitdir} License: GPLv2+ and LGPLv2+ %else @@ -212,6 +212,11 @@ Source1: nagios-agents-metadata-%{nagios_hash}.tar.gz # upstream commits Patch1: 001-rc4.patch +Patch2: 002-security.patch +Patch3: 003-security-log.patch +Patch4: 004-security-active.patch +Patch5: 005-security-code.patch +Patch6: 006-migration.patch # patches that aren't from upstream Patch100: rhbz-url.patch @@ -226,7 +231,7 @@ Requires: psmisc %endif %{?systemd_requires} -ExclusiveArch: aarch64 i686 ppc64le s390x x86_64 %{arm} +ExclusiveArch: aarch64 i686 ppc64le s390x x86_64 Requires: %{python_path} BuildRequires: %{python_name}-devel @@ -880,8 +885,29 @@ exit 0 %license %{nagios_name}-%{nagios_hash}/COPYING %changelog -* Mon May 20 2019 Pablo Greco - 2.0.1-4 -- Allow building in armhfp +* Thu May 9 2019 Klaus Wenninger - 2.0.1-4.3 +- New build with fixed test in gating.yaml +- Resolves: rhbz#1694557 +- Resolves: rhbz#1695247 +- Resolves: rhbz#1697264 +- Resolves: rhbz#1697265 + +* Mon May 6 2019 Ken Gaillot - 2.0.1-4.2 +- New build to apply z-stream tag +- Resolves: rhbz#1694557 +- Resolves: rhbz#1695247 +- Resolves: rhbz#1697264 +- Resolves: rhbz#1697265 + +* Sun May 5 2019 Ken Gaillot - 2.0.1-4.1 +- Improve clients' authentication of IPC servers (CVE-2018-16877) +- Fix use-after-free with potential information disclosure (CVE-2019-3885) +- Improve pacemakerd authentication of running subdaemons (CVE-2018-16878) +- Restore correct behavior when live migration is interrupted +- Resolves: rhbz#1694557 +- Resolves: rhbz#1695247 +- Resolves: rhbz#1697264 +- Resolves: rhbz#1697265 * Tue Jan 29 2019 Ken Gaillot - 2.0.1-4 - Remove duplicate fence history state listing in crm_mon XML output