Blob Blame History Raw
From 17d5ceac78f610aabf6a3678813706faf252c2fb Mon Sep 17 00:00:00 2001
From: Klaus Wenninger <klaus.wenninger@aon.at>
Date: Mon, 20 Jul 2020 17:56:29 +0200
Subject: [PATCH 1/6] Fix: ipc-api: allow calling connect after disconnection

---
 lib/common/crmcommon_private.h |  1 +
 lib/common/ipc_client.c        | 22 ++++++++++++++++------
 2 files changed, 17 insertions(+), 6 deletions(-)

diff --git a/lib/common/crmcommon_private.h b/lib/common/crmcommon_private.h
index 49dae6c..d55df99 100644
--- a/lib/common/crmcommon_private.h
+++ b/lib/common/crmcommon_private.h
@@ -175,6 +175,7 @@ typedef struct pcmk__ipc_methods_s {
 struct pcmk_ipc_api_s {
     enum pcmk_ipc_server server;          // Daemon this IPC API instance is for
     enum pcmk_ipc_dispatch dispatch_type; // How replies should be dispatched
+    size_t ipc_size_max;                  // maximum IPC buffer size
     crm_ipc_t *ipc;                       // IPC connection
     mainloop_io_t *mainloop_io;     // If using mainloop, I/O source for IPC
     bool free_on_disconnect;        // Whether disconnect should free object
diff --git a/lib/common/ipc_client.c b/lib/common/ipc_client.c
index 4077d61..df687da 100644
--- a/lib/common/ipc_client.c
+++ b/lib/common/ipc_client.c
@@ -46,8 +46,6 @@
 int
 pcmk_new_ipc_api(pcmk_ipc_api_t **api, enum pcmk_ipc_server server)
 {
-    size_t max_size = 0;
-
     if (api == NULL) {
         return EINVAL;
     }
@@ -64,13 +62,15 @@ pcmk_new_ipc_api(pcmk_ipc_api_t **api, enum pcmk_ipc_server server)
         return EOPNOTSUPP;
     }
 
+    (*api)->ipc_size_max = 0;
+
     // Set server methods and max_size (if not default)
     switch (server) {
         case pcmk_ipc_attrd:
             break;
 
         case pcmk_ipc_based:
-            max_size = 512 * 1024; // 512KB
+            (*api)->ipc_size_max = 512 * 1024; // 512KB
             break;
 
         case pcmk_ipc_controld:
@@ -88,7 +88,7 @@ pcmk_new_ipc_api(pcmk_ipc_api_t **api, enum pcmk_ipc_server server)
 
         case pcmk_ipc_schedulerd:
             // @TODO max_size could vary by client, maybe take as argument?
-            max_size = 5 * 1024 * 1024; // 5MB
+            (*api)->ipc_size_max = 5 * 1024 * 1024; // 5MB
             break;
     }
     if ((*api)->cmds == NULL) {
@@ -97,7 +97,8 @@ pcmk_new_ipc_api(pcmk_ipc_api_t **api, enum pcmk_ipc_server server)
         return ENOMEM;
     }
 
-    (*api)->ipc = crm_ipc_new(pcmk_ipc_name(*api, false), max_size);
+    (*api)->ipc = crm_ipc_new(pcmk_ipc_name(*api, false),
+                              (*api)->ipc_size_max);
     if ((*api)->ipc == NULL) {
         pcmk_free_ipc_api(*api);
         *api = NULL;
@@ -451,11 +452,20 @@ pcmk_connect_ipc(pcmk_ipc_api_t *api, enum pcmk_ipc_dispatch dispatch_type)
 {
     int rc = pcmk_rc_ok;
 
-    if ((api == NULL) || (api->ipc == NULL)) {
+    if (api == NULL) {
         crm_err("Cannot connect to uninitialized API object");
         return EINVAL;
     }
 
+    if (api->ipc == NULL) {
+        api->ipc = crm_ipc_new(pcmk_ipc_name(api, false),
+                                  api->ipc_size_max);
+        if (api->ipc == NULL) {
+            crm_err("Failed to re-create IPC API");
+            return ENOMEM;
+        }
+    }
+
     if (crm_ipc_connected(api->ipc)) {
         crm_trace("Already connected to %s IPC API", pcmk_ipc_name(api, true));
         return pcmk_rc_ok;
-- 
1.8.3.1


From e5ad1a6c54da48c86c8ab262abd4921cb37e998d Mon Sep 17 00:00:00 2001
From: Klaus Wenninger <klaus.wenninger@aon.at>
Date: Mon, 20 Jul 2020 18:18:01 +0200
Subject: [PATCH 2/6] Fix: ipc-api: avoid infinite loop when disconnected

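The loop in pcmk_dispatch_ipc() treated any nonzero result of
crm_ipc_ready(), including an error result, as "data is ready".
This happens when dispatching without a mainloop.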
---
 lib/common/ipc_client.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/common/ipc_client.c b/lib/common/ipc_client.c
index df687da..aa032fe 100644
--- a/lib/common/ipc_client.c
+++ b/lib/common/ipc_client.c
@@ -392,7 +392,7 @@ pcmk_dispatch_ipc(pcmk_ipc_api_t *api)
     if (api == NULL) {
         return;
     }
-    while (crm_ipc_ready(api->ipc)) {
+    while (crm_ipc_ready(api->ipc) > 0) {
         if (crm_ipc_read(api->ipc) > 0) {
             dispatch_ipc_data(crm_ipc_buffer(api->ipc), 0, api);
         }
-- 
1.8.3.1


From 927b43a57d5e8256fbce8fe0792f8ea228c57687 Mon Sep 17 00:00:00 2001
From: Klaus Wenninger <klaus.wenninger@aon.at>
Date: Mon, 9 Dec 2019 15:13:11 +0100
Subject: [PATCH 3/6] Fix: sbd-integration: sync pacemakerd with sbd

Make pacemakerd wait to be pinged by sbd before starting
sub-daemons. Replies to later pings carry the health state and
the timestamp of the last successful check. On shutdown, bring
down all the sub-daemons and wait to be polled for state by sbd
before finally exiting pacemakerd.
Add a new API so that the XML structure does not become an external
interface.
---
 daemons/pacemakerd/pacemakerd.c     | 100 ++++++++++++++--
 include/crm/common/Makefile.am      |   2 +-
 include/crm/common/ipc_pacemakerd.h |  71 +++++++++++
 include/crm/msg_xml.h               |   7 ++
 lib/common/Makefile.am              |   1 +
 lib/common/crmcommon_private.h      |   3 +
 lib/common/ipc_client.c             |   5 +-
 lib/common/ipc_pacemakerd.c         | 232 ++++++++++++++++++++++++++++++++++++
 8 files changed, 410 insertions(+), 11 deletions(-)
 create mode 100644 include/crm/common/ipc_pacemakerd.h
 create mode 100644 lib/common/ipc_pacemakerd.c

diff --git a/daemons/pacemakerd/pacemakerd.c b/daemons/pacemakerd/pacemakerd.c
index 652d6ca..ccfae66 100644
--- a/daemons/pacemakerd/pacemakerd.c
+++ b/daemons/pacemakerd/pacemakerd.c
@@ -40,8 +40,25 @@ static bool global_keep_tracking = false;
 #define PCMK_PROCESS_CHECK_INTERVAL 5
 
 static crm_trigger_t *shutdown_trigger = NULL;
+static crm_trigger_t *startup_trigger = NULL;
 static const char *pid_file = PCMK_RUN_DIR "/pacemaker.pid";
 
+/* state we report when asked via pacemakerd-api status-ping */
+static const char *pacemakerd_state = XML_PING_ATTR_PACEMAKERDSTATE_INIT;
+static gboolean running_with_sbd = FALSE; /* local copy */
+/* When contacted via pacemakerd-api by a client that has "sbd" in
+ * its name, we assume it is the sbd daemon wanting to know
+ * whether pacemakerd shut down gracefully.
+ * Thus, once everything is shut down properly, pacemakerd
+ * waits until it has reported the graceful completion of
+ * shutdown to sbd. Only when that sbd client closes the
+ * connection can we assume that the report has arrived
+ * properly, so that pacemakerd can finally exit.
+ * The following two variables are used to track that handshake.
+ */
+static unsigned int shutdown_complete_state_reported_to = 0;
+static gboolean shutdown_complete_state_reported_client_closed = FALSE;
+
 typedef struct pcmk_child_s {
     pid_t pid;
     long flag;
@@ -374,21 +391,20 @@ escalate_shutdown(gpointer data)
 static gboolean
 pcmk_shutdown_worker(gpointer user_data)
 {
-    static int phase = 0;
+    static int phase = SIZEOF(pcmk_children);
     static time_t next_log = 0;
-    static int max = SIZEOF(pcmk_children);
 
     int lpc = 0;
 
-    if (phase == 0) {
+    if (phase == SIZEOF(pcmk_children)) {
         crm_notice("Shutting down Pacemaker");
-        phase = max;
+        pacemakerd_state = XML_PING_ATTR_PACEMAKERDSTATE_SHUTTINGDOWN;
     }
 
     for (; phase > 0; phase--) {
         /* Don't stop anything with start_seq < 1 */
 
-        for (lpc = max - 1; lpc >= 0; lpc--) {
+        for (lpc = SIZEOF(pcmk_children) - 1; lpc >= 0; lpc--) {
             pcmk_child_t *child = &(pcmk_children[lpc]);
 
             if (phase != child->start_seq) {
@@ -436,6 +452,11 @@ pcmk_shutdown_worker(gpointer user_data)
     }
 
     crm_notice("Shutdown complete");
+    pacemakerd_state = XML_PING_ATTR_PACEMAKERDSTATE_SHUTDOWNCOMPLETE;
+    if (!fatal_error && running_with_sbd &&
+        !shutdown_complete_state_reported_client_closed) {
+        return TRUE;
+    }
 
     {
         const char *delay = pcmk__env_option("shutdown_delay");
@@ -489,6 +510,51 @@ pcmk_ipc_accept(qb_ipcs_connection_t * c, uid_t uid, gid_t gid)
     return 0;
 }
 
+static void
+pcmk_handle_ping_request(pcmk__client_t *c, xmlNode *msg, uint32_t id)
+{
+    const char *value = NULL;
+    xmlNode *ping = NULL;
+    xmlNode *reply = NULL;
+    time_t pinged = time(NULL); /* time of this ping, sent as timestamp */
+    const char *from = crm_element_value(msg, F_CRM_SYS_FROM);
+
+    /* Status ping: reply with the current pacemakerd state and a timestamp */
+    crm_trace("Pinged from %s.%s",
+              crm_str(crm_element_value(msg, F_CRM_ORIGIN)),
+              from?from:"unknown");
+    ping = create_xml_node(NULL, XML_CRM_TAG_PING);
+    value = crm_element_value(msg, F_CRM_SYS_TO);
+    crm_xml_add(ping, XML_PING_ATTR_SYSFROM, value);
+    crm_xml_add(ping, XML_PING_ATTR_PACEMAKERDSTATE, pacemakerd_state);
+    crm_xml_add_ll(ping, XML_ATTR_TSTAMP, (long long) pinged);
+    crm_xml_add(ping, XML_PING_ATTR_STATUS, "ok");
+    reply = create_reply(msg, ping);
+    free_xml(ping);
+    if (reply) {
+        if (pcmk__ipc_send_xml(c, id, reply, crm_ipc_server_event) !=
+                pcmk_rc_ok) {
+            crm_err("Failed sending ping-reply");
+        }
+        free_xml(reply);
+    } else {
+        crm_err("Failed building ping-reply");
+    }
+    /* Only a ping whose sender name contains "sbd" advances our state */
+    if (from && strstr(from, "sbd")) {
+        if (crm_str_eq(pacemakerd_state,
+                       XML_PING_ATTR_PACEMAKERDSTATE_SHUTDOWNCOMPLETE,
+                       TRUE)) { /* remember which client got the report */
+            shutdown_complete_state_reported_to = c->pid;
+        } else if (crm_str_eq(pacemakerd_state,
+                              XML_PING_ATTR_PACEMAKERDSTATE_WAITPING,
+                              TRUE)) { /* sbd is there: trigger startup */
+            pacemakerd_state = XML_PING_ATTR_PACEMAKERDSTATE_STARTINGDAEMONS;
+            mainloop_set_trigger(startup_trigger);
+        }
+    }
+}
+
 /* Exit code means? */
 static int32_t
 pcmk_ipc_dispatch(qb_ipcs_connection_t * qbc, void *data, size_t size)
@@ -514,6 +580,9 @@ pcmk_ipc_dispatch(qb_ipcs_connection_t * qbc, void *data, size_t size)
         crm_trace("Ignoring IPC request to purge node "
                   "because peer cache is not used");
 
+    } else if (crm_str_eq(task, CRM_OP_PING, TRUE)) {
+        pcmk_handle_ping_request(c, msg, id);
+
     } else {
         crm_debug("Unrecognized IPC command '%s' sent to pacemakerd",
                   crm_str(task));
@@ -533,6 +602,12 @@ pcmk_ipc_closed(qb_ipcs_connection_t * c)
         return 0;
     }
     crm_trace("Connection %p", c);
+    if (shutdown_complete_state_reported_to == client->pid) {
+        shutdown_complete_state_reported_client_closed = TRUE;
+        if (shutdown_trigger) {
+            mainloop_set_trigger(shutdown_trigger);
+        }
+    }
     pcmk__free_client(client);
     return 0;
 }
@@ -924,8 +999,8 @@ find_and_track_existing_processes(void)
     return pcmk_rc_ok;
 }
 
-static void
-init_children_processes(void)
+static gboolean
+init_children_processes(void *user_data)
 {
     int start_seq = 1, lpc = 0;
     static int max = SIZEOF(pcmk_children);
@@ -951,6 +1026,8 @@ init_children_processes(void)
      * This may be useful for the daemons to know
      */
     setenv("PCMK_respawned", "true", 1);
+    pacemakerd_state = XML_PING_ATTR_PACEMAKERDSTATE_RUNNING;
+    return TRUE;
 }
 
 static void
@@ -1154,6 +1231,7 @@ main(int argc, char **argv)
 
     if(pcmk_locate_sbd() > 0) {
         setenv("PCMK_watchdog", "true", 1);
+        running_with_sbd = TRUE;
     } else {
         setenv("PCMK_watchdog", "false", 1);
     }
@@ -1170,7 +1248,13 @@ main(int argc, char **argv)
     mainloop_add_signal(SIGTERM, pcmk_shutdown);
     mainloop_add_signal(SIGINT, pcmk_shutdown);
 
-    init_children_processes();
+    if (running_with_sbd) {
+        pacemakerd_state = XML_PING_ATTR_PACEMAKERDSTATE_WAITPING;
+        startup_trigger = mainloop_add_trigger(G_PRIORITY_HIGH, init_children_processes, NULL);
+    } else {
+        pacemakerd_state = XML_PING_ATTR_PACEMAKERDSTATE_STARTINGDAEMONS;
+        init_children_processes(NULL);
+    }
 
     crm_notice("Pacemaker daemon successfully started and accepting connections");
     g_main_loop_run(mainloop);
diff --git a/include/crm/common/Makefile.am b/include/crm/common/Makefile.am
index f29d105..1b5730a 100644
--- a/include/crm/common/Makefile.am
+++ b/include/crm/common/Makefile.am
@@ -12,7 +12,7 @@ MAINTAINERCLEANFILES = Makefile.in
 headerdir=$(pkgincludedir)/crm/common
 
 header_HEADERS = xml.h ipc.h util.h iso8601.h mainloop.h logging.h results.h \
-		 nvpair.h acl.h ipc_controld.h
+		 nvpair.h acl.h ipc_controld.h ipc_pacemakerd.h
 noinst_HEADERS = internal.h alerts_internal.h \
 		 iso8601_internal.h remote_internal.h xml_internal.h \
 		 ipc_internal.h output.h cmdline_internal.h curses_internal.h \
diff --git a/include/crm/common/ipc_pacemakerd.h b/include/crm/common/ipc_pacemakerd.h
new file mode 100644
index 0000000..00e3edd
--- /dev/null
+++ b/include/crm/common/ipc_pacemakerd.h
@@ -0,0 +1,71 @@
+/*
+ * Copyright 2020 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
+ *
+ * This source code is licensed under the GNU Lesser General Public License
+ * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
+ */
+
+#ifndef PCMK__IPC_PACEMAKERD__H
+#  define PCMK__IPC_PACEMAKERD__H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * \file
+ * \brief IPC commands for Pacemakerd
+ *
+ * \ingroup core
+ */
+
+#include <sys/types.h>       // time_t
+#include <crm/common/ipc.h>  // pcmk_ipc_api_t
+
+enum pcmk_pacemakerd_state {
+    pcmk_pacemakerd_state_invalid = -1,
+    pcmk_pacemakerd_state_init = 0,
+    pcmk_pacemakerd_state_starting_daemons,
+    pcmk_pacemakerd_state_wait_for_ping,
+    pcmk_pacemakerd_state_running,
+    pcmk_pacemakerd_state_shutting_down,
+    pcmk_pacemakerd_state_shutdown_complete,
+    pcmk_pacemakerd_state_max = pcmk_pacemakerd_state_shutdown_complete,
+};
+
+//! Possible types of pacemakerd replies
+enum pcmk_pacemakerd_api_reply {
+    pcmk_pacemakerd_reply_unknown,
+    pcmk_pacemakerd_reply_ping,
+};
+
+/*!
+ * Pacemakerd reply passed to event callback
+ */
+typedef struct {
+    enum pcmk_pacemakerd_api_reply reply_type;
+
+    union {
+        // pcmk_pacemakerd_reply_ping
+        struct {
+            const char *sys_from;
+            enum pcmk_pacemakerd_state state;
+            time_t last_good;
+            int status;
+        } ping;
+    } data;
+} pcmk_pacemakerd_api_reply_t;
+
+int pcmk_pacemakerd_api_ping(pcmk_ipc_api_t *api, const char *ipc_name);
+enum pcmk_pacemakerd_state
+    pcmk_pacemakerd_api_daemon_state_text2enum(const char *state);
+const char
+    *pcmk_pacemakerd_api_daemon_state_enum2text(enum pcmk_pacemakerd_state state);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // PCMK__IPC_PACEMAKERD__H
diff --git a/include/crm/msg_xml.h b/include/crm/msg_xml.h
index af3f33e..1fcb72d 100644
--- a/include/crm/msg_xml.h
+++ b/include/crm/msg_xml.h
@@ -123,6 +123,13 @@ extern "C" {
 #  define XML_PING_ATTR_STATUS		"result"
 #  define XML_PING_ATTR_SYSFROM		"crm_subsystem"
 #  define XML_PING_ATTR_CRMDSTATE   "crmd_state"
+#  define XML_PING_ATTR_PACEMAKERDSTATE "pacemakerd_state"
+#  define XML_PING_ATTR_PACEMAKERDSTATE_INIT "init"
+#  define XML_PING_ATTR_PACEMAKERDSTATE_STARTINGDAEMONS "starting_daemons"
+#  define XML_PING_ATTR_PACEMAKERDSTATE_WAITPING "wait_for_ping"
+#  define XML_PING_ATTR_PACEMAKERDSTATE_RUNNING "running"
+#  define XML_PING_ATTR_PACEMAKERDSTATE_SHUTTINGDOWN "shutting_down"
+#  define XML_PING_ATTR_PACEMAKERDSTATE_SHUTDOWNCOMPLETE "shutdown_complete"
 
 #  define XML_TAG_FRAGMENT		"cib_fragment"
 
diff --git a/lib/common/Makefile.am b/lib/common/Makefile.am
index db66a6e..e0249b9 100644
--- a/lib/common/Makefile.am
+++ b/lib/common/Makefile.am
@@ -50,6 +50,7 @@ libcrmcommon_la_SOURCES	+= io.c
 libcrmcommon_la_SOURCES	+= ipc_client.c
 libcrmcommon_la_SOURCES	+= ipc_common.c
 libcrmcommon_la_SOURCES	+= ipc_controld.c
+libcrmcommon_la_SOURCES	+= ipc_pacemakerd.c
 libcrmcommon_la_SOURCES	+= ipc_server.c
 libcrmcommon_la_SOURCES	+= iso8601.c
 libcrmcommon_la_SOURCES	+= logging.c
diff --git a/lib/common/crmcommon_private.h b/lib/common/crmcommon_private.h
index d55df99..68e3390 100644
--- a/lib/common/crmcommon_private.h
+++ b/lib/common/crmcommon_private.h
@@ -210,4 +210,7 @@ bool pcmk__valid_ipc_header(const pcmk__ipc_header_t *header);
 G_GNUC_INTERNAL
 pcmk__ipc_methods_t *pcmk__controld_api_methods(void);
 
+G_GNUC_INTERNAL
+pcmk__ipc_methods_t *pcmk__pacemakerd_api_methods(void);
+
 #endif  // CRMCOMMON_PRIVATE__H
diff --git a/lib/common/ipc_client.c b/lib/common/ipc_client.c
index aa032fe..033199d 100644
--- a/lib/common/ipc_client.c
+++ b/lib/common/ipc_client.c
@@ -41,7 +41,7 @@
  *
  * \note The caller is responsible for freeing *api using pcmk_free_ipc_api().
  * \note This is intended to supersede crm_ipc_new() but currently only
- *       supports the controller IPC API.
+ *       supports the controller & pacemakerd IPC API.
  */
 int
 pcmk_new_ipc_api(pcmk_ipc_api_t **api, enum pcmk_ipc_server server)
@@ -84,6 +84,7 @@ pcmk_new_ipc_api(pcmk_ipc_api_t **api, enum pcmk_ipc_server server)
             break;
 
         case pcmk_ipc_pacemakerd:
+            (*api)->cmds = pcmk__pacemakerd_api_methods();
             break;
 
         case pcmk_ipc_schedulerd:
@@ -259,7 +260,7 @@ pcmk_ipc_name(pcmk_ipc_api_t *api, bool for_log)
             return for_log? "fencer" : NULL /* "stonith-ng" */;
 
         case pcmk_ipc_pacemakerd:
-            return for_log? "launcher" : NULL /* CRM_SYSTEM_MCP */;
+            return for_log? "launcher" : CRM_SYSTEM_MCP;
 
         case pcmk_ipc_schedulerd:
             return for_log? "scheduler" : NULL /* CRM_SYSTEM_PENGINE */;
diff --git a/lib/common/ipc_pacemakerd.c b/lib/common/ipc_pacemakerd.c
new file mode 100644
index 0000000..241722e
--- /dev/null
+++ b/lib/common/ipc_pacemakerd.c
@@ -0,0 +1,232 @@
+/*
+ * Copyright 2020 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
+ *
+ * This source code is licensed under the GNU Lesser General Public License
+ * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
+ */
+
+#include <crm_internal.h>
+
+#include <stdlib.h>
+#include <time.h>
+
+#include <crm/crm.h>
+#include <crm/msg_xml.h>
+#include <crm/common/xml.h>
+#include <crm/common/ipc.h>
+#include <crm/common/ipc_internal.h>
+#include <crm/common/ipc_pacemakerd.h>
+#include "crmcommon_private.h"
+
+typedef struct pacemakerd_api_private_s {
+    enum pcmk_pacemakerd_state state;
+    char *client_uuid;
+} pacemakerd_api_private_t;
+
+static const char *pacemakerd_state_str[] = {
+    XML_PING_ATTR_PACEMAKERDSTATE_INIT,
+    XML_PING_ATTR_PACEMAKERDSTATE_STARTINGDAEMONS,
+    XML_PING_ATTR_PACEMAKERDSTATE_WAITPING,
+    XML_PING_ATTR_PACEMAKERDSTATE_RUNNING,
+    XML_PING_ATTR_PACEMAKERDSTATE_SHUTTINGDOWN,
+    XML_PING_ATTR_PACEMAKERDSTATE_SHUTDOWNCOMPLETE
+};
+
+enum pcmk_pacemakerd_state
+pcmk_pacemakerd_api_daemon_state_text2enum(const char *state)
+{
+    int i;
+
+    if (state == NULL) {
+        return pcmk_pacemakerd_state_invalid;
+    }
+    for (i=pcmk_pacemakerd_state_init; i <= pcmk_pacemakerd_state_max;
+         i++) {
+        if (crm_str_eq(state, pacemakerd_state_str[i], TRUE)) {
+            return i;
+        }
+    }
+    return pcmk_pacemakerd_state_invalid;
+}
+
+const char *
+pcmk_pacemakerd_api_daemon_state_enum2text(
+    enum pcmk_pacemakerd_state state)
+{
+    if ((state >= pcmk_pacemakerd_state_init) &&
+        (state <= pcmk_pacemakerd_state_max)) {
+        return pacemakerd_state_str[state];
+    }
+    return "invalid";
+}
+
+// \return Standard Pacemaker return code (allocates api->api_data)
+static int
+new_data(pcmk_ipc_api_t *api)
+{
+    struct pacemakerd_api_private_s *private = NULL;
+
+    api->api_data = calloc(1, sizeof(struct pacemakerd_api_private_s));
+
+    if (api->api_data == NULL) {
+        return errno; // NOTE(review): relies on calloc() having set errno
+    }
+
+    private = api->api_data;
+    private->state = pcmk_pacemakerd_state_invalid;
+    /* Unlike with cib, controld, ... pacemakerd is only ever addressed
+       from the local node, so the PID is unique and suffices as an ID
+     */
+    private->client_uuid = pcmk__getpid_s();
+
+    return pcmk_rc_ok;
+}
+
+static void
+free_data(void *data)
+{
+    free(((struct pacemakerd_api_private_s *) data)->client_uuid);
+    free(data);
+}
+
+// \return Standard Pacemaker return code
+static int
+post_connect(pcmk_ipc_api_t *api)
+{
+    struct pacemakerd_api_private_s *private = NULL;
+
+    if (api->api_data == NULL) {
+        return EINVAL;
+    }
+    private = api->api_data;
+    private->state = pcmk_pacemakerd_state_invalid;
+
+    return pcmk_rc_ok;
+}
+
+static void
+post_disconnect(pcmk_ipc_api_t *api)
+{
+    struct pacemakerd_api_private_s *private = NULL;
+
+    if (api->api_data == NULL) {
+        return;
+    }
+    private = api->api_data;
+    private->state = pcmk_pacemakerd_state_invalid;
+
+    return;
+}
+
+static bool
+reply_expected(pcmk_ipc_api_t *api, xmlNode *request)
+{
+    const char *command = crm_element_value(request, F_CRM_TASK);
+
+    if (command == NULL) {
+        return false;
+    }
+
+    // We only need to handle commands that functions in this file can send
+    return !strcmp(command, CRM_OP_PING);
+}
+
+static void
+dispatch(pcmk_ipc_api_t *api, xmlNode *reply)
+{
+    crm_exit_t status = CRM_EX_OK;
+    xmlNode *msg_data = NULL;
+    pcmk_pacemakerd_api_reply_t reply_data = {
+        pcmk_pacemakerd_reply_unknown
+    };
+    const char *value = NULL;
+    long long value_ll = 0;
+
+    value = crm_element_value(reply, F_CRM_MSG_TYPE);
+    if ((value == NULL) || (strcmp(value, XML_ATTR_RESPONSE))) {
+        crm_debug("Unrecognizable pacemakerd message: invalid message type '%s'",
+                  crm_str(value));
+        status = CRM_EX_PROTOCOL;
+        goto done;
+    }
+
+    if (crm_element_value(reply, XML_ATTR_REFERENCE) == NULL) {
+        crm_debug("Unrecognizable pacemakerd message: no reference");
+        status = CRM_EX_PROTOCOL;
+        goto done;
+    }
+
+    value = crm_element_value(reply, F_CRM_TASK);
+    if ((value == NULL) || strcmp(value, CRM_OP_PING)) {
+        crm_debug("Unrecognizable pacemakerd message: '%s'", crm_str(value));
+        status = CRM_EX_PROTOCOL;
+        goto done;
+    }
+
+    // Parse useful info from reply
+
+    msg_data = get_message_xml(reply, F_CRM_DATA);
+    crm_element_value_ll(msg_data, XML_ATTR_TSTAMP, &value_ll);
+
+    reply_data.reply_type = pcmk_pacemakerd_reply_ping;
+    reply_data.data.ping.state =
+        pcmk_pacemakerd_api_daemon_state_text2enum(
+            crm_element_value(msg_data, XML_PING_ATTR_PACEMAKERDSTATE));
+    reply_data.data.ping.status =
+        crm_str_eq(crm_element_value(msg_data, XML_PING_ATTR_STATUS),
+                   "ok", FALSE)?pcmk_rc_ok:pcmk_rc_error;
+    reply_data.data.ping.last_good = (time_t) value_ll;
+    reply_data.data.ping.sys_from = crm_element_value(msg_data,
+                                        XML_PING_ATTR_SYSFROM);
+
+done:
+    pcmk__call_ipc_callback(api, pcmk_ipc_event_reply, status, &reply_data);
+}
+
+pcmk__ipc_methods_t *
+pcmk__pacemakerd_api_methods()
+{
+    pcmk__ipc_methods_t *cmds = calloc(1, sizeof(pcmk__ipc_methods_t));
+
+    if (cmds != NULL) {
+        cmds->new_data = new_data;
+        cmds->free_data = free_data;
+        cmds->post_connect = post_connect;
+        cmds->reply_expected = reply_expected;
+        cmds->dispatch = dispatch;
+        cmds->post_disconnect = post_disconnect;
+    }
+    return cmds;
+}
+
+// Send a ping request to pacemakerd (ipc_name NULL => default sender name)
+// \return Standard Pacemaker return code
+int
+pcmk_pacemakerd_api_ping(pcmk_ipc_api_t *api, const char *ipc_name)
+{
+    pacemakerd_api_private_t *private;
+    xmlNode *cmd;
+    int rc;
+
+    CRM_CHECK(api != NULL, return EINVAL); // positive, like all other codes
+    private = api->api_data;
+    CRM_ASSERT(private != NULL);
+
+    cmd = create_request(CRM_OP_PING, NULL, NULL, CRM_SYSTEM_MCP,
+        ipc_name?ipc_name:((crm_system_name? crm_system_name : "client")),
+        private->client_uuid);
+    if (cmd == NULL) {
+        return ENOMSG;
+    }
+
+    rc = pcmk__send_ipc_request(api, cmd);
+    if (rc != pcmk_rc_ok) {
+        crm_debug("Couldn't ping pacemakerd: %s rc=%d", pcmk_rc_str(rc), rc);
+        rc = ECOMM; // callers only learn that communication failed
+    }
+    free_xml(cmd);
+
+    return rc;
+}
-- 
1.8.3.1


From 06da3c3685b0bdf093a13067cc399e782115e39c Mon Sep 17 00:00:00 2001
From: Klaus Wenninger <klaus.wenninger@aon.at>
Date: Mon, 20 Jul 2020 23:28:32 +0200
Subject: [PATCH 4/6] Feature: tools: Add -P to crmadmin to ping via
 pacemakerd-api

---
 include/crm/crm.h |   2 +-
 tools/crmadmin.c  | 161 ++++++++++++++++++++++++++++++++++++++++++++++++++----
 2 files changed, 152 insertions(+), 11 deletions(-)

diff --git a/include/crm/crm.h b/include/crm/crm.h
index dc2adc1..ce2074b 100644
--- a/include/crm/crm.h
+++ b/include/crm/crm.h
@@ -51,7 +51,7 @@ extern "C" {
  * >=3.0.13: Fail counts include operation name and interval
  * >=3.2.0:  DC supports PCMK_LRM_OP_INVALID and PCMK_LRM_OP_NOT_CONNECTED
  */
-#  define CRM_FEATURE_SET		"3.4.0"
+#  define CRM_FEATURE_SET		"3.4.1"
 
 #  define EOS		'\0'
 #  define DIMOF(a)	((int) (sizeof(a)/sizeof(a[0])) )
diff --git a/tools/crmadmin.c b/tools/crmadmin.c
index 4688458..2ebdd14 100644
--- a/tools/crmadmin.c
+++ b/tools/crmadmin.c
@@ -20,7 +20,9 @@
 #include <crm/cib.h>
 #include <crm/msg_xml.h>
 #include <crm/common/xml.h>
+#include <crm/common/iso8601.h>
 #include <crm/common/ipc_controld.h>
+#include <crm/common/ipc_pacemakerd.h>
 #include <crm/common/mainloop.h>
 
 #define DEFAULT_MESSAGE_TIMEOUT_MS 30000
@@ -31,6 +33,8 @@ static GMainLoop *mainloop = NULL;
 
 bool do_work(pcmk_ipc_api_t *api);
 void do_find_node_list(xmlNode *xml_node);
+static char *ipc_name = NULL;
+
 gboolean admin_message_timeout(gpointer data);
 
 static enum {
@@ -40,6 +44,7 @@ static enum {
     cmd_elect_dc,
     cmd_whois_dc,
     cmd_list_nodes,
+    cmd_pacemakerd_health,
 } command = cmd_none;
 
 static gboolean BE_VERBOSE = FALSE;
@@ -82,6 +87,15 @@ static pcmk__cli_option_t long_options[] = {
         pcmk__option_default
     },
     {
+        "pacemakerd", no_argument, NULL, 'P',
+        "Display the status of local pacemakerd.", pcmk__option_default
+    },
+    {
+        "-spacer-", no_argument, NULL, '-',
+        "\n\tResult is the state of the sub-daemons watched by pacemakerd.\n",
+        pcmk__option_default
+    },
+    {
         "dc_lookup", no_argument, NULL, 'D',
         "Display the uname of the node co-ordinating the cluster.",
         pcmk__option_default
@@ -122,16 +136,21 @@ static pcmk__cli_option_t long_options[] = {
     {
         "bash-export", no_argument, NULL, 'B',
         "Display nodes as shell commands of the form 'export uname=uuid' "
-            "(valid with -N/--nodes)'\n",
+            "(valid with -N/--nodes)",
+        pcmk__option_default
+    },
+    {
+        "ipc-name", required_argument, NULL, 'i',
+        "Name to use for ipc instead of 'crmadmin' (with -P/--pacemakerd).",
         pcmk__option_default
     },
     {
         "-spacer-", no_argument, NULL, '-',
-        "Notes:", pcmk__option_default
+        "\nNotes:", pcmk__option_default
     },
     {
         "-spacer-", no_argument, NULL, '-',
-        "The -K and -E commands do not work and may be removed in a future "
+        "\nThe -K and -E commands do not work and may be removed in a future "
             "version.",
         pcmk__option_default
     },
@@ -223,6 +242,88 @@ done:
     quit_main_loop(exit_code);
 }
 
+/* Event callback for the pacemakerd IPC API: on a ping reply, print the
+ * reported pacemakerd state, then disconnect and quit the main loop.
+ * An unexpected disconnect is reported as an error.
+ */
+static void
+pacemakerd_event_cb(pcmk_ipc_api_t *pacemakerd_api,
+                    enum pcmk_ipc_event event_type, crm_exit_t status,
+                    void *event_data, void *user_data)
+{
+    pcmk_pacemakerd_api_reply_t *reply = event_data;
+
+    switch (event_type) {
+        case pcmk_ipc_event_disconnect:
+            if (exit_code == CRM_EX_DISCONNECT) { // Unexpected
+                fprintf(stderr, "error: Lost connection to pacemakerd\n");
+            }
+            goto done;
+
+        case pcmk_ipc_event_reply:
+            break;
+
+        default:
+            return;
+    }
+
+    if (message_timer_id != 0) {
+        g_source_remove(message_timer_id);
+        message_timer_id = 0;
+    }
+
+    if (status != CRM_EX_OK) {
+        fprintf(stderr, "error: Bad reply from pacemakerd: %s\n",
+                crm_exit_str(status));
+        exit_code = status;
+        goto done;
+    }
+
+    if (reply->reply_type != pcmk_pacemakerd_reply_ping) {
+        fprintf(stderr, "error: Unknown reply type %d from pacemakerd\n",
+                reply->reply_type);
+        goto done;
+    }
+
+    // Parse desired information from reply
+    switch (command) {
+        case cmd_pacemakerd_health:
+            {
+                bool ok = (reply->data.ping.status == pcmk_rc_ok);
+                const char *state = ok?
+                    pcmk_pacemakerd_api_daemon_state_enum2text(
+                        reply->data.ping.state) : "query failed";
+                crm_time_t *crm_when = crm_time_new(NULL);
+                char *pinged_buf = NULL;
+
+                crm_time_set_timet(crm_when, &reply->data.ping.last_good);
+                pinged_buf = crm_time_as_string(crm_when,
+                    crm_time_log_date | crm_time_log_timeofday |
+                        crm_time_log_with_timezone);
+
+                printf("Status of %s: '%s' %s %s\n",
+                    reply->data.ping.sys_from, state,
+                    ok? "last updated" : "", ok? pinged_buf : "");
+                if (BE_SILENT &&
+                    (reply->data.ping.state != pcmk_pacemakerd_state_invalid)) {
+                    fprintf(stderr, "%s\n", state);
+                }
+                exit_code = CRM_EX_OK;
+                free(pinged_buf);
+                crm_time_free(crm_when); // was leaked before
+            }
+            break;
+
+        default: // Not really possible here
+            exit_code = CRM_EX_SOFTWARE;
+            break;
+    }
+
+done:
+    pcmk_disconnect_ipc(pacemakerd_api);
+    quit_main_loop(exit_code);
+}
+
 // \return Standard Pacemaker return code
 static int
 list_nodes()
@@ -257,7 +358,9 @@ main(int argc, char **argv)
     int flag;
     int rc;
     pcmk_ipc_api_t *controld_api = NULL;
+    pcmk_ipc_api_t *pacemakerd_api = NULL;
     bool need_controld_api = true;
+    bool need_pacemakerd_api = false;
 
     crm_log_cli_init("crmadmin");
     pcmk__set_cli_options(NULL, "<command> [options]", long_options,
@@ -282,7 +385,9 @@ main(int argc, char **argv)
                     message_timeout_ms = DEFAULT_MESSAGE_TIMEOUT_MS;
                 }
                 break;
-
+            case 'i':
+                ipc_name = strdup(optarg);
+                break;
             case '$':
             case '?':
                 pcmk__cli_help(flag, CRM_EX_OK);
@@ -304,6 +409,11 @@ main(int argc, char **argv)
             case 'q':
                 BE_SILENT = TRUE;
                 break;
+            case 'P':
+                command = cmd_pacemakerd_health;
+                need_pacemakerd_api = true;
+                need_controld_api = false;
+                break;
             case 'S':
                 command = cmd_health;
                 crm_trace("Option %c => %s", flag, optarg);
@@ -369,7 +479,26 @@ main(int argc, char **argv)
         }
     }
 
-    if (do_work(controld_api)) {
+    // Connect to pacemakerd if needed
+    if (need_pacemakerd_api) {
+        rc = pcmk_new_ipc_api(&pacemakerd_api, pcmk_ipc_pacemakerd);
+        if (pacemakerd_api == NULL) {
+            fprintf(stderr, "error: Could not connect to pacemakerd: %s\n",
+                    pcmk_rc_str(rc));
+            exit_code = pcmk_rc2exitc(rc);
+            goto done;
+        }
+        pcmk_register_ipc_callback(pacemakerd_api, pacemakerd_event_cb, NULL);
+        rc = pcmk_connect_ipc(pacemakerd_api, pcmk_ipc_dispatch_main);
+        if (rc != pcmk_rc_ok) {
+            fprintf(stderr, "error: Could not connect to pacemakerd: %s\n",
+                    pcmk_rc_str(rc));
+            exit_code = pcmk_rc2exitc(rc);
+            goto done;
+        }
+    }
+
+    if (do_work(controld_api?controld_api:pacemakerd_api)) {
         // A reply is needed from controller, so run main loop to get it
         exit_code = CRM_EX_DISCONNECT; // For unexpected disconnects
         mainloop = g_main_loop_new(NULL, FALSE);
@@ -379,12 +508,19 @@ main(int argc, char **argv)
     }
 
 done:
+
     if (controld_api != NULL) {
         pcmk_ipc_api_t *capi = controld_api;
-
         controld_api = NULL; // Ensure we can't free this twice
         pcmk_free_ipc_api(capi);
     }
+
+    if (pacemakerd_api != NULL) {
+        pcmk_ipc_api_t *capi = pacemakerd_api;
+        pacemakerd_api = NULL; // Ensure we can't free this twice
+        pcmk_free_ipc_api(capi);
+    }
+
     if (mainloop != NULL) {
         g_main_loop_unref(mainloop);
         mainloop = NULL;
@@ -394,30 +530,35 @@ done:
 
 // \return True if reply from controller is needed
 bool
-do_work(pcmk_ipc_api_t *controld_api)
+do_work(pcmk_ipc_api_t *api)
 {
     bool need_reply = false;
     int rc = pcmk_rc_ok;
 
     switch (command) {
         case cmd_shutdown:
-            rc = pcmk_controld_api_shutdown(controld_api, dest_node);
+            rc = pcmk_controld_api_shutdown(api, dest_node);
             break;
 
         case cmd_health:    // dest_node != NULL
         case cmd_whois_dc:  // dest_node == NULL
-            rc = pcmk_controld_api_ping(controld_api, dest_node);
+            rc = pcmk_controld_api_ping(api, dest_node);
             need_reply = true;
             break;
 
         case cmd_elect_dc:
-            rc = pcmk_controld_api_start_election(controld_api);
+            rc = pcmk_controld_api_start_election(api);
             break;
 
         case cmd_list_nodes:
             rc = list_nodes();
             break;
 
+        case cmd_pacemakerd_health:
+            rc = pcmk_pacemakerd_api_ping(api, ipc_name);
+            need_reply = true;
+            break;
+
         case cmd_none: // not actually possible here
             break;
     }
-- 
1.8.3.1


From 6ce5bb0d6fd30a204468ea245209d34f2682d7c9 Mon Sep 17 00:00:00 2001
From: Klaus Wenninger <klaus.wenninger@aon.at>
Date: Tue, 21 Jul 2020 18:12:53 +0200
Subject: [PATCH 5/6] Fix: pacemakerd: interworking with sbd not using
 pacemakerd-api

---
 daemons/pacemakerd/pacemakerd.c       |  8 +++++++-
 include/crm/common/options_internal.h |  1 +
 lib/common/watchdog.c                 | 15 +++++++++++++++
 3 files changed, 23 insertions(+), 1 deletion(-)

diff --git a/daemons/pacemakerd/pacemakerd.c b/daemons/pacemakerd/pacemakerd.c
index ccfae66..e91982a 100644
--- a/daemons/pacemakerd/pacemakerd.c
+++ b/daemons/pacemakerd/pacemakerd.c
@@ -454,6 +454,7 @@ pcmk_shutdown_worker(gpointer user_data)
     crm_notice("Shutdown complete");
     pacemakerd_state = XML_PING_ATTR_PACEMAKERDSTATE_SHUTDOWNCOMPLETE;
     if (!fatal_error && running_with_sbd &&
+        pcmk__get_sbd_sync_resource_startup() &&
         !shutdown_complete_state_reported_client_closed) {
         return TRUE;
     }
@@ -1248,10 +1249,15 @@ main(int argc, char **argv)
     mainloop_add_signal(SIGTERM, pcmk_shutdown);
     mainloop_add_signal(SIGINT, pcmk_shutdown);
 
-    if (running_with_sbd) {
+    if ((running_with_sbd) && pcmk__get_sbd_sync_resource_startup()) {
         pacemakerd_state = XML_PING_ATTR_PACEMAKERDSTATE_WAITPING;
         startup_trigger = mainloop_add_trigger(G_PRIORITY_HIGH, init_children_processes, NULL);
     } else {
+        if (running_with_sbd) {
+            crm_warn("Enabling SBD_SYNC_RESOURCE_STARTUP would (if supported "
+                     "by your sbd version) improve reliability of "
+                     "interworking between SBD & pacemaker.");
+        }
         pacemakerd_state = XML_PING_ATTR_PACEMAKERDSTATE_STARTINGDAEMONS;
         init_children_processes(NULL);
     }
diff --git a/include/crm/common/options_internal.h b/include/crm/common/options_internal.h
index db54da4..d0429c9 100644
--- a/include/crm/common/options_internal.h
+++ b/include/crm/common/options_internal.h
@@ -111,6 +111,7 @@ bool pcmk__valid_utilization(const char *value);
 
 // from watchdog.c
 long pcmk__get_sbd_timeout(void);
+bool pcmk__get_sbd_sync_resource_startup(void);
 long pcmk__auto_watchdog_timeout(void);
 bool pcmk__valid_sbd_timeout(const char *value);
 
diff --git a/lib/common/watchdog.c b/lib/common/watchdog.c
index 9d8896b..8838be6 100644
--- a/lib/common/watchdog.c
+++ b/lib/common/watchdog.c
@@ -227,6 +227,21 @@ pcmk__get_sbd_timeout(void)
     return sbd_timeout;
 }
 
+// \return Cached truth value of SBD_SYNC_RESOURCE_STARTUP in the environment
+bool
+pcmk__get_sbd_sync_resource_startup(void)
+{
+    static bool enabled = false;    // cached answer, valid once looked_up
+    static bool looked_up = false;  // set after the first environment read
+
+    if (looked_up) {
+        return enabled;
+    }
+    enabled = crm_is_true(getenv("SBD_SYNC_RESOURCE_STARTUP"));
+    looked_up = true;
+    return enabled;
+}
+
 long
 pcmk__auto_watchdog_timeout()
 {
-- 
1.8.3.1


From 567cb6ec6f317af9e973321633950ef26f43c486 Mon Sep 17 00:00:00 2001
From: Klaus Wenninger <klaus.wenninger@aon.at>
Date: Thu, 23 Jul 2020 23:00:23 +0200
Subject: [PATCH 6/6] Fix: pacemakerd: improve logging when synced with SBD

---
 daemons/pacemakerd/pacemakerd.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/daemons/pacemakerd/pacemakerd.c b/daemons/pacemakerd/pacemakerd.c
index e91982a..c888b73 100644
--- a/daemons/pacemakerd/pacemakerd.c
+++ b/daemons/pacemakerd/pacemakerd.c
@@ -456,6 +456,7 @@ pcmk_shutdown_worker(gpointer user_data)
     if (!fatal_error && running_with_sbd &&
         pcmk__get_sbd_sync_resource_startup() &&
         !shutdown_complete_state_reported_client_closed) {
+        crm_notice("Waiting for SBD to pick up shutdown-complete-state.");
         return TRUE;
     }
 
@@ -546,10 +547,14 @@ pcmk_handle_ping_request(pcmk__client_t *c, xmlNode *msg, uint32_t id)
         if (crm_str_eq(pacemakerd_state,
                        XML_PING_ATTR_PACEMAKERDSTATE_SHUTDOWNCOMPLETE,
                        TRUE)) {
+            if (pcmk__get_sbd_sync_resource_startup()) {
+                crm_notice("Shutdown-complete-state passed to SBD.");
+            }
             shutdown_complete_state_reported_to = c->pid;
         } else if (crm_str_eq(pacemakerd_state,
                               XML_PING_ATTR_PACEMAKERDSTATE_WAITPING,
                               TRUE)) {
+            crm_notice("Received startup-trigger from SBD.");
             pacemakerd_state = XML_PING_ATTR_PACEMAKERDSTATE_STARTINGDAEMONS;
             mainloop_set_trigger(startup_trigger);
         }
@@ -1250,12 +1255,13 @@ main(int argc, char **argv)
     mainloop_add_signal(SIGINT, pcmk_shutdown);
 
     if ((running_with_sbd) && pcmk__get_sbd_sync_resource_startup()) {
+        crm_notice("Waiting for startup-trigger from SBD.");
         pacemakerd_state = XML_PING_ATTR_PACEMAKERDSTATE_WAITPING;
         startup_trigger = mainloop_add_trigger(G_PRIORITY_HIGH, init_children_processes, NULL);
     } else {
         if (running_with_sbd) {
             crm_warn("Enabling SBD_SYNC_RESOURCE_STARTUP would (if supported "
-                     "by your sbd version) improve reliability of "
+                     "by your SBD version) improve reliability of "
                      "interworking between SBD & pacemaker.");
         }
         pacemakerd_state = XML_PING_ATTR_PACEMAKERDSTATE_STARTINGDAEMONS;
-- 
1.8.3.1