945d2e
From 17d5ceac78f610aabf6a3678813706faf252c2fb Mon Sep 17 00:00:00 2001
945d2e
From: Klaus Wenninger <klaus.wenninger@aon.at>
945d2e
Date: Mon, 20 Jul 2020 17:56:29 +0200
945d2e
Subject: [PATCH 1/6] Fix: ipc-api: allow calling connect after disconnection
945d2e
945d2e
---
945d2e
 lib/common/crmcommon_private.h |  1 +
945d2e
 lib/common/ipc_client.c        | 22 ++++++++++++++++------
945d2e
 2 files changed, 17 insertions(+), 6 deletions(-)
945d2e
945d2e
diff --git a/lib/common/crmcommon_private.h b/lib/common/crmcommon_private.h
945d2e
index 49dae6c..d55df99 100644
945d2e
--- a/lib/common/crmcommon_private.h
945d2e
+++ b/lib/common/crmcommon_private.h
945d2e
@@ -175,6 +175,7 @@ typedef struct pcmk__ipc_methods_s {
945d2e
 struct pcmk_ipc_api_s {
945d2e
     enum pcmk_ipc_server server;          // Daemon this IPC API instance is for
945d2e
     enum pcmk_ipc_dispatch dispatch_type; // How replies should be dispatched
945d2e
+    size_t ipc_size_max;                  // maximum IPC buffer size
945d2e
     crm_ipc_t *ipc;                       // IPC connection
945d2e
     mainloop_io_t *mainloop_io;     // If using mainloop, I/O source for IPC
945d2e
     bool free_on_disconnect;        // Whether disconnect should free object
945d2e
diff --git a/lib/common/ipc_client.c b/lib/common/ipc_client.c
945d2e
index 4077d61..df687da 100644
945d2e
--- a/lib/common/ipc_client.c
945d2e
+++ b/lib/common/ipc_client.c
945d2e
@@ -46,8 +46,6 @@
945d2e
 int
945d2e
 pcmk_new_ipc_api(pcmk_ipc_api_t **api, enum pcmk_ipc_server server)
945d2e
 {
945d2e
-    size_t max_size = 0;
945d2e
-
945d2e
     if (api == NULL) {
945d2e
         return EINVAL;
945d2e
     }
945d2e
@@ -64,13 +62,15 @@ pcmk_new_ipc_api(pcmk_ipc_api_t **api, enum pcmk_ipc_server server)
945d2e
         return EOPNOTSUPP;
945d2e
     }
945d2e
 
945d2e
+    (*api)->ipc_size_max = 0;
945d2e
+
945d2e
     // Set server methods and max_size (if not default)
945d2e
     switch (server) {
945d2e
         case pcmk_ipc_attrd:
945d2e
             break;
945d2e
 
945d2e
         case pcmk_ipc_based:
945d2e
-            max_size = 512 * 1024; // 512KB
945d2e
+            (*api)->ipc_size_max = 512 * 1024; // 512KB
945d2e
             break;
945d2e
 
945d2e
         case pcmk_ipc_controld:
945d2e
@@ -88,7 +88,7 @@ pcmk_new_ipc_api(pcmk_ipc_api_t **api, enum pcmk_ipc_server server)
945d2e
 
945d2e
         case pcmk_ipc_schedulerd:
945d2e
             // @TODO max_size could vary by client, maybe take as argument?
945d2e
-            max_size = 5 * 1024 * 1024; // 5MB
945d2e
+            (*api)->ipc_size_max = 5 * 1024 * 1024; // 5MB
945d2e
             break;
945d2e
     }
945d2e
     if ((*api)->cmds == NULL) {
945d2e
@@ -97,7 +97,8 @@ pcmk_new_ipc_api(pcmk_ipc_api_t **api, enum pcmk_ipc_server server)
945d2e
         return ENOMEM;
945d2e
     }
945d2e
 
945d2e
-    (*api)->ipc = crm_ipc_new(pcmk_ipc_name(*api, false), max_size);
945d2e
+    (*api)->ipc = crm_ipc_new(pcmk_ipc_name(*api, false),
945d2e
+                              (*api)->ipc_size_max);
945d2e
     if ((*api)->ipc == NULL) {
945d2e
         pcmk_free_ipc_api(*api);
945d2e
         *api = NULL;
945d2e
@@ -451,11 +452,20 @@ pcmk_connect_ipc(pcmk_ipc_api_t *api, enum pcmk_ipc_dispatch dispatch_type)
945d2e
 {
945d2e
     int rc = pcmk_rc_ok;
945d2e
 
945d2e
-    if ((api == NULL) || (api->ipc == NULL)) {
945d2e
+    if (api == NULL) {
945d2e
         crm_err("Cannot connect to uninitialized API object");
945d2e
         return EINVAL;
945d2e
     }
945d2e
 
945d2e
+    if (api->ipc == NULL) {
945d2e
+        api->ipc = crm_ipc_new(pcmk_ipc_name(api, false),
945d2e
+                                  api->ipc_size_max);
945d2e
+        if (api->ipc == NULL) {
945d2e
+            crm_err("Failed to re-create IPC API");
945d2e
+            return ENOMEM;
945d2e
+        }
945d2e
+    }
945d2e
+
945d2e
     if (crm_ipc_connected(api->ipc)) {
945d2e
         crm_trace("Already connected to %s IPC API", pcmk_ipc_name(api, true));
945d2e
         return pcmk_rc_ok;
945d2e
-- 
945d2e
1.8.3.1
945d2e
945d2e
945d2e
From e5ad1a6c54da48c86c8ab262abd4921cb37e998d Mon Sep 17 00:00:00 2001
945d2e
From: Klaus Wenninger <klaus.wenninger@aon.at>
945d2e
Date: Mon, 20 Jul 2020 18:18:01 +0200
945d2e
Subject: [PATCH 2/6] Fix: ipc-api: avoid infinite loop when disconnected
945d2e
945d2e
Happens when using pcmk_dispatch_ipc when dispatching without
945d2e
mainloop.
945d2e
---
945d2e
 lib/common/ipc_client.c | 2 +-
945d2e
 1 file changed, 1 insertion(+), 1 deletion(-)
945d2e
945d2e
diff --git a/lib/common/ipc_client.c b/lib/common/ipc_client.c
945d2e
index df687da..aa032fe 100644
945d2e
--- a/lib/common/ipc_client.c
945d2e
+++ b/lib/common/ipc_client.c
945d2e
@@ -392,7 +392,7 @@ pcmk_dispatch_ipc(pcmk_ipc_api_t *api)
945d2e
     if (api == NULL) {
945d2e
         return;
945d2e
     }
945d2e
-    while (crm_ipc_ready(api->ipc)) {
945d2e
+    while (crm_ipc_ready(api->ipc) > 0) {
945d2e
         if (crm_ipc_read(api->ipc) > 0) {
945d2e
             dispatch_ipc_data(crm_ipc_buffer(api->ipc), 0, api);
945d2e
         }
945d2e
-- 
945d2e
1.8.3.1
945d2e
945d2e
945d2e
From 927b43a57d5e8256fbce8fe0792f8ea228c57687 Mon Sep 17 00:00:00 2001
945d2e
From: Klaus Wenninger <klaus.wenninger@aon.at>
945d2e
Date: Mon, 9 Dec 2019 15:13:11 +0100
945d2e
Subject: [PATCH 3/6] Fix: sbd-integration: sync pacemakerd with sbd
945d2e
945d2e
Make pacemakerd wait to be pinged by sbd before starting
945d2e
sub-daemons. Ping replies additionally carry health-state and timestamp
945d2e
of last successful check. On shutdown bring down all the
945d2e
sub-daemons and wait to be polled for state by sbd before
945d2e
finally exiting pacemakerd.
945d2e
Add a new API so as not to make the XML structure an external interface.
945d2e
---
945d2e
 daemons/pacemakerd/pacemakerd.c     | 100 ++++++++++++++--
945d2e
 include/crm/common/Makefile.am      |   2 +-
945d2e
 include/crm/common/ipc_pacemakerd.h |  71 +++++++++++
945d2e
 include/crm/msg_xml.h               |   7 ++
945d2e
 lib/common/Makefile.am              |   1 +
945d2e
 lib/common/crmcommon_private.h      |   3 +
945d2e
 lib/common/ipc_client.c             |   5 +-
945d2e
 lib/common/ipc_pacemakerd.c         | 232 ++++++++++++++++++++++++++++++++++++
945d2e
 8 files changed, 410 insertions(+), 11 deletions(-)
945d2e
 create mode 100644 include/crm/common/ipc_pacemakerd.h
945d2e
 create mode 100644 lib/common/ipc_pacemakerd.c
945d2e
945d2e
diff --git a/daemons/pacemakerd/pacemakerd.c b/daemons/pacemakerd/pacemakerd.c
945d2e
index 652d6ca..ccfae66 100644
945d2e
--- a/daemons/pacemakerd/pacemakerd.c
945d2e
+++ b/daemons/pacemakerd/pacemakerd.c
945d2e
@@ -40,8 +40,25 @@ static bool global_keep_tracking = false;
945d2e
 #define PCMK_PROCESS_CHECK_INTERVAL 5
945d2e
 
945d2e
 static crm_trigger_t *shutdown_trigger = NULL;
945d2e
+static crm_trigger_t *startup_trigger = NULL;
945d2e
 static const char *pid_file = PCMK_RUN_DIR "/pacemaker.pid";
945d2e
 
945d2e
+/* state we report when asked via pacemakerd-api status-ping */
945d2e
+static const char *pacemakerd_state = XML_PING_ATTR_PACEMAKERDSTATE_INIT;
945d2e
+static gboolean running_with_sbd = FALSE; /* local copy */
945d2e
+/* When contacted via pacemakerd-api by a client having sbd in
945d2e
+ * the name we assume it is sbd-daemon which wants to know
945d2e
+ * if pacemakerd shutdown gracefully.
945d2e
+ * Thus when everything is shutdown properly pacemakerd
945d2e
+ * waits till it has reported the graceful completion of
945d2e
+ * shutdown to sbd and just when sbd-client closes the
945d2e
+ * connection we can assume that the report has arrived
945d2e
+ * properly so that pacemakerd can finally exit.
945d2e
+ * Following two variables are used to track that handshake.
945d2e
+ */
945d2e
+static unsigned int shutdown_complete_state_reported_to = 0;
945d2e
+static gboolean shutdown_complete_state_reported_client_closed = FALSE;
945d2e
+
945d2e
 typedef struct pcmk_child_s {
945d2e
     pid_t pid;
945d2e
     long flag;
945d2e
@@ -374,21 +391,20 @@ escalate_shutdown(gpointer data)
945d2e
 static gboolean
945d2e
 pcmk_shutdown_worker(gpointer user_data)
945d2e
 {
945d2e
-    static int phase = 0;
945d2e
+    static int phase = SIZEOF(pcmk_children);
945d2e
     static time_t next_log = 0;
945d2e
-    static int max = SIZEOF(pcmk_children);
945d2e
 
945d2e
     int lpc = 0;
945d2e
 
945d2e
-    if (phase == 0) {
945d2e
+    if (phase == SIZEOF(pcmk_children)) {
945d2e
         crm_notice("Shutting down Pacemaker");
945d2e
-        phase = max;
945d2e
+        pacemakerd_state = XML_PING_ATTR_PACEMAKERDSTATE_SHUTTINGDOWN;
945d2e
     }
945d2e
 
945d2e
     for (; phase > 0; phase--) {
945d2e
         /* Don't stop anything with start_seq < 1 */
945d2e
 
945d2e
-        for (lpc = max - 1; lpc >= 0; lpc--) {
945d2e
+        for (lpc = SIZEOF(pcmk_children) - 1; lpc >= 0; lpc--) {
945d2e
             pcmk_child_t *child = &(pcmk_children[lpc]);
945d2e
 
945d2e
             if (phase != child->start_seq) {
945d2e
@@ -436,6 +452,11 @@ pcmk_shutdown_worker(gpointer user_data)
945d2e
     }
945d2e
 
945d2e
     crm_notice("Shutdown complete");
945d2e
+    pacemakerd_state = XML_PING_ATTR_PACEMAKERDSTATE_SHUTDOWNCOMPLETE;
945d2e
+    if (!fatal_error && running_with_sbd &&
945d2e
+        !shutdown_complete_state_reported_client_closed) {
945d2e
+        return TRUE;
945d2e
+    }
945d2e
 
945d2e
     {
945d2e
         const char *delay = pcmk__env_option("shutdown_delay");
945d2e
@@ -489,6 +510,51 @@ pcmk_ipc_accept(qb_ipcs_connection_t * c, uid_t uid, gid_t gid)
945d2e
     return 0;
945d2e
 }
945d2e
 
945d2e
+static void
945d2e
+pcmk_handle_ping_request(pcmk__client_t *c, xmlNode *msg, uint32_t id)
945d2e
+{
945d2e
+    const char *value = NULL;
945d2e
+    xmlNode *ping = NULL;
945d2e
+    xmlNode *reply = NULL;
945d2e
+    time_t pinged = time(NULL);
945d2e
+    const char *from = crm_element_value(msg, F_CRM_SYS_FROM);
945d2e
+
945d2e
+    /* Pinged for status */
945d2e
+    crm_trace("Pinged from %s.%s",
945d2e
+              crm_str(crm_element_value(msg, F_CRM_ORIGIN)),
945d2e
+              from?from:"unknown");
945d2e
+    ping = create_xml_node(NULL, XML_CRM_TAG_PING);
945d2e
+    value = crm_element_value(msg, F_CRM_SYS_TO);
945d2e
+    crm_xml_add(ping, XML_PING_ATTR_SYSFROM, value);
945d2e
+    crm_xml_add(ping, XML_PING_ATTR_PACEMAKERDSTATE, pacemakerd_state);
945d2e
+    crm_xml_add_ll(ping, XML_ATTR_TSTAMP, (long long) pinged);
945d2e
+    crm_xml_add(ping, XML_PING_ATTR_STATUS, "ok");
945d2e
+    reply = create_reply(msg, ping);
945d2e
+    free_xml(ping);
945d2e
+    if (reply) {
945d2e
+        if (pcmk__ipc_send_xml(c, id, reply, crm_ipc_server_event) !=
945d2e
+                pcmk_rc_ok) {
945d2e
+            crm_err("Failed sending ping-reply");
945d2e
+        }
945d2e
+        free_xml(reply);
945d2e
+    } else {
945d2e
+        crm_err("Failed building ping-reply");
945d2e
+    }
945d2e
+    /* just proceed state on sbd pinging us */
945d2e
+    if (from && strstr(from, "sbd")) {
945d2e
+        if (crm_str_eq(pacemakerd_state,
945d2e
+                       XML_PING_ATTR_PACEMAKERDSTATE_SHUTDOWNCOMPLETE,
945d2e
+                       TRUE)) {
945d2e
+            shutdown_complete_state_reported_to = c->pid;
945d2e
+        } else if (crm_str_eq(pacemakerd_state,
945d2e
+                              XML_PING_ATTR_PACEMAKERDSTATE_WAITPING,
945d2e
+                              TRUE)) {
945d2e
+            pacemakerd_state = XML_PING_ATTR_PACEMAKERDSTATE_STARTINGDAEMONS;
945d2e
+            mainloop_set_trigger(startup_trigger);
945d2e
+        }
945d2e
+    }
945d2e
+}
945d2e
+
945d2e
 /* Exit code means? */
945d2e
 static int32_t
945d2e
 pcmk_ipc_dispatch(qb_ipcs_connection_t * qbc, void *data, size_t size)
945d2e
@@ -514,6 +580,9 @@ pcmk_ipc_dispatch(qb_ipcs_connection_t * qbc, void *data, size_t size)
945d2e
         crm_trace("Ignoring IPC request to purge node "
945d2e
                   "because peer cache is not used");
945d2e
 
945d2e
+    } else if (crm_str_eq(task, CRM_OP_PING, TRUE)) {
945d2e
+        pcmk_handle_ping_request(c, msg, id);
945d2e
+
945d2e
     } else {
945d2e
         crm_debug("Unrecognized IPC command '%s' sent to pacemakerd",
945d2e
                   crm_str(task));
945d2e
@@ -533,6 +602,12 @@ pcmk_ipc_closed(qb_ipcs_connection_t * c)
945d2e
         return 0;
945d2e
     }
945d2e
     crm_trace("Connection %p", c);
945d2e
+    if (shutdown_complete_state_reported_to == client->pid) {
945d2e
+        shutdown_complete_state_reported_client_closed = TRUE;
945d2e
+        if (shutdown_trigger) {
945d2e
+            mainloop_set_trigger(shutdown_trigger);
945d2e
+        }
945d2e
+    }
945d2e
     pcmk__free_client(client);
945d2e
     return 0;
945d2e
 }
945d2e
@@ -924,8 +999,8 @@ find_and_track_existing_processes(void)
945d2e
     return pcmk_rc_ok;
945d2e
 }
945d2e
 
945d2e
-static void
945d2e
-init_children_processes(void)
945d2e
+static gboolean
945d2e
+init_children_processes(void *user_data)
945d2e
 {
945d2e
     int start_seq = 1, lpc = 0;
945d2e
     static int max = SIZEOF(pcmk_children);
945d2e
@@ -951,6 +1026,8 @@ init_children_processes(void)
945d2e
      * This may be useful for the daemons to know
945d2e
      */
945d2e
     setenv("PCMK_respawned", "true", 1);
945d2e
+    pacemakerd_state = XML_PING_ATTR_PACEMAKERDSTATE_RUNNING;
945d2e
+    return TRUE;
945d2e
 }
945d2e
 
945d2e
 static void
945d2e
@@ -1154,6 +1231,7 @@ main(int argc, char **argv)
945d2e
 
945d2e
     if(pcmk_locate_sbd() > 0) {
945d2e
         setenv("PCMK_watchdog", "true", 1);
945d2e
+        running_with_sbd = TRUE;
945d2e
     } else {
945d2e
         setenv("PCMK_watchdog", "false", 1);
945d2e
     }
945d2e
@@ -1170,7 +1248,13 @@ main(int argc, char **argv)
945d2e
     mainloop_add_signal(SIGTERM, pcmk_shutdown);
945d2e
     mainloop_add_signal(SIGINT, pcmk_shutdown);
945d2e
 
945d2e
-    init_children_processes();
945d2e
+    if (running_with_sbd) {
945d2e
+        pacemakerd_state = XML_PING_ATTR_PACEMAKERDSTATE_WAITPING;
945d2e
+        startup_trigger = mainloop_add_trigger(G_PRIORITY_HIGH, init_children_processes, NULL);
945d2e
+    } else {
945d2e
+        pacemakerd_state = XML_PING_ATTR_PACEMAKERDSTATE_STARTINGDAEMONS;
945d2e
+        init_children_processes(NULL);
945d2e
+    }
945d2e
 
945d2e
     crm_notice("Pacemaker daemon successfully started and accepting connections");
945d2e
     g_main_loop_run(mainloop);
945d2e
diff --git a/include/crm/common/Makefile.am b/include/crm/common/Makefile.am
945d2e
index f29d105..1b5730a 100644
945d2e
--- a/include/crm/common/Makefile.am
945d2e
+++ b/include/crm/common/Makefile.am
945d2e
@@ -12,7 +12,7 @@ MAINTAINERCLEANFILES = Makefile.in
945d2e
 headerdir=$(pkgincludedir)/crm/common
945d2e
 
945d2e
 header_HEADERS = xml.h ipc.h util.h iso8601.h mainloop.h logging.h results.h \
945d2e
-		 nvpair.h acl.h ipc_controld.h
945d2e
+		 nvpair.h acl.h ipc_controld.h ipc_pacemakerd.h
945d2e
 noinst_HEADERS = internal.h alerts_internal.h \
945d2e
 		 iso8601_internal.h remote_internal.h xml_internal.h \
945d2e
 		 ipc_internal.h output.h cmdline_internal.h curses_internal.h \
945d2e
diff --git a/include/crm/common/ipc_pacemakerd.h b/include/crm/common/ipc_pacemakerd.h
945d2e
new file mode 100644
945d2e
index 0000000..00e3edd
945d2e
--- /dev/null
945d2e
+++ b/include/crm/common/ipc_pacemakerd.h
945d2e
@@ -0,0 +1,71 @@
945d2e
+/*
945d2e
+ * Copyright 2020 the Pacemaker project contributors
945d2e
+ *
945d2e
+ * The version control history for this file may have further details.
945d2e
+ *
945d2e
+ * This source code is licensed under the GNU Lesser General Public License
945d2e
+ * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
945d2e
+ */
945d2e
+
945d2e
+#ifndef PCMK__IPC_PACEMAKERD__H
945d2e
+#  define PCMK__IPC_PACEMAKERD__H
945d2e
+
945d2e
+#ifdef __cplusplus
945d2e
+extern "C" {
945d2e
+#endif
945d2e
+
945d2e
+/**
945d2e
+ * \file
945d2e
+ * \brief IPC commands for Pacemakerd
945d2e
+ *
945d2e
+ * \ingroup core
945d2e
+ */
945d2e
+
945d2e
+#include <sys/types.h>       // time_t
945d2e
+#include <crm/common/ipc.h>  // pcmk_ipc_api_t
945d2e
+
945d2e
+enum pcmk_pacemakerd_state {
945d2e
+    pcmk_pacemakerd_state_invalid = -1,
945d2e
+    pcmk_pacemakerd_state_init = 0,
945d2e
+    pcmk_pacemakerd_state_starting_daemons,
945d2e
+    pcmk_pacemakerd_state_wait_for_ping,
945d2e
+    pcmk_pacemakerd_state_running,
945d2e
+    pcmk_pacemakerd_state_shutting_down,
945d2e
+    pcmk_pacemakerd_state_shutdown_complete,
945d2e
+    pcmk_pacemakerd_state_max = pcmk_pacemakerd_state_shutdown_complete,
945d2e
+};
945d2e
+
945d2e
+//! Possible types of pacemakerd replies
945d2e
+enum pcmk_pacemakerd_api_reply {
945d2e
+    pcmk_pacemakerd_reply_unknown,
945d2e
+    pcmk_pacemakerd_reply_ping,
945d2e
+};
945d2e
+
945d2e
+/*!
945d2e
+ * Pacemakerd reply passed to event callback
945d2e
+ */
945d2e
+typedef struct {
945d2e
+    enum pcmk_pacemakerd_api_reply reply_type;
945d2e
+
945d2e
+    union {
945d2e
+        // pcmk_pacemakerd_reply_ping
945d2e
+        struct {
945d2e
+            const char *sys_from;
945d2e
+            enum pcmk_pacemakerd_state state;
945d2e
+            time_t last_good;
945d2e
+            int status;
945d2e
+        } ping;
945d2e
+    } data;
945d2e
+} pcmk_pacemakerd_api_reply_t;
945d2e
+
945d2e
+int pcmk_pacemakerd_api_ping(pcmk_ipc_api_t *api, const char *ipc_name);
945d2e
+enum pcmk_pacemakerd_state
945d2e
+    pcmk_pacemakerd_api_daemon_state_text2enum(const char *state);
945d2e
+const char
945d2e
+    *pcmk_pacemakerd_api_daemon_state_enum2text(enum pcmk_pacemakerd_state state);
945d2e
+
945d2e
+#ifdef __cplusplus
945d2e
+}
945d2e
+#endif
945d2e
+
945d2e
+#endif // PCMK__IPC_PACEMAKERD__H
945d2e
diff --git a/include/crm/msg_xml.h b/include/crm/msg_xml.h
945d2e
index af3f33e..1fcb72d 100644
945d2e
--- a/include/crm/msg_xml.h
945d2e
+++ b/include/crm/msg_xml.h
945d2e
@@ -123,6 +123,13 @@ extern "C" {
945d2e
 #  define XML_PING_ATTR_STATUS		"result"
945d2e
 #  define XML_PING_ATTR_SYSFROM		"crm_subsystem"
945d2e
 #  define XML_PING_ATTR_CRMDSTATE   "crmd_state"
945d2e
+#  define XML_PING_ATTR_PACEMAKERDSTATE "pacemakerd_state"
945d2e
+#  define XML_PING_ATTR_PACEMAKERDSTATE_INIT "init"
945d2e
+#  define XML_PING_ATTR_PACEMAKERDSTATE_STARTINGDAEMONS "starting_daemons"
945d2e
+#  define XML_PING_ATTR_PACEMAKERDSTATE_WAITPING "wait_for_ping"
945d2e
+#  define XML_PING_ATTR_PACEMAKERDSTATE_RUNNING "running"
945d2e
+#  define XML_PING_ATTR_PACEMAKERDSTATE_SHUTTINGDOWN "shutting_down"
945d2e
+#  define XML_PING_ATTR_PACEMAKERDSTATE_SHUTDOWNCOMPLETE "shutdown_complete"
945d2e
 
945d2e
 #  define XML_TAG_FRAGMENT		"cib_fragment"
945d2e
 
945d2e
diff --git a/lib/common/Makefile.am b/lib/common/Makefile.am
945d2e
index db66a6e..e0249b9 100644
945d2e
--- a/lib/common/Makefile.am
945d2e
+++ b/lib/common/Makefile.am
945d2e
@@ -50,6 +50,7 @@ libcrmcommon_la_SOURCES	+= io.c
945d2e
 libcrmcommon_la_SOURCES	+= ipc_client.c
945d2e
 libcrmcommon_la_SOURCES	+= ipc_common.c
945d2e
 libcrmcommon_la_SOURCES	+= ipc_controld.c
945d2e
+libcrmcommon_la_SOURCES	+= ipc_pacemakerd.c
945d2e
 libcrmcommon_la_SOURCES	+= ipc_server.c
945d2e
 libcrmcommon_la_SOURCES	+= iso8601.c
945d2e
 libcrmcommon_la_SOURCES	+= logging.c
945d2e
diff --git a/lib/common/crmcommon_private.h b/lib/common/crmcommon_private.h
945d2e
index d55df99..68e3390 100644
945d2e
--- a/lib/common/crmcommon_private.h
945d2e
+++ b/lib/common/crmcommon_private.h
945d2e
@@ -210,4 +210,7 @@ bool pcmk__valid_ipc_header(const pcmk__ipc_header_t *header);
945d2e
 G_GNUC_INTERNAL
945d2e
 pcmk__ipc_methods_t *pcmk__controld_api_methods(void);
945d2e
 
945d2e
+G_GNUC_INTERNAL
945d2e
+pcmk__ipc_methods_t *pcmk__pacemakerd_api_methods(void);
945d2e
+
945d2e
 #endif  // CRMCOMMON_PRIVATE__H
945d2e
diff --git a/lib/common/ipc_client.c b/lib/common/ipc_client.c
945d2e
index aa032fe..033199d 100644
945d2e
--- a/lib/common/ipc_client.c
945d2e
+++ b/lib/common/ipc_client.c
945d2e
@@ -41,7 +41,7 @@
945d2e
  *
945d2e
  * \note The caller is responsible for freeing *api using pcmk_free_ipc_api().
945d2e
  * \note This is intended to supersede crm_ipc_new() but currently only
945d2e
- *       supports the controller IPC API.
945d2e
+ *       supports the controller & pacemakerd IPC API.
945d2e
  */
945d2e
 int
945d2e
 pcmk_new_ipc_api(pcmk_ipc_api_t **api, enum pcmk_ipc_server server)
945d2e
@@ -84,6 +84,7 @@ pcmk_new_ipc_api(pcmk_ipc_api_t **api, enum pcmk_ipc_server server)
945d2e
             break;
945d2e
 
945d2e
         case pcmk_ipc_pacemakerd:
945d2e
+            (*api)->cmds = pcmk__pacemakerd_api_methods();
945d2e
             break;
945d2e
 
945d2e
         case pcmk_ipc_schedulerd:
945d2e
@@ -259,7 +260,7 @@ pcmk_ipc_name(pcmk_ipc_api_t *api, bool for_log)
945d2e
             return for_log? "fencer" : NULL /* "stonith-ng" */;
945d2e
 
945d2e
         case pcmk_ipc_pacemakerd:
945d2e
-            return for_log? "launcher" : NULL /* CRM_SYSTEM_MCP */;
945d2e
+            return for_log? "launcher" : CRM_SYSTEM_MCP;
945d2e
 
945d2e
         case pcmk_ipc_schedulerd:
945d2e
             return for_log? "scheduler" : NULL /* CRM_SYSTEM_PENGINE */;
945d2e
diff --git a/lib/common/ipc_pacemakerd.c b/lib/common/ipc_pacemakerd.c
945d2e
new file mode 100644
945d2e
index 0000000..241722e
945d2e
--- /dev/null
945d2e
+++ b/lib/common/ipc_pacemakerd.c
945d2e
@@ -0,0 +1,232 @@
945d2e
+/*
945d2e
+ * Copyright 2020 the Pacemaker project contributors
945d2e
+ *
945d2e
+ * The version control history for this file may have further details.
945d2e
+ *
945d2e
+ * This source code is licensed under the GNU Lesser General Public License
945d2e
+ * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
945d2e
+ */
945d2e
+
945d2e
+#include <crm_internal.h>
945d2e
+
945d2e
+#include <stdlib.h>
945d2e
+#include <time.h>
945d2e
+
945d2e
+#include <crm/crm.h>
945d2e
+#include <crm/msg_xml.h>
945d2e
+#include <crm/common/xml.h>
945d2e
+#include <crm/common/ipc.h>
945d2e
+#include <crm/common/ipc_internal.h>
945d2e
+#include <crm/common/ipc_pacemakerd.h>
945d2e
+#include "crmcommon_private.h"
945d2e
+
945d2e
+typedef struct pacemakerd_api_private_s {
945d2e
+    enum pcmk_pacemakerd_state state;
945d2e
+    char *client_uuid;
945d2e
+} pacemakerd_api_private_t;
945d2e
+
945d2e
+static const char *pacemakerd_state_str[] = {
945d2e
+    XML_PING_ATTR_PACEMAKERDSTATE_INIT,
945d2e
+    XML_PING_ATTR_PACEMAKERDSTATE_STARTINGDAEMONS,
945d2e
+    XML_PING_ATTR_PACEMAKERDSTATE_WAITPING,
945d2e
+    XML_PING_ATTR_PACEMAKERDSTATE_RUNNING,
945d2e
+    XML_PING_ATTR_PACEMAKERDSTATE_SHUTTINGDOWN,
945d2e
+    XML_PING_ATTR_PACEMAKERDSTATE_SHUTDOWNCOMPLETE
945d2e
+}; // indexed by enum pcmk_pacemakerd_state: keep entries in enum order
945d2e
+
945d2e
+enum pcmk_pacemakerd_state
945d2e
+pcmk_pacemakerd_api_daemon_state_text2enum(const char *state)
945d2e
+{
945d2e
+    int i;
945d2e
+
945d2e
+    if (state == NULL) {
945d2e
+        return pcmk_pacemakerd_state_invalid;
945d2e
+    }
945d2e
+    for (i=pcmk_pacemakerd_state_init; i <= pcmk_pacemakerd_state_max;
945d2e
+         i++) {
945d2e
+        if (crm_str_eq(state, pacemakerd_state_str[i], TRUE)) {
945d2e
+            return i; // table index doubles as the enum value
945d2e
+        }
945d2e
+    }
945d2e
+    return pcmk_pacemakerd_state_invalid; // no known state name matched
945d2e
+}
945d2e
+
945d2e
+const char *
945d2e
+pcmk_pacemakerd_api_daemon_state_enum2text(
945d2e
+    enum pcmk_pacemakerd_state state)
945d2e
+{
945d2e
+    if ((state >= pcmk_pacemakerd_state_init) &&
945d2e
+        (state <= pcmk_pacemakerd_state_max)) {
945d2e
+        return pacemakerd_state_str[state];
945d2e
+    }
945d2e
+    return "invalid";
945d2e
+}
945d2e
+
945d2e
+// \return Standard Pacemaker return code
945d2e
+static int
945d2e
+new_data(pcmk_ipc_api_t *api)
945d2e
+{
945d2e
+    struct pacemakerd_api_private_s *private = NULL;
945d2e
+
945d2e
+    api->api_data = calloc(1, sizeof(struct pacemakerd_api_private_s));
945d2e
+
945d2e
+    if (api->api_data == NULL) {
945d2e
+        return errno; // presumably ENOMEM as set by calloc() - TODO confirm
945d2e
+    }
945d2e
+
945d2e
+    private = api->api_data;
945d2e
+    private->state = pcmk_pacemakerd_state_invalid;
945d2e
+    /* Unlike with cib, controld, ... we are addressing pacemakerd only
945d2e
+       from the local node -> pid is unique and thus sufficient as an ID
945d2e
+     */
945d2e
+    private->client_uuid = pcmk__getpid_s();
945d2e
+
945d2e
+    return pcmk_rc_ok;
945d2e
+}
945d2e
+
945d2e
+static void
945d2e
+free_data(void *data)
945d2e
+{
945d2e
+    free(((struct pacemakerd_api_private_s *) data)->client_uuid);
945d2e
+    free(data);
945d2e
+}
945d2e
+
945d2e
+// \return Standard Pacemaker return code
945d2e
+static int
945d2e
+post_connect(pcmk_ipc_api_t *api)
945d2e
+{
945d2e
+    struct pacemakerd_api_private_s *private = NULL;
945d2e
+
945d2e
+    if (api->api_data == NULL) {
945d2e
+        return EINVAL;
945d2e
+    }
945d2e
+    private = api->api_data;
945d2e
+    private->state = pcmk_pacemakerd_state_invalid;
945d2e
+
945d2e
+    return pcmk_rc_ok;
945d2e
+}
945d2e
+
945d2e
+static void
945d2e
+post_disconnect(pcmk_ipc_api_t *api)
945d2e
+{
945d2e
+    struct pacemakerd_api_private_s *private = NULL;
945d2e
+
945d2e
+    if (api->api_data == NULL) {
945d2e
+        return;
945d2e
+    }
945d2e
+    private = api->api_data;
945d2e
+    private->state = pcmk_pacemakerd_state_invalid;
945d2e
+
945d2e
+    return;
945d2e
+}
945d2e
+
945d2e
+static bool
945d2e
+reply_expected(pcmk_ipc_api_t *api, xmlNode *request)
945d2e
+{
945d2e
+    const char *command = crm_element_value(request, F_CRM_TASK);
945d2e
+
945d2e
+    if (command == NULL) {
945d2e
+        return false;
945d2e
+    }
945d2e
+
945d2e
+    // We only need to handle commands that functions in this file can send
945d2e
+    return !strcmp(command, CRM_OP_PING);
945d2e
+}
945d2e
+
945d2e
+static void
945d2e
+dispatch(pcmk_ipc_api_t *api, xmlNode *reply)
945d2e
+{
945d2e
+    crm_exit_t status = CRM_EX_OK;
945d2e
+    xmlNode *msg_data = NULL;
945d2e
+    pcmk_pacemakerd_api_reply_t reply_data = {
945d2e
+        pcmk_pacemakerd_reply_unknown
945d2e
+    };
945d2e
+    const char *value = NULL;
945d2e
+    long long value_ll = 0;
945d2e
+
945d2e
+    value = crm_element_value(reply, F_CRM_MSG_TYPE);
945d2e
+    if ((value == NULL) || (strcmp(value, XML_ATTR_RESPONSE))) {
945d2e
+        crm_debug("Unrecognizable pacemakerd message: invalid message type '%s'",
945d2e
+                  crm_str(value));
945d2e
+        status = CRM_EX_PROTOCOL;
945d2e
+        goto done;
945d2e
+    }
945d2e
+
945d2e
+    if (crm_element_value(reply, XML_ATTR_REFERENCE) == NULL) {
945d2e
+        crm_debug("Unrecognizable pacemakerd message: no reference");
945d2e
+        status = CRM_EX_PROTOCOL;
945d2e
+        goto done;
945d2e
+    }
945d2e
+
945d2e
+    value = crm_element_value(reply, F_CRM_TASK);
945d2e
+    if ((value == NULL) || strcmp(value, CRM_OP_PING)) {
945d2e
+        crm_debug("Unrecognizable pacemakerd message: '%s'", crm_str(value));
945d2e
+        status = CRM_EX_PROTOCOL;
945d2e
+        goto done;
945d2e
+    }
945d2e
+
945d2e
+    // Parse useful info from reply
945d2e
+
945d2e
+    msg_data = get_message_xml(reply, F_CRM_DATA);
945d2e
+    crm_element_value_ll(msg_data, XML_ATTR_TSTAMP, &value_ll);
945d2e
+
945d2e
+    reply_data.reply_type = pcmk_pacemakerd_reply_ping;
945d2e
+    reply_data.data.ping.state =
945d2e
+        pcmk_pacemakerd_api_daemon_state_text2enum(
945d2e
+            crm_element_value(msg_data, XML_PING_ATTR_PACEMAKERDSTATE));
945d2e
+    reply_data.data.ping.status =
945d2e
+        crm_str_eq(crm_element_value(msg_data, XML_PING_ATTR_STATUS),
945d2e
+                   "ok", FALSE)?pcmk_rc_ok:pcmk_rc_error;
945d2e
+    reply_data.data.ping.last_good = (time_t) value_ll;
945d2e
+    reply_data.data.ping.sys_from = crm_element_value(msg_data,
945d2e
+                                        XML_PING_ATTR_SYSFROM);
945d2e
+
945d2e
+done:
945d2e
+    pcmk__call_ipc_callback(api, pcmk_ipc_event_reply, status, &reply_data);
945d2e
+}
945d2e
+
945d2e
+pcmk__ipc_methods_t *
945d2e
+pcmk__pacemakerd_api_methods(void)
945d2e
+{
945d2e
+    pcmk__ipc_methods_t *cmds = calloc(1, sizeof(pcmk__ipc_methods_t));
945d2e
+
945d2e
+    if (cmds != NULL) { // fill in the pacemakerd handlers defined in this file
945d2e
+        cmds->new_data = new_data;
945d2e
+        cmds->free_data = free_data;
945d2e
+        cmds->post_connect = post_connect;
945d2e
+        cmds->reply_expected = reply_expected;
945d2e
+        cmds->dispatch = dispatch;
945d2e
+        cmds->post_disconnect = post_disconnect;
945d2e
+    }
945d2e
+    return cmds; // NULL if the allocation failed
945d2e
+}
945d2e
+
945d2e
+int
945d2e
+pcmk_pacemakerd_api_ping(pcmk_ipc_api_t *api, const char *ipc_name)
945d2e
+{
945d2e
+    pacemakerd_api_private_t *private;
945d2e
+    xmlNode *cmd;
945d2e
+    int rc; // pcmk_rc_ok or a positive errno-style code handed to the caller
945d2e
+
945d2e
+    CRM_CHECK(api != NULL, return EINVAL); // positive, like ECOMM/ENOMSG below
945d2e
+    private = api->api_data;
945d2e
+    CRM_ASSERT(private != NULL);
945d2e
+
945d2e
+    cmd = create_request(CRM_OP_PING, NULL, NULL, CRM_SYSTEM_MCP,
945d2e
+        ipc_name?ipc_name:((crm_system_name? crm_system_name : "client")),
945d2e
+        private->client_uuid);
945d2e
+
945d2e
+    if (cmd) {
945d2e
+        rc = pcmk__send_ipc_request(api, cmd);
945d2e
+        if (rc != pcmk_rc_ok) {
945d2e
+            crm_debug("Couldn't ping pacemakerd: %s rc=%d",
945d2e
+                pcmk_rc_str(rc), rc);
945d2e
+            rc = ECOMM; // any send failure is reported to the caller as ECOMM
945d2e
+        }
945d2e
+        free_xml(cmd);
945d2e
+    } else {
945d2e
+        rc = ENOMSG; // request XML could not be built
945d2e
+    }
945d2e
+
945d2e
+    return rc;
945d2e
+}
945d2e
-- 
945d2e
1.8.3.1
945d2e
945d2e
945d2e
From 06da3c3685b0bdf093a13067cc399e782115e39c Mon Sep 17 00:00:00 2001
945d2e
From: Klaus Wenninger <klaus.wenninger@aon.at>
945d2e
Date: Mon, 20 Jul 2020 23:28:32 +0200
945d2e
Subject: [PATCH 4/6] Feature: tools: Add -P to crmadmin to ping via
945d2e
 pacemakerd-api
945d2e
945d2e
---
945d2e
 include/crm/crm.h |   2 +-
945d2e
 tools/crmadmin.c  | 161 ++++++++++++++++++++++++++++++++++++++++++++++++++----
945d2e
 2 files changed, 152 insertions(+), 11 deletions(-)
945d2e
945d2e
diff --git a/include/crm/crm.h b/include/crm/crm.h
945d2e
index dc2adc1..ce2074b 100644
945d2e
--- a/include/crm/crm.h
945d2e
+++ b/include/crm/crm.h
945d2e
@@ -51,7 +51,7 @@ extern "C" {
945d2e
  * >=3.0.13: Fail counts include operation name and interval
945d2e
  * >=3.2.0:  DC supports PCMK_LRM_OP_INVALID and PCMK_LRM_OP_NOT_CONNECTED
945d2e
  */
945d2e
-#  define CRM_FEATURE_SET		"3.4.0"
945d2e
+#  define CRM_FEATURE_SET		"3.4.1"
945d2e
 
945d2e
 #  define EOS		'\0'
945d2e
 #  define DIMOF(a)	((int) (sizeof(a)/sizeof(a[0])) )
945d2e
diff --git a/tools/crmadmin.c b/tools/crmadmin.c
945d2e
index 4688458..2ebdd14 100644
945d2e
--- a/tools/crmadmin.c
945d2e
+++ b/tools/crmadmin.c
945d2e
@@ -20,7 +20,9 @@
945d2e
 #include <crm/cib.h>
945d2e
 #include <crm/msg_xml.h>
945d2e
 #include <crm/common/xml.h>
945d2e
+#include <crm/common/iso8601.h>
945d2e
 #include <crm/common/ipc_controld.h>
945d2e
+#include <crm/common/ipc_pacemakerd.h>
945d2e
 #include <crm/common/mainloop.h>
945d2e
 
945d2e
 #define DEFAULT_MESSAGE_TIMEOUT_MS 30000
945d2e
@@ -31,6 +33,8 @@ static GMainLoop *mainloop = NULL;
945d2e
 
945d2e
 bool do_work(pcmk_ipc_api_t *api);
945d2e
 void do_find_node_list(xmlNode *xml_node);
945d2e
+static char *ipc_name = NULL;
945d2e
+
945d2e
 gboolean admin_message_timeout(gpointer data);
945d2e
 
945d2e
 static enum {
945d2e
@@ -40,6 +44,7 @@ static enum {
945d2e
     cmd_elect_dc,
945d2e
     cmd_whois_dc,
945d2e
     cmd_list_nodes,
945d2e
+    cmd_pacemakerd_health,
945d2e
 } command = cmd_none;
945d2e
 
945d2e
 static gboolean BE_VERBOSE = FALSE;
945d2e
@@ -82,6 +87,15 @@ static pcmk__cli_option_t long_options[] = {
945d2e
         pcmk__option_default
945d2e
     },
945d2e
     {
945d2e
+        "pacemakerd", no_argument, NULL, 'P',
945d2e
+        "Display the status of local pacemakerd.", pcmk__option_default
945d2e
+    },
945d2e
+    {
945d2e
+        "-spacer-", no_argument, NULL, '-',
945d2e
+        "\n\tResult is the state of the sub-daemons watched by pacemakerd.\n",
945d2e
+        pcmk__option_default
945d2e
+    },
945d2e
+    {
945d2e
         "dc_lookup", no_argument, NULL, 'D',
945d2e
         "Display the uname of the node co-ordinating the cluster.",
945d2e
         pcmk__option_default
945d2e
@@ -122,16 +136,21 @@ static pcmk__cli_option_t long_options[] = {
945d2e
     {
945d2e
         "bash-export", no_argument, NULL, 'B',
945d2e
         "Display nodes as shell commands of the form 'export uname=uuid' "
945d2e
-            "(valid with -N/--nodes)'\n",
945d2e
+            "(valid with -N/--nodes)",
945d2e
+        pcmk__option_default
945d2e
+    },
945d2e
+    {
945d2e
+        "ipc-name", required_argument, NULL, 'i',
945d2e
+        "Name to use for ipc instead of 'crmadmin' (with -P/--pacemakerd).",
945d2e
         pcmk__option_default
945d2e
     },
945d2e
     {
945d2e
         "-spacer-", no_argument, NULL, '-',
945d2e
-        "Notes:", pcmk__option_default
945d2e
+        "\nNotes:", pcmk__option_default
945d2e
     },
945d2e
     {
945d2e
         "-spacer-", no_argument, NULL, '-',
945d2e
-        "The -K and -E commands do not work and may be removed in a future "
945d2e
+        "\nThe -K and -E commands do not work and may be removed in a future "
945d2e
             "version.",
945d2e
         pcmk__option_default
945d2e
     },
945d2e
@@ -223,6 +242,88 @@ done:
945d2e
     quit_main_loop(exit_code);
945d2e
 }
945d2e
 
945d2e
+// IPC callback: print pacemakerd's ping reply, then disconnect and quit
945d2e
+static void
945d2e
+pacemakerd_event_cb(pcmk_ipc_api_t *pacemakerd_api,
945d2e
+                    enum pcmk_ipc_event event_type, crm_exit_t status,
945d2e
+                    void *event_data, void *user_data)
945d2e
+{
945d2e
+    pcmk_pacemakerd_api_reply_t *reply = event_data;
945d2e
+
945d2e
+    switch (event_type) {
945d2e
+        case pcmk_ipc_event_disconnect:
945d2e
+            if (exit_code == CRM_EX_DISCONNECT) { // Unexpected
945d2e
+                fprintf(stderr, "error: Lost connection to pacemakerd\n");
945d2e
+            }
945d2e
+            goto done;
945d2e
+
945d2e
+        case pcmk_ipc_event_reply:
945d2e
+            break;
945d2e
+
945d2e
+        default: // No other event type is handled here
945d2e
+            return;
945d2e
+    }
945d2e
+
945d2e
+    if (message_timer_id != 0) { // A reply arrived, so drop the pending timer
945d2e
+        g_source_remove(message_timer_id);
945d2e
+        message_timer_id = 0;
945d2e
+    }
945d2e
+
945d2e
+    if (status != CRM_EX_OK) {
945d2e
+        fprintf(stderr, "error: Bad reply from pacemakerd: %s\n",
945d2e
+                crm_exit_str(status));
945d2e
+        exit_code = status;
945d2e
+        goto done;
945d2e
+    }
945d2e
+
945d2e
+    if (reply->reply_type != pcmk_pacemakerd_reply_ping) {
945d2e
+        fprintf(stderr, "error: Unknown reply type %d from pacemakerd\n",
945d2e
+                reply->reply_type);
945d2e
+        goto done;
945d2e
+    }
945d2e
+
945d2e
+    // Parse desired information from reply
945d2e
+    switch (command) {
945d2e
+        case cmd_pacemakerd_health: {
945d2e
+                crm_time_t *crm_when = crm_time_new(NULL);
945d2e
+                char *pinged_buf = NULL;
945d2e
+
945d2e
+                crm_time_set_timet(crm_when, &reply->data.ping.last_good);
945d2e
+                pinged_buf = crm_time_as_string(crm_when,
945d2e
+                    crm_time_log_date | crm_time_log_timeofday |
945d2e
+                        crm_time_log_with_timezone);
945d2e
+
945d2e
+                printf("Status of %s: '%s' %s %s\n",
945d2e
+                    reply->data.ping.sys_from,
945d2e
+                    (reply->data.ping.status == pcmk_rc_ok)?
945d2e
+                        pcmk_pacemakerd_api_daemon_state_enum2text(
945d2e
+                            reply->data.ping.state):"query failed",
945d2e
+                    (reply->data.ping.status == pcmk_rc_ok)?"last updated":"",
945d2e
+                    (reply->data.ping.status == pcmk_rc_ok)?pinged_buf:"");
945d2e
+                if (BE_SILENT &&
945d2e
+                    (reply->data.ping.state != pcmk_pacemakerd_state_invalid)) {
945d2e
+                    fprintf(stderr, "%s\n",
945d2e
+                        (reply->data.ping.status == pcmk_rc_ok)?
945d2e
+                        pcmk_pacemakerd_api_daemon_state_enum2text(
945d2e
+                            reply->data.ping.state):
945d2e
+                        "query failed");
945d2e
+                }
945d2e
+                exit_code = CRM_EX_OK;
945d2e
+                free(pinged_buf);
945d2e
+                crm_time_free(crm_when); // Release the object from crm_time_new()
945d2e
+            }
945d2e
+            break;
945d2e
+
945d2e
+        default: // Not really possible here
945d2e
+            exit_code = CRM_EX_SOFTWARE;
945d2e
+            break;
945d2e
+    }
945d2e
+
945d2e
+done:
945d2e
+    pcmk_disconnect_ipc(pacemakerd_api);
945d2e
+    quit_main_loop(exit_code);
945d2e
+}
945d2e
+
945d2e
 // \return Standard Pacemaker return code
945d2e
 static int
945d2e
 list_nodes()
945d2e
@@ -257,7 +358,9 @@ main(int argc, char **argv)
945d2e
     int flag;
945d2e
     int rc;
945d2e
     pcmk_ipc_api_t *controld_api = NULL;
945d2e
+    pcmk_ipc_api_t *pacemakerd_api = NULL;
945d2e
     bool need_controld_api = true;
945d2e
+    bool need_pacemakerd_api = false;
945d2e
 
945d2e
     crm_log_cli_init("crmadmin");
945d2e
     pcmk__set_cli_options(NULL, "<command> [options]", long_options,
945d2e
@@ -282,7 +385,9 @@ main(int argc, char **argv)
945d2e
                     message_timeout_ms = DEFAULT_MESSAGE_TIMEOUT_MS;
945d2e
                 }
945d2e
                 break;
945d2e
-
945d2e
+            case 'i':
945d2e
+                ipc_name = strdup(optarg);
945d2e
+                break;
945d2e
             case '$':
945d2e
             case '?':
945d2e
                 pcmk__cli_help(flag, CRM_EX_OK);
945d2e
@@ -304,6 +409,11 @@ main(int argc, char **argv)
945d2e
             case 'q':
945d2e
                 BE_SILENT = TRUE;
945d2e
                 break;
945d2e
+            case 'P':
945d2e
+                command = cmd_pacemakerd_health;
945d2e
+                need_pacemakerd_api = true;
945d2e
+                need_controld_api = false;
945d2e
+                break;
945d2e
             case 'S':
945d2e
                 command = cmd_health;
945d2e
                 crm_trace("Option %c => %s", flag, optarg);
945d2e
@@ -369,7 +479,26 @@ main(int argc, char **argv)
945d2e
         }
945d2e
     }
945d2e
 
945d2e
-    if (do_work(controld_api)) {
945d2e
+    // Connect to pacemakerd if needed
945d2e
+    if (need_pacemakerd_api) {
945d2e
+        rc = pcmk_new_ipc_api(&pacemakerd_api, pcmk_ipc_pacemakerd);
945d2e
+        if (pacemakerd_api == NULL) {
945d2e
+            fprintf(stderr, "error: Could not connect to pacemakerd: %s\n",
945d2e
+                    pcmk_rc_str(rc));
945d2e
+            exit_code = pcmk_rc2exitc(rc);
945d2e
+            goto done;
945d2e
+        }
945d2e
+        pcmk_register_ipc_callback(pacemakerd_api, pacemakerd_event_cb, NULL);
945d2e
+        rc = pcmk_connect_ipc(pacemakerd_api, pcmk_ipc_dispatch_main);
945d2e
+        if (rc != pcmk_rc_ok) {
945d2e
+            fprintf(stderr, "error: Could not connect to pacemakerd: %s\n",
945d2e
+                    pcmk_rc_str(rc));
945d2e
+            exit_code = pcmk_rc2exitc(rc);
945d2e
+            goto done;
945d2e
+        }
945d2e
+    }
945d2e
+
945d2e
+    if (do_work(controld_api?controld_api:pacemakerd_api)) {
945d2e
         // A reply is needed from controller, so run main loop to get it
945d2e
         exit_code = CRM_EX_DISCONNECT; // For unexpected disconnects
945d2e
         mainloop = g_main_loop_new(NULL, FALSE);
945d2e
@@ -379,12 +508,19 @@ main(int argc, char **argv)
945d2e
     }
945d2e
 
945d2e
 done:
945d2e
+
945d2e
     if (controld_api != NULL) {
945d2e
         pcmk_ipc_api_t *capi = controld_api;
945d2e
-
945d2e
         controld_api = NULL; // Ensure we can't free this twice
945d2e
         pcmk_free_ipc_api(capi);
945d2e
     }
945d2e
+
945d2e
+    if (pacemakerd_api != NULL) {
945d2e
+        pcmk_ipc_api_t *capi = pacemakerd_api;
945d2e
+        pacemakerd_api = NULL; // Ensure we can't free this twice
945d2e
+        pcmk_free_ipc_api(capi);
945d2e
+    }
945d2e
+
945d2e
     if (mainloop != NULL) {
945d2e
         g_main_loop_unref(mainloop);
945d2e
         mainloop = NULL;
945d2e
@@ -394,30 +530,35 @@ done:
945d2e
 
945d2e
 // \return True if reply from controller is needed
945d2e
 bool
945d2e
-do_work(pcmk_ipc_api_t *controld_api)
945d2e
+do_work(pcmk_ipc_api_t *api)
945d2e
 {
945d2e
     bool need_reply = false;
945d2e
     int rc = pcmk_rc_ok;
945d2e
 
945d2e
     switch (command) {
945d2e
         case cmd_shutdown:
945d2e
-            rc = pcmk_controld_api_shutdown(controld_api, dest_node);
945d2e
+            rc = pcmk_controld_api_shutdown(api, dest_node);
945d2e
             break;
945d2e
 
945d2e
         case cmd_health:    // dest_node != NULL
945d2e
         case cmd_whois_dc:  // dest_node == NULL
945d2e
-            rc = pcmk_controld_api_ping(controld_api, dest_node);
945d2e
+            rc = pcmk_controld_api_ping(api, dest_node);
945d2e
             need_reply = true;
945d2e
             break;
945d2e
 
945d2e
         case cmd_elect_dc:
945d2e
-            rc = pcmk_controld_api_start_election(controld_api);
945d2e
+            rc = pcmk_controld_api_start_election(api);
945d2e
             break;
945d2e
 
945d2e
         case cmd_list_nodes:
945d2e
             rc = list_nodes();
945d2e
             break;
945d2e
 
945d2e
+        case cmd_pacemakerd_health:
945d2e
+            rc = pcmk_pacemakerd_api_ping(api, ipc_name);
945d2e
+            need_reply = true;
945d2e
+            break;
945d2e
+
945d2e
         case cmd_none: // not actually possible here
945d2e
             break;
945d2e
     }
945d2e
-- 
945d2e
1.8.3.1
945d2e
945d2e
945d2e
From 6ce5bb0d6fd30a204468ea245209d34f2682d7c9 Mon Sep 17 00:00:00 2001
945d2e
From: Klaus Wenninger <klaus.wenninger@aon.at>
945d2e
Date: Tue, 21 Jul 2020 18:12:53 +0200
945d2e
Subject: [PATCH 5/6] Fix: pacemakerd: interworking with sbd not using
945d2e
 pacemakerd-api
945d2e
945d2e
---
945d2e
 daemons/pacemakerd/pacemakerd.c       |  8 +++++++-
945d2e
 include/crm/common/options_internal.h |  1 +
945d2e
 lib/common/watchdog.c                 | 15 +++++++++++++++
945d2e
 3 files changed, 23 insertions(+), 1 deletion(-)
945d2e
945d2e
diff --git a/daemons/pacemakerd/pacemakerd.c b/daemons/pacemakerd/pacemakerd.c
945d2e
index ccfae66..e91982a 100644
945d2e
--- a/daemons/pacemakerd/pacemakerd.c
945d2e
+++ b/daemons/pacemakerd/pacemakerd.c
945d2e
@@ -454,6 +454,7 @@ pcmk_shutdown_worker(gpointer user_data)
945d2e
     crm_notice("Shutdown complete");
945d2e
     pacemakerd_state = XML_PING_ATTR_PACEMAKERDSTATE_SHUTDOWNCOMPLETE;
945d2e
     if (!fatal_error && running_with_sbd &&
945d2e
+        pcmk__get_sbd_sync_resource_startup() &&
945d2e
         !shutdown_complete_state_reported_client_closed) {
945d2e
         return TRUE;
945d2e
     }
945d2e
@@ -1248,10 +1249,15 @@ main(int argc, char **argv)
945d2e
     mainloop_add_signal(SIGTERM, pcmk_shutdown);
945d2e
     mainloop_add_signal(SIGINT, pcmk_shutdown);
945d2e
 
945d2e
-    if (running_with_sbd) {
945d2e
+    if ((running_with_sbd) && pcmk__get_sbd_sync_resource_startup()) {
945d2e
         pacemakerd_state = XML_PING_ATTR_PACEMAKERDSTATE_WAITPING;
945d2e
         startup_trigger = mainloop_add_trigger(G_PRIORITY_HIGH, init_children_processes, NULL);
945d2e
     } else {
945d2e
+        if (running_with_sbd) {
945d2e
+            crm_warn("Enabling SBD_SYNC_RESOURCE_STARTUP would (if supported "
945d2e
+                     "by your sbd version) improve reliability of "
945d2e
+                     "interworking between SBD & pacemaker.");
945d2e
+        }
945d2e
         pacemakerd_state = XML_PING_ATTR_PACEMAKERDSTATE_STARTINGDAEMONS;
945d2e
         init_children_processes(NULL);
945d2e
     }
945d2e
diff --git a/include/crm/common/options_internal.h b/include/crm/common/options_internal.h
945d2e
index db54da4..d0429c9 100644
945d2e
--- a/include/crm/common/options_internal.h
945d2e
+++ b/include/crm/common/options_internal.h
945d2e
@@ -111,6 +111,7 @@ bool pcmk__valid_utilization(const char *value);
945d2e
 
945d2e
 // from watchdog.c
945d2e
 long pcmk__get_sbd_timeout(void);
945d2e
+bool pcmk__get_sbd_sync_resource_startup(void);
945d2e
 long pcmk__auto_watchdog_timeout(void);
945d2e
 bool pcmk__valid_sbd_timeout(const char *value);
945d2e
 
945d2e
diff --git a/lib/common/watchdog.c b/lib/common/watchdog.c
945d2e
index 9d8896b..8838be6 100644
945d2e
--- a/lib/common/watchdog.c
945d2e
+++ b/lib/common/watchdog.c
945d2e
@@ -227,6 +227,21 @@ pcmk__get_sbd_timeout(void)
945d2e
     return sbd_timeout;
945d2e
 }
945d2e
 
945d2e
+bool
945d2e
+pcmk__get_sbd_sync_resource_startup(void)
945d2e
+{
945d2e
+    // Result of the environment lookup, and whether it has been done yet
945d2e
+    static bool enabled = false;
945d2e
+    static bool looked_up = false;
945d2e
+
945d2e
+    if (looked_up) {
945d2e
+        return enabled;
945d2e
+    }
945d2e
+    enabled = crm_is_true(getenv("SBD_SYNC_RESOURCE_STARTUP"));
945d2e
+    looked_up = true;
945d2e
+    return enabled;
945d2e
+}
945d2e
+
945d2e
 long
945d2e
 pcmk__auto_watchdog_timeout()
945d2e
 {
945d2e
-- 
945d2e
1.8.3.1
945d2e
945d2e
945d2e
From 567cb6ec6f317af9e973321633950ef26f43c486 Mon Sep 17 00:00:00 2001
945d2e
From: Klaus Wenninger <klaus.wenninger@aon.at>
945d2e
Date: Thu, 23 Jul 2020 23:00:23 +0200
945d2e
Subject: [PATCH 6/6] Fix: pacemakerd: improve logging when synced with SBD
945d2e
945d2e
---
945d2e
 daemons/pacemakerd/pacemakerd.c | 8 +++++++-
945d2e
 1 file changed, 7 insertions(+), 1 deletion(-)
945d2e
945d2e
diff --git a/daemons/pacemakerd/pacemakerd.c b/daemons/pacemakerd/pacemakerd.c
945d2e
index e91982a..c888b73 100644
945d2e
--- a/daemons/pacemakerd/pacemakerd.c
945d2e
+++ b/daemons/pacemakerd/pacemakerd.c
945d2e
@@ -456,6 +456,7 @@ pcmk_shutdown_worker(gpointer user_data)
945d2e
     if (!fatal_error && running_with_sbd &&
945d2e
         pcmk__get_sbd_sync_resource_startup() &&
945d2e
         !shutdown_complete_state_reported_client_closed) {
945d2e
+        crm_notice("Waiting for SBD to pick up shutdown-complete-state.");
945d2e
         return TRUE;
945d2e
     }
945d2e
 
945d2e
@@ -546,10 +547,14 @@ pcmk_handle_ping_request(pcmk__client_t *c, xmlNode *msg, uint32_t id)
945d2e
         if (crm_str_eq(pacemakerd_state,
945d2e
                        XML_PING_ATTR_PACEMAKERDSTATE_SHUTDOWNCOMPLETE,
945d2e
                        TRUE)) {
945d2e
+            if (pcmk__get_sbd_sync_resource_startup()) {
945d2e
+                crm_notice("Shutdown-complete-state passed to SBD.");
945d2e
+            }
945d2e
             shutdown_complete_state_reported_to = c->pid;
945d2e
         } else if (crm_str_eq(pacemakerd_state,
945d2e
                               XML_PING_ATTR_PACEMAKERDSTATE_WAITPING,
945d2e
                               TRUE)) {
945d2e
+            crm_notice("Received startup-trigger from SBD.");
945d2e
             pacemakerd_state = XML_PING_ATTR_PACEMAKERDSTATE_STARTINGDAEMONS;
945d2e
             mainloop_set_trigger(startup_trigger);
945d2e
         }
945d2e
@@ -1250,12 +1255,13 @@ main(int argc, char **argv)
945d2e
     mainloop_add_signal(SIGINT, pcmk_shutdown);
945d2e
 
945d2e
     if ((running_with_sbd) && pcmk__get_sbd_sync_resource_startup()) {
945d2e
+        crm_notice("Waiting for startup-trigger from SBD.");
945d2e
         pacemakerd_state = XML_PING_ATTR_PACEMAKERDSTATE_WAITPING;
945d2e
         startup_trigger = mainloop_add_trigger(G_PRIORITY_HIGH, init_children_processes, NULL);
945d2e
     } else {
945d2e
         if (running_with_sbd) {
945d2e
             crm_warn("Enabling SBD_SYNC_RESOURCE_STARTUP would (if supported "
945d2e
-                     "by your sbd version) improve reliability of "
945d2e
+                     "by your SBD version) improve reliability of "
945d2e
                      "interworking between SBD & pacemaker.");
945d2e
         }
945d2e
         pacemakerd_state = XML_PING_ATTR_PACEMAKERDSTATE_STARTINGDAEMONS;
945d2e
-- 
945d2e
1.8.3.1
945d2e