587943
From 68139dc8ff5efbfd81d3b5e868462e7eaefa2c74 Mon Sep 17 00:00:00 2001
587943
From: Klaus Wenninger <klaus.wenninger@aon.at>
587943
Date: Mon, 25 Jan 2021 15:35:33 +0100
587943
Subject: [PATCH 1/7] Fix: crm_mon: add explicit void to one_shot prototype for
587943
 compat
587943
587943
---
587943
 tools/crm_mon.c | 2 +-
587943
 1 file changed, 1 insertion(+), 1 deletion(-)
587943
587943
diff --git a/tools/crm_mon.c b/tools/crm_mon.c
587943
index 0981634..1eca1b7 100644
587943
--- a/tools/crm_mon.c
587943
+++ b/tools/crm_mon.c
587943
@@ -1226,7 +1226,7 @@ handle_connection_failures(int rc)
587943
 }
587943
 
587943
 static void
587943
-one_shot()
587943
+one_shot(void)
587943
 {
587943
     int rc;
587943
 
587943
-- 
587943
1.8.3.1
587943
587943
587943
From 8c7a01f8880efff8457e8421c381082b250d4512 Mon Sep 17 00:00:00 2001
587943
From: Klaus Wenninger <klaus.wenninger@aon.at>
587943
Date: Mon, 25 Jan 2021 16:26:30 +0100
587943
Subject: [PATCH 2/7] Refactor: crm_mon: cib_connect &
587943
 handle_connection_failures -> new rc
587943
587943
---
587943
 tools/crm_mon.c | 62 ++++++++++++++++++++++++++++++++-------------------------
587943
 1 file changed, 35 insertions(+), 27 deletions(-)
587943
587943
diff --git a/tools/crm_mon.c b/tools/crm_mon.c
587943
index 1eca1b7..3fbac5f 100644
587943
--- a/tools/crm_mon.c
587943
+++ b/tools/crm_mon.c
587943
@@ -690,7 +690,7 @@ reconnect_after_timeout(gpointer data)
587943
 
587943
     print_as(output_format, "Reconnecting...\n");
587943
     fencing_connect();
587943
-    if (cib_connect(TRUE) == pcmk_ok) {
587943
+    if (cib_connect(TRUE) == pcmk_rc_ok) {
587943
         /* Redraw the screen and reinstall ourselves to get called after another reconnect_msec. */
587943
         mon_refresh_display(NULL);
587943
         return FALSE;
587943
@@ -804,16 +804,17 @@ fencing_connect(void)
587943
 static int
587943
 cib_connect(gboolean full)
587943
 {
587943
-    int rc = pcmk_ok;
587943
+    int rc = pcmk_rc_ok;
587943
     static gboolean need_pass = TRUE;
587943
 
587943
-    CRM_CHECK(cib != NULL, return -EINVAL);
587943
+    CRM_CHECK(cib != NULL, return EINVAL);
587943
 
587943
     if (getenv("CIB_passwd") != NULL) {
587943
         need_pass = FALSE;
587943
     }
587943
 
587943
-    if (cib->state == cib_connected_query || cib->state == cib_connected_command) {
587943
+    if (cib->state == cib_connected_query ||
587943
+        cib->state == cib_connected_command) {
587943
         return rc;
587943
     }
587943
 
587943
@@ -825,37 +826,44 @@ cib_connect(gboolean full)
587943
      * @TODO Add a password prompt (maybe including input) function to
587943
      *       pcmk__output_t and use it in libcib.
587943
      */
587943
-    if ((output_format == mon_output_console) && need_pass && (cib->variant == cib_remote)) {
587943
+    if ((output_format == mon_output_console) &&
587943
+         need_pass &&
587943
+         (cib->variant == cib_remote)) {
587943
         need_pass = FALSE;
587943
         print_as(output_format, "Password:");
587943
     }
587943
 
587943
-    rc = cib->cmds->signon(cib, crm_system_name, cib_query);
587943
-    if (rc != pcmk_ok) {
587943
+    rc = pcmk_legacy2rc(cib->cmds->signon(cib, crm_system_name, cib_query));
587943
+    if (rc != pcmk_rc_ok) {
587943
         out->err(out, "Could not connect to the CIB: %s",
587943
-                 pcmk_strerror(rc));
587943
+                 pcmk_rc_str(rc));
587943
         return rc;
587943
     }
587943
 
587943
-    rc = cib->cmds->query(cib, NULL, &current_cib, cib_scope_local | cib_sync_call);
587943
+    rc = pcmk_legacy2rc(cib->cmds->query(cib, NULL, &current_cib,
587943
+                                         cib_scope_local | cib_sync_call));
587943
 
587943
-    if (rc == pcmk_ok && full) {
587943
-        rc = cib->cmds->set_connection_dnotify(cib, mon_cib_connection_destroy);
587943
-        if (rc == -EPROTONOSUPPORT) {
587943
-            print_as
587943
-                (output_format, "Notification setup not supported, won't be able to reconnect after failure");
587943
+    if (rc == pcmk_rc_ok && full) {
587943
+        rc = pcmk_legacy2rc(cib->cmds->set_connection_dnotify(cib,
587943
+            mon_cib_connection_destroy));
587943
+        if (rc == EPROTONOSUPPORT) {
587943
+            print_as(output_format,
587943
+                     "Notification setup not supported, won't be "
587943
+                     "able to reconnect after failure");
587943
             if (output_format == mon_output_console) {
587943
                 sleep(2);
587943
             }
587943
-            rc = pcmk_ok;
587943
+            rc = pcmk_rc_ok;
587943
         }
587943
 
587943
-        if (rc == pcmk_ok) {
587943
-            cib->cmds->del_notify_callback(cib, T_CIB_DIFF_NOTIFY, crm_diff_update);
587943
-            rc = cib->cmds->add_notify_callback(cib, T_CIB_DIFF_NOTIFY, crm_diff_update);
587943
+        if (rc == pcmk_rc_ok) {
587943
+            cib->cmds->del_notify_callback(cib, T_CIB_DIFF_NOTIFY,
587943
+                                           crm_diff_update);
587943
+            rc = pcmk_legacy2rc(cib->cmds->add_notify_callback(cib,
587943
+                                    T_CIB_DIFF_NOTIFY, crm_diff_update));
587943
         }
587943
 
587943
-        if (rc != pcmk_ok) {
587943
+        if (rc != pcmk_rc_ok) {
587943
             out->err(out, "Notification setup failed, could not monitor CIB actions");
587943
             clean_up_cib_connection();
587943
             clean_up_fencing_connection();
587943
@@ -1206,20 +1214,20 @@ reconcile_output_format(pcmk__common_args_t *args) {
587943
 static void
587943
 handle_connection_failures(int rc)
587943
 {
587943
-    if (rc == pcmk_ok) {
587943
+    if (rc == pcmk_rc_ok) {
587943
         return;
587943
     }
587943
 
587943
     if (output_format == mon_output_monitor) {
587943
         g_set_error(&error, PCMK__EXITC_ERROR, CRM_EX_ERROR, "CLUSTER CRIT: Connection to cluster failed: %s",
587943
-                    pcmk_strerror(rc));
587943
+                    pcmk_rc_str(rc));
587943
         rc = MON_STATUS_CRIT;
587943
-    } else if (rc == -ENOTCONN) {
587943
+    } else if (rc == ENOTCONN) {
587943
         g_set_error(&error, PCMK__EXITC_ERROR, CRM_EX_ERROR, "Error: cluster is not available on this node");
587943
-        rc = crm_errno2exit(rc);
587943
+        rc = pcmk_rc2exitc(rc);
587943
     } else {
587943
-        g_set_error(&error, PCMK__EXITC_ERROR, CRM_EX_ERROR, "Connection to cluster failed: %s", pcmk_strerror(rc));
587943
-        rc = crm_errno2exit(rc);
587943
+        g_set_error(&error, PCMK__EXITC_ERROR, CRM_EX_ERROR, "Connection to cluster failed: %s", pcmk_rc_str(rc));
587943
+        rc = pcmk_rc2exitc(rc);
587943
     }
587943
 
587943
     clean_up(rc);
587943
@@ -1478,7 +1486,7 @@ main(int argc, char **argv)
587943
         fencing_connect();
587943
         rc = cib_connect(TRUE);
587943
 
587943
-        if (rc != pcmk_ok) {
587943
+        if (rc != pcmk_rc_ok) {
587943
             sleep(options.reconnect_msec / 1000);
587943
 #if CURSES_ENABLED
587943
             if (output_format == mon_output_console) {
587943
@@ -1490,7 +1498,7 @@ main(int argc, char **argv)
587943
             printf("Writing html to %s ...\n", args->output_dest);
587943
         }
587943
 
587943
-    } while (rc == -ENOTCONN);
587943
+    } while (rc == ENOTCONN);
587943
 
587943
     handle_connection_failures(rc);
587943
     set_fencing_options(interactive_fence_level);
587943
-- 
587943
1.8.3.1
587943
587943
587943
From 9b8fb7b608280f65a3b76d66a99b575a4da70944 Mon Sep 17 00:00:00 2001
587943
From: Klaus Wenninger <klaus.wenninger@aon.at>
587943
Date: Mon, 25 Jan 2021 18:26:04 +0100
587943
Subject: [PATCH 3/7] Fix: tools: Report pacemakerd in state waiting for sbd
587943
587943
Waiting for pacemakerd to report that all subdaemons are started
587943
before trying to connect to cib and fencer should remove the
587943
potential race introduced by making fencer connection failure
587943
non-fatal when cib is faster to come up.
587943
---
587943
 tools/crm_mon.c | 158 ++++++++++++++++++++++++++++++++++++++++++++++++++++----
587943
 tools/crm_mon.h |   1 +
587943
 2 files changed, 148 insertions(+), 11 deletions(-)
587943
587943
diff --git a/tools/crm_mon.c b/tools/crm_mon.c
587943
index 3fbac5f..61f070d 100644
587943
--- a/tools/crm_mon.c
587943
+++ b/tools/crm_mon.c
587943
@@ -132,6 +132,7 @@ static void handle_connection_failures(int rc);
587943
 static int mon_refresh_display(gpointer user_data);
587943
 static int cib_connect(gboolean full);
587943
 static int fencing_connect(void);
587943
+static int pacemakerd_status(void);
587943
 static void mon_st_callback_event(stonith_t * st, stonith_event_t * e);
587943
 static void mon_st_callback_display(stonith_t * st, stonith_event_t * e);
587943
 static void refresh_after_event(gboolean data_updated);
587943
@@ -689,11 +690,13 @@ reconnect_after_timeout(gpointer data)
587943
     }
587943
 
587943
     print_as(output_format, "Reconnecting...\n");
587943
-    fencing_connect();
587943
-    if (cib_connect(TRUE) == pcmk_rc_ok) {
587943
-        /* Redraw the screen and reinstall ourselves to get called after another reconnect_msec. */
587943
-        mon_refresh_display(NULL);
587943
-        return FALSE;
587943
+    if (pacemakerd_status() == pcmk_rc_ok) {
587943
+        fencing_connect();
587943
+        if (cib_connect(TRUE) == pcmk_rc_ok) {
587943
+            /* Redraw the screen and reinstall ourselves to get called after another reconnect_msec. */
587943
+            mon_refresh_display(NULL);
587943
+            return FALSE;
587943
+        }
587943
     }
587943
 
587943
     reconnect_timer = g_timeout_add(options.reconnect_msec, reconnect_after_timeout, NULL);
587943
@@ -840,6 +843,13 @@ cib_connect(gboolean full)
587943
         return rc;
587943
     }
587943
 
587943
+#if CURSES_ENABLED
587943
+    /* just show this if refresh is gonna remove all traces */
587943
+    if (output_format == mon_output_console) {
587943
+        print_as(output_format ,"Waiting for CIB ...\n");
587943
+    }
587943
+#endif
587943
+
587943
     rc = pcmk_legacy2rc(cib->cmds->query(cib, NULL, &current_cib,
587943
                                          cib_scope_local | cib_sync_call));
587943
 
587943
@@ -904,6 +914,121 @@ set_fencing_options(int level)
587943
     }
587943
 }
587943
 
587943
+/* Before trying to connect to fencer or cib check for state of
587943
+   pacemakerd - just no sense in trying till pacemakerd has
587943
+   taken care of starting all the sub-processes
587943
+
587943
+   Only noteworthy thing to show here is when pacemakerd is
587943
+   waiting for startup-trigger from SBD.
587943
+ */
587943
+static void
587943
+pacemakerd_event_cb(pcmk_ipc_api_t *pacemakerd_api,
587943
+                    enum pcmk_ipc_event event_type, crm_exit_t status,
587943
+                    void *event_data, void *user_data)
587943
+{
587943
+    pcmk_pacemakerd_api_reply_t *reply = event_data;
587943
+    enum pcmk_pacemakerd_state *state =
587943
+        (enum pcmk_pacemakerd_state *) user_data;
587943
+
587943
+    /* we are just interested in the latest reply */
587943
+    *state = pcmk_pacemakerd_state_invalid;
587943
+
587943
+    switch (event_type) {
587943
+        case pcmk_ipc_event_reply:
587943
+            break;
587943
+
587943
+        default:
587943
+            return;
587943
+    }
587943
+
587943
+    if (status != CRM_EX_OK) {
587943
+        out->err(out, "Bad reply from pacemakerd: %s",
587943
+                 crm_exit_str(status));
587943
+        return;
587943
+    }
587943
+
587943
+    if (reply->reply_type != pcmk_pacemakerd_reply_ping) {
587943
+        out->err(out, "Unknown reply type %d from pacemakerd",
587943
+                 reply->reply_type);
587943
+    } else {
587943
+        if ((reply->data.ping.last_good != (time_t) 0) &&
587943
+            (reply->data.ping.status == pcmk_rc_ok)) {
587943
+            *state = reply->data.ping.state;
587943
+        }
587943
+    }
587943
+}
587943
+
587943
+static int
587943
+pacemakerd_status(void)
587943
+{
587943
+    int rc = pcmk_rc_ok;
587943
+    pcmk_ipc_api_t *pacemakerd_api = NULL;
587943
+    enum pcmk_pacemakerd_state state = pcmk_pacemakerd_state_invalid;
587943
+
587943
+    if (!pcmk_is_set(options.mon_ops, mon_op_cib_native)) {
587943
+        /* we don't need fully functional pacemakerd otherwise */
587943
+        return rc;
587943
+    }
587943
+    if (cib != NULL &&
587943
+        (cib->state == cib_connected_query ||
587943
+         cib->state == cib_connected_command)) {
587943
+        /* As long as we have a cib-connection let's go with
587943
+         * that to fetch further cluster-status and avoid
587943
+         * unnecessary pings to pacemakerd.
587943
+         * If cluster is going down and fencer is down already
587943
+         * this will lead to a silently failing fencer reconnect.
587943
+         * On cluster startup we shouldn't see this situation
587943
+         * as first we do is wait for pacemakerd to report all
587943
+         * daemons running.
587943
+         */
587943
+        return rc;
587943
+    }
587943
+    rc = pcmk_new_ipc_api(&pacemakerd_api, pcmk_ipc_pacemakerd);
587943
+    if (pacemakerd_api == NULL) {
587943
+        out->err(out, "Could not connect to pacemakerd: %s",
587943
+                 pcmk_rc_str(rc));
587943
+        /* this is unrecoverable so return with rc we have */
587943
+        return rc;
587943
+    }
587943
+    pcmk_register_ipc_callback(pacemakerd_api, pacemakerd_event_cb, (void *) &state);
587943
+    rc = pcmk_connect_ipc(pacemakerd_api, pcmk_ipc_dispatch_poll);
587943
+    if (rc == pcmk_rc_ok) {
587943
+        rc = pcmk_pacemakerd_api_ping(pacemakerd_api, crm_system_name);
587943
+        if (rc == pcmk_rc_ok) {
587943
+            rc = pcmk_poll_ipc(pacemakerd_api, options.reconnect_msec/2);
587943
+            if (rc == pcmk_rc_ok) {
587943
+                pcmk_dispatch_ipc(pacemakerd_api);
587943
+                rc = ENOTCONN;
587943
+                switch (state) {
587943
+                    case pcmk_pacemakerd_state_running:
587943
+                        rc = pcmk_rc_ok;
587943
+                        break;
587943
+                    case pcmk_pacemakerd_state_starting_daemons:
587943
+                        print_as(output_format ,"Pacemaker daemons starting ...\n");
587943
+                        break;
587943
+                    case pcmk_pacemakerd_state_wait_for_ping:
587943
+                        print_as(output_format ,"Waiting for startup-trigger from SBD ...\n");
587943
+                        break;
587943
+                    case pcmk_pacemakerd_state_shutting_down:
587943
+                        print_as(output_format ,"Pacemaker daemons shutting down ...\n");
587943
+                        break;
587943
+                    case pcmk_pacemakerd_state_shutdown_complete:
587943
+                        /* assuming pacemakerd doesn't dispatch any pings after entering
587943
+                         * that state unless it is waiting for SBD
587943
+                         */
587943
+                        print_as(output_format ,"Pacemaker daemons shut down - reporting to SBD ...\n");
587943
+                        break;
587943
+                    default:
587943
+                        break;
587943
+                }
587943
+            }
587943
+        }
587943
+    }
587943
+    pcmk_free_ipc_api(pacemakerd_api);
587943
+    /* returning with ENOTCONN triggers a retry */
587943
+    return (rc == pcmk_rc_ok)?rc:ENOTCONN;
587943
+}
587943
+
587943
 #if CURSES_ENABLED
587943
 static const char *
587943
 get_option_desc(char c)
587943
@@ -1033,8 +1158,11 @@ detect_user_input(GIOChannel *channel, GIOCondition condition, gpointer user_dat
587943
     }
587943
 
587943
 refresh:
587943
-    fencing_connect();
587943
-    rc = cib_connect(FALSE);
587943
+    rc = pacemakerd_status();
587943
+    if (rc == pcmk_rc_ok) {
587943
+        fencing_connect();
587943
+        rc = cib_connect(FALSE);
587943
+    }
587943
     if (rc == pcmk_rc_ok) {
587943
         mon_refresh_display(NULL);
587943
     } else {
587943
@@ -1238,9 +1366,13 @@ one_shot(void)
587943
 {
587943
     int rc;
587943
 
587943
-    fencing_connect();
587943
+    rc = pacemakerd_status();
587943
+
587943
+    if (rc == pcmk_rc_ok) {
587943
+        fencing_connect();
587943
+        rc = cib_connect(FALSE);
587943
+    }
587943
 
587943
-    rc = cib_connect(FALSE);
587943
     if (rc == pcmk_rc_ok) {
587943
         mon_refresh_display(NULL);
587943
     } else {
587943
@@ -1316,6 +1448,7 @@ main(int argc, char **argv)
587943
 
587943
                 case cib_native:
587943
                     /* cib & fencing - everything available */
587943
+                    options.mon_ops |= mon_op_cib_native;
587943
                     break;
587943
 
587943
                 case cib_file:
587943
@@ -1483,8 +1616,11 @@ main(int argc, char **argv)
587943
     do {
587943
         print_as(output_format ,"Waiting until cluster is available on this node ...\n");
587943
 
587943
-        fencing_connect();
587943
-        rc = cib_connect(TRUE);
587943
+        rc = pacemakerd_status();
587943
+        if (rc == pcmk_rc_ok) {
587943
+            fencing_connect();
587943
+            rc = cib_connect(TRUE);
587943
+        }
587943
 
587943
         if (rc != pcmk_rc_ok) {
587943
             sleep(options.reconnect_msec / 1000);
587943
diff --git a/tools/crm_mon.h b/tools/crm_mon.h
587943
index 73c926d..b556913 100644
587943
--- a/tools/crm_mon.h
587943
+++ b/tools/crm_mon.h
587943
@@ -91,6 +91,7 @@ typedef enum mon_output_format_e {
587943
 #define mon_op_print_brief          (0x0200U)
587943
 #define mon_op_print_pending        (0x0400U)
587943
 #define mon_op_print_clone_detail   (0x0800U)
587943
+#define mon_op_cib_native           (0x1000U)
587943
 
587943
 #define mon_op_default              (mon_op_print_pending | mon_op_fence_history | mon_op_fence_connect)
587943
 
587943
-- 
587943
1.8.3.1
587943
587943
587943
From 046516dbe66fb2c52b90f36215cf60c5ad3c269b Mon Sep 17 00:00:00 2001
587943
From: Klaus Wenninger <klaus.wenninger@aon.at>
587943
Date: Thu, 28 Jan 2021 16:38:22 +0100
587943
Subject: [PATCH 4/7] Refactor: crm_mon: do refreshes rather via
587943
 refresh_after_event
587943
587943
---
587943
 tools/crm_mon.c | 35 ++++++++++++++---------------------
587943
 1 file changed, 14 insertions(+), 21 deletions(-)
587943
587943
diff --git a/tools/crm_mon.c b/tools/crm_mon.c
587943
index 61f070d..195e7b5 100644
587943
--- a/tools/crm_mon.c
587943
+++ b/tools/crm_mon.c
587943
@@ -135,7 +135,7 @@ static int fencing_connect(void);
587943
 static int pacemakerd_status(void);
587943
 static void mon_st_callback_event(stonith_t * st, stonith_event_t * e);
587943
 static void mon_st_callback_display(stonith_t * st, stonith_event_t * e);
587943
-static void refresh_after_event(gboolean data_updated);
587943
+static void refresh_after_event(gboolean data_updated, gboolean enforce);
587943
 
587943
 static unsigned int
587943
 all_includes(mon_output_format_t fmt) {
587943
@@ -694,13 +694,13 @@ reconnect_after_timeout(gpointer data)
587943
         fencing_connect();
587943
         if (cib_connect(TRUE) == pcmk_rc_ok) {
587943
             /* Redraw the screen and reinstall ourselves to get called after another reconnect_msec. */
587943
-            mon_refresh_display(NULL);
587943
+            refresh_after_event(FALSE, TRUE);
587943
             return FALSE;
587943
         }
587943
     }
587943
 
587943
     reconnect_timer = g_timeout_add(options.reconnect_msec, reconnect_after_timeout, NULL);
587943
-    return TRUE;
587943
+    return FALSE;
587943
 }
587943
 
587943
 /* Called from various places when we are disconnected from the CIB or from the
587943
@@ -1057,7 +1057,6 @@ static gboolean
587943
 detect_user_input(GIOChannel *channel, GIOCondition condition, gpointer user_data)
587943
 {
587943
     int c;
587943
-    int rc;
587943
     gboolean config_mode = FALSE;
587943
 
587943
     while (1) {
587943
@@ -1158,16 +1157,7 @@ detect_user_input(GIOChannel *channel, GIOCondition condition, gpointer user_dat
587943
     }
587943
 
587943
 refresh:
587943
-    rc = pacemakerd_status();
587943
-    if (rc == pcmk_rc_ok) {
587943
-        fencing_connect();
587943
-        rc = cib_connect(FALSE);
587943
-    }
587943
-    if (rc == pcmk_rc_ok) {
587943
-        mon_refresh_display(NULL);
587943
-    } else {
587943
-        handle_connection_failures(rc);
587943
-    }
587943
+    refresh_after_event(FALSE, TRUE);
587943
 
587943
     return TRUE;
587943
 }
587943
@@ -2087,7 +2077,7 @@ crm_diff_update(const char *event, xmlNode * msg)
587943
     }
587943
 
587943
     stale = FALSE;
587943
-    refresh_after_event(cib_updated);
587943
+    refresh_after_event(cib_updated, FALSE);
587943
 }
587943
 
587943
 static int
587943
@@ -2246,7 +2236,7 @@ mon_st_callback_event(stonith_t * st, stonith_event_t * e)
587943
  * fencing event is received or a CIB diff occurrs.
587943
  */
587943
 static void
587943
-refresh_after_event(gboolean data_updated)
587943
+refresh_after_event(gboolean data_updated, gboolean enforce)
587943
 {
587943
     static int updates = 0;
587943
     time_t now = time(NULL);
587943
@@ -2259,12 +2249,15 @@ refresh_after_event(gboolean data_updated)
587943
         refresh_timer = mainloop_timer_add("refresh", 2000, FALSE, mon_trigger_refresh, NULL);
587943
     }
587943
 
587943
-    if ((now - last_refresh) > (options.reconnect_msec / 1000)) {
587943
-        mainloop_set_trigger(refresh_trigger);
587943
+    if (reconnect_timer > 0) {
587943
+        /* we will receive a refresh request after successful reconnect */
587943
         mainloop_timer_stop(refresh_timer);
587943
-        updates = 0;
587943
+        return;
587943
+    }
587943
 
587943
-    } else if(updates >= 10) {
587943
+    if (enforce ||
587943
+        now - last_refresh > options.reconnect_msec / 1000 ||
587943
+        updates >= 10) {
587943
         mainloop_set_trigger(refresh_trigger);
587943
         mainloop_timer_stop(refresh_timer);
587943
         updates = 0;
587943
@@ -2285,7 +2278,7 @@ mon_st_callback_display(stonith_t * st, stonith_event_t * e)
587943
         mon_cib_connection_destroy(NULL);
587943
     } else {
587943
         print_dot(output_format);
587943
-        refresh_after_event(TRUE);
587943
+        refresh_after_event(TRUE, FALSE);
587943
     }
587943
 }
587943
 
587943
-- 
587943
1.8.3.1
587943
587943
587943
From a63af2713f96719fc1d5ef594eb033d0f251187f Mon Sep 17 00:00:00 2001
587943
From: Klaus Wenninger <klaus.wenninger@aon.at>
587943
Date: Thu, 28 Jan 2021 16:52:57 +0100
587943
Subject: [PATCH 5/7] Fix: crm_mon: retry fencer connection as not fatal
587943
 initially
587943
587943
and clean up fencer api to not leak memory on multiple reconnects
587943
---
587943
 tools/crm_mon.c | 8 +++++++-
587943
 1 file changed, 7 insertions(+), 1 deletion(-)
587943
587943
diff --git a/tools/crm_mon.c b/tools/crm_mon.c
587943
index 195e7b5..a768ca9 100644
587943
--- a/tools/crm_mon.c
587943
+++ b/tools/crm_mon.c
587943
@@ -798,7 +798,7 @@ fencing_connect(void)
587943
             st->cmds->register_notification(st, T_STONITH_NOTIFY_HISTORY, mon_st_callback_display);
587943
         }
587943
     } else {
587943
-        st = NULL;
587943
+        clean_up_fencing_connection();
587943
     }
587943
 
587943
     return rc;
587943
@@ -2255,6 +2255,12 @@ refresh_after_event(gboolean data_updated, gboolean enforce)
587943
         return;
587943
     }
587943
 
587943
+    /* as we're not handling initial failure of fencer-connection as
587943
+     * fatal give it a retry here
587943
+     * not getting here if cib-reconnection is already on the way
587943
+     */
587943
+    fencing_connect();
587943
+
587943
     if (enforce ||
587943
         now - last_refresh > options.reconnect_msec / 1000 ||
587943
         updates >= 10) {
587943
-- 
587943
1.8.3.1
587943
587943
587943
From b6f4b5dfc0b5fec8cdc029409fc61252de019415 Mon Sep 17 00:00:00 2001
587943
From: Klaus Wenninger <klaus.wenninger@aon.at>
587943
Date: Thu, 28 Jan 2021 18:08:43 +0100
587943
Subject: [PATCH 6/7] Refactor: crm_mon: have reconnect-timer removed
587943
 implicitly
587943
587943
---
587943
 tools/crm_mon.c | 12 ++++--------
587943
 1 file changed, 4 insertions(+), 8 deletions(-)
587943
587943
diff --git a/tools/crm_mon.c b/tools/crm_mon.c
587943
index a768ca9..4f73379 100644
587943
--- a/tools/crm_mon.c
587943
+++ b/tools/crm_mon.c
587943
@@ -684,23 +684,19 @@ reconnect_after_timeout(gpointer data)
587943
     }
587943
 #endif
587943
 
587943
-    if (reconnect_timer > 0) {
587943
-        g_source_remove(reconnect_timer);
587943
-        reconnect_timer = 0;
587943
-    }
587943
-
587943
     print_as(output_format, "Reconnecting...\n");
587943
     if (pacemakerd_status() == pcmk_rc_ok) {
587943
         fencing_connect();
587943
         if (cib_connect(TRUE) == pcmk_rc_ok) {
587943
-            /* Redraw the screen and reinstall ourselves to get called after another reconnect_msec. */
587943
+            /* trigger redrawing the screen (needs reconnect_timer == 0) */
587943
+            reconnect_timer = 0;
587943
             refresh_after_event(FALSE, TRUE);
587943
-            return FALSE;
587943
+            return G_SOURCE_REMOVE;
587943
         }
587943
     }
587943
 
587943
     reconnect_timer = g_timeout_add(options.reconnect_msec, reconnect_after_timeout, NULL);
587943
-    return FALSE;
587943
+    return G_SOURCE_REMOVE;
587943
 }
587943
 
587943
 /* Called from various places when we are disconnected from the CIB or from the
587943
-- 
587943
1.8.3.1
587943
587943
587943
From 586e69ec38d5273b348c42a61b9bc7bbcc2b93b3 Mon Sep 17 00:00:00 2001
587943
From: Klaus Wenninger <klaus.wenninger@aon.at>
587943
Date: Thu, 28 Jan 2021 21:08:16 +0100
587943
Subject: [PATCH 7/7] Fix: crm_mon: suppress pacemakerd-status for non-text
587943
 output
587943
587943
---
587943
 tools/crm_mon.c | 53 ++++++++++++++++++++++++++++++++---------------------
587943
 1 file changed, 32 insertions(+), 21 deletions(-)
587943
587943
diff --git a/tools/crm_mon.c b/tools/crm_mon.c
587943
index 4f73379..d4d4ac3 100644
587943
--- a/tools/crm_mon.c
587943
+++ b/tools/crm_mon.c
587943
@@ -995,27 +995,38 @@ pacemakerd_status(void)
587943
             if (rc == pcmk_rc_ok) {
587943
                 pcmk_dispatch_ipc(pacemakerd_api);
587943
                 rc = ENOTCONN;
587943
-                switch (state) {
587943
-                    case pcmk_pacemakerd_state_running:
587943
-                        rc = pcmk_rc_ok;
587943
-                        break;
587943
-                    case pcmk_pacemakerd_state_starting_daemons:
587943
-                        print_as(output_format ,"Pacemaker daemons starting ...\n");
587943
-                        break;
587943
-                    case pcmk_pacemakerd_state_wait_for_ping:
587943
-                        print_as(output_format ,"Waiting for startup-trigger from SBD ...\n");
587943
-                        break;
587943
-                    case pcmk_pacemakerd_state_shutting_down:
587943
-                        print_as(output_format ,"Pacemaker daemons shutting down ...\n");
587943
-                        break;
587943
-                    case pcmk_pacemakerd_state_shutdown_complete:
587943
-                        /* assuming pacemakerd doesn't dispatch any pings after entering
587943
-                         * that state unless it is waiting for SBD
587943
-                         */
587943
-                        print_as(output_format ,"Pacemaker daemons shut down - reporting to SBD ...\n");
587943
-                        break;
587943
-                    default:
587943
-                        break;
587943
+                if ((output_format == mon_output_console) ||
587943
+                    (output_format == mon_output_plain)) {
587943
+                    switch (state) {
587943
+                        case pcmk_pacemakerd_state_running:
587943
+                            rc = pcmk_rc_ok;
587943
+                            break;
587943
+                        case pcmk_pacemakerd_state_starting_daemons:
587943
+                            print_as(output_format ,"Pacemaker daemons starting ...\n");
587943
+                            break;
587943
+                        case pcmk_pacemakerd_state_wait_for_ping:
587943
+                            print_as(output_format ,"Waiting for startup-trigger from SBD ...\n");
587943
+                            break;
587943
+                        case pcmk_pacemakerd_state_shutting_down:
587943
+                            print_as(output_format ,"Pacemaker daemons shutting down ...\n");
587943
+                            break;
587943
+                        case pcmk_pacemakerd_state_shutdown_complete:
587943
+                            /* assuming pacemakerd doesn't dispatch any pings after entering
587943
+                            * that state unless it is waiting for SBD
587943
+                            */
587943
+                            print_as(output_format ,"Pacemaker daemons shut down - reporting to SBD ...\n");
587943
+                            break;
587943
+                        default:
587943
+                            break;
587943
+                    }
587943
+                } else {
587943
+                    switch (state) {
587943
+                        case pcmk_pacemakerd_state_running:
587943
+                            rc = pcmk_rc_ok;
587943
+                            break;
587943
+                        default:
587943
+                            break;
587943
+                    }
587943
                 }
587943
             }
587943
         }
587943
-- 
587943
1.8.3.1
587943