c563b9
From 68139dc8ff5efbfd81d3b5e868462e7eaefa2c74 Mon Sep 17 00:00:00 2001
c563b9
From: Klaus Wenninger <klaus.wenninger@aon.at>
c563b9
Date: Mon, 25 Jan 2021 15:35:33 +0100
c563b9
Subject: [PATCH 1/7] Fix: crm_mon: add explicit void to one_shot prototype for
c563b9
 compat
c563b9
c563b9
---
c563b9
 tools/crm_mon.c | 2 +-
c563b9
 1 file changed, 1 insertion(+), 1 deletion(-)
c563b9
c563b9
diff --git a/tools/crm_mon.c b/tools/crm_mon.c
c563b9
index 0981634..1eca1b7 100644
c563b9
--- a/tools/crm_mon.c
c563b9
+++ b/tools/crm_mon.c
c563b9
@@ -1226,7 +1226,7 @@ handle_connection_failures(int rc)
c563b9
 }
c563b9
 
c563b9
 static void
c563b9
-one_shot()
c563b9
+one_shot(void)
c563b9
 {
c563b9
     int rc;
c563b9
 
c563b9
-- 
c563b9
1.8.3.1
c563b9
c563b9
c563b9
From 8c7a01f8880efff8457e8421c381082b250d4512 Mon Sep 17 00:00:00 2001
c563b9
From: Klaus Wenninger <klaus.wenninger@aon.at>
c563b9
Date: Mon, 25 Jan 2021 16:26:30 +0100
c563b9
Subject: [PATCH 2/7] Refactor: crm_mon: cib_connect &
c563b9
 handle_connection_failures -> new rc
c563b9
c563b9
---
c563b9
 tools/crm_mon.c | 62 ++++++++++++++++++++++++++++++++-------------------------
c563b9
 1 file changed, 35 insertions(+), 27 deletions(-)
c563b9
c563b9
diff --git a/tools/crm_mon.c b/tools/crm_mon.c
c563b9
index 1eca1b7..3fbac5f 100644
c563b9
--- a/tools/crm_mon.c
c563b9
+++ b/tools/crm_mon.c
c563b9
@@ -690,7 +690,7 @@ reconnect_after_timeout(gpointer data)
c563b9
 
c563b9
     print_as(output_format, "Reconnecting...\n");
c563b9
     fencing_connect();
c563b9
-    if (cib_connect(TRUE) == pcmk_ok) {
c563b9
+    if (cib_connect(TRUE) == pcmk_rc_ok) {
c563b9
         /* Redraw the screen and reinstall ourselves to get called after another reconnect_msec. */
c563b9
         mon_refresh_display(NULL);
c563b9
         return FALSE;
c563b9
@@ -804,16 +804,17 @@ fencing_connect(void)
c563b9
 static int
c563b9
 cib_connect(gboolean full)
c563b9
 {
c563b9
-    int rc = pcmk_ok;
c563b9
+    int rc = pcmk_rc_ok;
c563b9
     static gboolean need_pass = TRUE;
c563b9
 
c563b9
-    CRM_CHECK(cib != NULL, return -EINVAL);
c563b9
+    CRM_CHECK(cib != NULL, return EINVAL);
c563b9
 
c563b9
     if (getenv("CIB_passwd") != NULL) {
c563b9
         need_pass = FALSE;
c563b9
     }
c563b9
 
c563b9
-    if (cib->state == cib_connected_query || cib->state == cib_connected_command) {
c563b9
+    if (cib->state == cib_connected_query ||
c563b9
+        cib->state == cib_connected_command) {
c563b9
         return rc;
c563b9
     }
c563b9
 
c563b9
@@ -825,37 +826,44 @@ cib_connect(gboolean full)
c563b9
      * @TODO Add a password prompt (maybe including input) function to
c563b9
      *       pcmk__output_t and use it in libcib.
c563b9
      */
c563b9
-    if ((output_format == mon_output_console) && need_pass && (cib->variant == cib_remote)) {
c563b9
+    if ((output_format == mon_output_console) &&
c563b9
+         need_pass &&
c563b9
+         (cib->variant == cib_remote)) {
c563b9
         need_pass = FALSE;
c563b9
         print_as(output_format, "Password:");
c563b9
     }
c563b9
 
c563b9
-    rc = cib->cmds->signon(cib, crm_system_name, cib_query);
c563b9
-    if (rc != pcmk_ok) {
c563b9
+    rc = pcmk_legacy2rc(cib->cmds->signon(cib, crm_system_name, cib_query));
c563b9
+    if (rc != pcmk_rc_ok) {
c563b9
         out->err(out, "Could not connect to the CIB: %s",
c563b9
-                 pcmk_strerror(rc));
c563b9
+                 pcmk_rc_str(rc));
c563b9
         return rc;
c563b9
     }
c563b9
 
c563b9
-    rc = cib->cmds->query(cib, NULL, &current_cib, cib_scope_local | cib_sync_call);
c563b9
+    rc = pcmk_legacy2rc(cib->cmds->query(cib, NULL, &current_cib,
c563b9
+                                         cib_scope_local | cib_sync_call));
c563b9
 
c563b9
-    if (rc == pcmk_ok && full) {
c563b9
-        rc = cib->cmds->set_connection_dnotify(cib, mon_cib_connection_destroy);
c563b9
-        if (rc == -EPROTONOSUPPORT) {
c563b9
-            print_as
c563b9
-                (output_format, "Notification setup not supported, won't be able to reconnect after failure");
c563b9
+    if (rc == pcmk_rc_ok && full) {
c563b9
+        rc = pcmk_legacy2rc(cib->cmds->set_connection_dnotify(cib,
c563b9
+            mon_cib_connection_destroy));
c563b9
+        if (rc == EPROTONOSUPPORT) {
c563b9
+            print_as(output_format,
c563b9
+                     "Notification setup not supported, won't be "
c563b9
+                     "able to reconnect after failure");
c563b9
             if (output_format == mon_output_console) {
c563b9
                 sleep(2);
c563b9
             }
c563b9
-            rc = pcmk_ok;
c563b9
+            rc = pcmk_rc_ok;
c563b9
         }
c563b9
 
c563b9
-        if (rc == pcmk_ok) {
c563b9
-            cib->cmds->del_notify_callback(cib, T_CIB_DIFF_NOTIFY, crm_diff_update);
c563b9
-            rc = cib->cmds->add_notify_callback(cib, T_CIB_DIFF_NOTIFY, crm_diff_update);
c563b9
+        if (rc == pcmk_rc_ok) {
c563b9
+            cib->cmds->del_notify_callback(cib, T_CIB_DIFF_NOTIFY,
c563b9
+                                           crm_diff_update);
c563b9
+            rc = pcmk_legacy2rc(cib->cmds->add_notify_callback(cib,
c563b9
+                                    T_CIB_DIFF_NOTIFY, crm_diff_update));
c563b9
         }
c563b9
 
c563b9
-        if (rc != pcmk_ok) {
c563b9
+        if (rc != pcmk_rc_ok) {
c563b9
             out->err(out, "Notification setup failed, could not monitor CIB actions");
c563b9
             clean_up_cib_connection();
c563b9
             clean_up_fencing_connection();
c563b9
@@ -1206,20 +1214,20 @@ reconcile_output_format(pcmk__common_args_t *args) {
c563b9
 static void
c563b9
 handle_connection_failures(int rc)
c563b9
 {
c563b9
-    if (rc == pcmk_ok) {
c563b9
+    if (rc == pcmk_rc_ok) {
c563b9
         return;
c563b9
     }
c563b9
 
c563b9
     if (output_format == mon_output_monitor) {
c563b9
         g_set_error(&error, PCMK__EXITC_ERROR, CRM_EX_ERROR, "CLUSTER CRIT: Connection to cluster failed: %s",
c563b9
-                    pcmk_strerror(rc));
c563b9
+                    pcmk_rc_str(rc));
c563b9
         rc = MON_STATUS_CRIT;
c563b9
-    } else if (rc == -ENOTCONN) {
c563b9
+    } else if (rc == ENOTCONN) {
c563b9
         g_set_error(&error, PCMK__EXITC_ERROR, CRM_EX_ERROR, "Error: cluster is not available on this node");
c563b9
-        rc = crm_errno2exit(rc);
c563b9
+        rc = pcmk_rc2exitc(rc);
c563b9
     } else {
c563b9
-        g_set_error(&error, PCMK__EXITC_ERROR, CRM_EX_ERROR, "Connection to cluster failed: %s", pcmk_strerror(rc));
c563b9
-        rc = crm_errno2exit(rc);
c563b9
+        g_set_error(&error, PCMK__EXITC_ERROR, CRM_EX_ERROR, "Connection to cluster failed: %s", pcmk_rc_str(rc));
c563b9
+        rc = pcmk_rc2exitc(rc);
c563b9
     }
c563b9
 
c563b9
     clean_up(rc);
c563b9
@@ -1478,7 +1486,7 @@ main(int argc, char **argv)
c563b9
         fencing_connect();
c563b9
         rc = cib_connect(TRUE);
c563b9
 
c563b9
-        if (rc != pcmk_ok) {
c563b9
+        if (rc != pcmk_rc_ok) {
c563b9
             sleep(options.reconnect_msec / 1000);
c563b9
 #if CURSES_ENABLED
c563b9
             if (output_format == mon_output_console) {
c563b9
@@ -1490,7 +1498,7 @@ main(int argc, char **argv)
c563b9
             printf("Writing html to %s ...\n", args->output_dest);
c563b9
         }
c563b9
 
c563b9
-    } while (rc == -ENOTCONN);
c563b9
+    } while (rc == ENOTCONN);
c563b9
 
c563b9
     handle_connection_failures(rc);
c563b9
     set_fencing_options(interactive_fence_level);
c563b9
-- 
c563b9
1.8.3.1
c563b9
c563b9
c563b9
From 9b8fb7b608280f65a3b76d66a99b575a4da70944 Mon Sep 17 00:00:00 2001
c563b9
From: Klaus Wenninger <klaus.wenninger@aon.at>
c563b9
Date: Mon, 25 Jan 2021 18:26:04 +0100
c563b9
Subject: [PATCH 3/7] Fix: tools: Report pacemakerd in state waiting for sbd
c563b9
c563b9
Waiting for pacemakerd to report that all subdaemons are started
c563b9
before trying to connect to cib and fencer should remove the
c563b9
potential race introduced by making fencer connection failure
c563b9
non-fatal when cib is faster to come up.
c563b9
---
c563b9
 tools/crm_mon.c | 158 ++++++++++++++++++++++++++++++++++++++++++++++++++++----
c563b9
 tools/crm_mon.h |   1 +
c563b9
 2 files changed, 148 insertions(+), 11 deletions(-)
c563b9
c563b9
diff --git a/tools/crm_mon.c b/tools/crm_mon.c
c563b9
index 3fbac5f..61f070d 100644
c563b9
--- a/tools/crm_mon.c
c563b9
+++ b/tools/crm_mon.c
c563b9
@@ -132,6 +132,7 @@ static void handle_connection_failures(int rc);
c563b9
 static int mon_refresh_display(gpointer user_data);
c563b9
 static int cib_connect(gboolean full);
c563b9
 static int fencing_connect(void);
c563b9
+static int pacemakerd_status(void);
c563b9
 static void mon_st_callback_event(stonith_t * st, stonith_event_t * e);
c563b9
 static void mon_st_callback_display(stonith_t * st, stonith_event_t * e);
c563b9
 static void refresh_after_event(gboolean data_updated);
c563b9
@@ -689,11 +690,13 @@ reconnect_after_timeout(gpointer data)
c563b9
     }
c563b9
 
c563b9
     print_as(output_format, "Reconnecting...\n");
c563b9
-    fencing_connect();
c563b9
-    if (cib_connect(TRUE) == pcmk_rc_ok) {
c563b9
-        /* Redraw the screen and reinstall ourselves to get called after another reconnect_msec. */
c563b9
-        mon_refresh_display(NULL);
c563b9
-        return FALSE;
c563b9
+    if (pacemakerd_status() == pcmk_rc_ok) {
c563b9
+        fencing_connect();
c563b9
+        if (cib_connect(TRUE) == pcmk_rc_ok) {
c563b9
+            /* Redraw the screen and reinstall ourselves to get called after another reconnect_msec. */
c563b9
+            mon_refresh_display(NULL);
c563b9
+            return FALSE;
c563b9
+        }
c563b9
     }
c563b9
 
c563b9
     reconnect_timer = g_timeout_add(options.reconnect_msec, reconnect_after_timeout, NULL);
c563b9
@@ -840,6 +843,13 @@ cib_connect(gboolean full)
c563b9
         return rc;
c563b9
     }
c563b9
 
c563b9
+#if CURSES_ENABLED
c563b9
+    /* just show this if refresh is gonna remove all traces */
c563b9
+    if (output_format == mon_output_console) {
c563b9
+        print_as(output_format ,"Waiting for CIB ...\n");
c563b9
+    }
c563b9
+#endif
c563b9
+
c563b9
     rc = pcmk_legacy2rc(cib->cmds->query(cib, NULL, &current_cib,
c563b9
                                          cib_scope_local | cib_sync_call));
c563b9
 
c563b9
@@ -904,6 +914,121 @@ set_fencing_options(int level)
c563b9
     }
c563b9
 }
c563b9
 
c563b9
+/* Before trying to connect to fencer or cib check for state of
c563b9
+   pacemakerd - just no sense in trying till pacemakerd has
c563b9
+   taken care of starting all the sub-processes
c563b9
+
c563b9
+   Only noteworthy thing to show here is when pacemakerd is
c563b9
+   waiting for startup-trigger from SBD.
c563b9
+ */
c563b9
+static void
c563b9
+pacemakerd_event_cb(pcmk_ipc_api_t *pacemakerd_api,
c563b9
+                    enum pcmk_ipc_event event_type, crm_exit_t status,
c563b9
+                    void *event_data, void *user_data)
c563b9
+{
c563b9
+    pcmk_pacemakerd_api_reply_t *reply = event_data;
c563b9
+    enum pcmk_pacemakerd_state *state =
c563b9
+        (enum pcmk_pacemakerd_state *) user_data;
c563b9
+
c563b9
+    /* we are just interested in the latest reply */
c563b9
+    *state = pcmk_pacemakerd_state_invalid;
c563b9
+
c563b9
+    switch (event_type) {
c563b9
+        case pcmk_ipc_event_reply:
c563b9
+            break;
c563b9
+
c563b9
+        default:
c563b9
+            return;
c563b9
+    }
c563b9
+
c563b9
+    if (status != CRM_EX_OK) {
c563b9
+        out->err(out, "Bad reply from pacemakerd: %s",
c563b9
+                 crm_exit_str(status));
c563b9
+        return;
c563b9
+    }
c563b9
+
c563b9
+    if (reply->reply_type != pcmk_pacemakerd_reply_ping) {
c563b9
+        out->err(out, "Unknown reply type %d from pacemakerd",
c563b9
+                 reply->reply_type);
c563b9
+    } else {
c563b9
+        if ((reply->data.ping.last_good != (time_t) 0) &&
c563b9
+            (reply->data.ping.status == pcmk_rc_ok)) {
c563b9
+            *state = reply->data.ping.state;
c563b9
+        }
c563b9
+    }
c563b9
+}
c563b9
+
c563b9
+static int
c563b9
+pacemakerd_status(void)
c563b9
+{
c563b9
+    int rc = pcmk_rc_ok;
c563b9
+    pcmk_ipc_api_t *pacemakerd_api = NULL;
c563b9
+    enum pcmk_pacemakerd_state state = pcmk_pacemakerd_state_invalid;
c563b9
+
c563b9
+    if (!pcmk_is_set(options.mon_ops, mon_op_cib_native)) {
c563b9
+        /* we don't need fully functional pacemakerd otherwise */
c563b9
+        return rc;
c563b9
+    }
c563b9
+    if (cib != NULL &&
c563b9
+        (cib->state == cib_connected_query ||
c563b9
+         cib->state == cib_connected_command)) {
c563b9
+        /* As long as we have a cib-connection let's go with
c563b9
+         * that to fetch further cluster-status and avoid
c563b9
+         * unnecessary pings to pacemakerd.
c563b9
+         * If cluster is going down and fencer is down already
c563b9
+         * this will lead to a silently failing fencer reconnect.
c563b9
+         * On cluster startup we shouldn't see this situation
c563b9
+         * as first we do is wait for pacemakerd to report all
c563b9
+         * daemons running.
c563b9
+         */
c563b9
+        return rc;
c563b9
+    }
c563b9
+    rc = pcmk_new_ipc_api(&pacemakerd_api, pcmk_ipc_pacemakerd);
c563b9
+    if (pacemakerd_api == NULL) {
c563b9
+        out->err(out, "Could not connect to pacemakerd: %s",
c563b9
+                 pcmk_rc_str(rc));
c563b9
+        /* this is unrecoverable so return with rc we have */
c563b9
+        return rc;
c563b9
+    }
c563b9
+    pcmk_register_ipc_callback(pacemakerd_api, pacemakerd_event_cb, (void *) &state);
c563b9
+    rc = pcmk_connect_ipc(pacemakerd_api, pcmk_ipc_dispatch_poll);
c563b9
+    if (rc == pcmk_rc_ok) {
c563b9
+        rc = pcmk_pacemakerd_api_ping(pacemakerd_api, crm_system_name);
c563b9
+        if (rc == pcmk_rc_ok) {
c563b9
+            rc = pcmk_poll_ipc(pacemakerd_api, options.reconnect_msec/2);
c563b9
+            if (rc == pcmk_rc_ok) {
c563b9
+                pcmk_dispatch_ipc(pacemakerd_api);
c563b9
+                rc = ENOTCONN;
c563b9
+                switch (state) {
c563b9
+                    case pcmk_pacemakerd_state_running:
c563b9
+                        rc = pcmk_rc_ok;
c563b9
+                        break;
c563b9
+                    case pcmk_pacemakerd_state_starting_daemons:
c563b9
+                        print_as(output_format ,"Pacemaker daemons starting ...\n");
c563b9
+                        break;
c563b9
+                    case pcmk_pacemakerd_state_wait_for_ping:
c563b9
+                        print_as(output_format ,"Waiting for startup-trigger from SBD ...\n");
c563b9
+                        break;
c563b9
+                    case pcmk_pacemakerd_state_shutting_down:
c563b9
+                        print_as(output_format ,"Pacemaker daemons shutting down ...\n");
c563b9
+                        break;
c563b9
+                    case pcmk_pacemakerd_state_shutdown_complete:
c563b9
+                        /* assuming pacemakerd doesn't dispatch any pings after entering
c563b9
+                         * that state unless it is waiting for SBD
c563b9
+                         */
c563b9
+                        print_as(output_format ,"Pacemaker daemons shut down - reporting to SBD ...\n");
c563b9
+                        break;
c563b9
+                    default:
c563b9
+                        break;
c563b9
+                }
c563b9
+            }
c563b9
+        }
c563b9
+    }
c563b9
+    pcmk_free_ipc_api(pacemakerd_api);
c563b9
+    /* returning with ENOTCONN triggers a retry */
c563b9
+    return (rc == pcmk_rc_ok)?rc:ENOTCONN;
c563b9
+}
c563b9
+
c563b9
 #if CURSES_ENABLED
c563b9
 static const char *
c563b9
 get_option_desc(char c)
c563b9
@@ -1033,8 +1158,11 @@ detect_user_input(GIOChannel *channel, GIOCondition condition, gpointer user_dat
c563b9
     }
c563b9
 
c563b9
 refresh:
c563b9
-    fencing_connect();
c563b9
-    rc = cib_connect(FALSE);
c563b9
+    rc = pacemakerd_status();
c563b9
+    if (rc == pcmk_rc_ok) {
c563b9
+        fencing_connect();
c563b9
+        rc = cib_connect(FALSE);
c563b9
+    }
c563b9
     if (rc == pcmk_rc_ok) {
c563b9
         mon_refresh_display(NULL);
c563b9
     } else {
c563b9
@@ -1238,9 +1366,13 @@ one_shot(void)
c563b9
 {
c563b9
     int rc;
c563b9
 
c563b9
-    fencing_connect();
c563b9
+    rc = pacemakerd_status();
c563b9
+
c563b9
+    if (rc == pcmk_rc_ok) {
c563b9
+        fencing_connect();
c563b9
+        rc = cib_connect(FALSE);
c563b9
+    }
c563b9
 
c563b9
-    rc = cib_connect(FALSE);
c563b9
     if (rc == pcmk_rc_ok) {
c563b9
         mon_refresh_display(NULL);
c563b9
     } else {
c563b9
@@ -1316,6 +1448,7 @@ main(int argc, char **argv)
c563b9
 
c563b9
                 case cib_native:
c563b9
                     /* cib & fencing - everything available */
c563b9
+                    options.mon_ops |= mon_op_cib_native;
c563b9
                     break;
c563b9
 
c563b9
                 case cib_file:
c563b9
@@ -1483,8 +1616,11 @@ main(int argc, char **argv)
c563b9
     do {
c563b9
         print_as(output_format ,"Waiting until cluster is available on this node ...\n");
c563b9
 
c563b9
-        fencing_connect();
c563b9
-        rc = cib_connect(TRUE);
c563b9
+        rc = pacemakerd_status();
c563b9
+        if (rc == pcmk_rc_ok) {
c563b9
+            fencing_connect();
c563b9
+            rc = cib_connect(TRUE);
c563b9
+        }
c563b9
 
c563b9
         if (rc != pcmk_rc_ok) {
c563b9
             sleep(options.reconnect_msec / 1000);
c563b9
diff --git a/tools/crm_mon.h b/tools/crm_mon.h
c563b9
index 73c926d..b556913 100644
c563b9
--- a/tools/crm_mon.h
c563b9
+++ b/tools/crm_mon.h
c563b9
@@ -91,6 +91,7 @@ typedef enum mon_output_format_e {
c563b9
 #define mon_op_print_brief          (0x0200U)
c563b9
 #define mon_op_print_pending        (0x0400U)
c563b9
 #define mon_op_print_clone_detail   (0x0800U)
c563b9
+#define mon_op_cib_native           (0x1000U)
c563b9
 
c563b9
 #define mon_op_default              (mon_op_print_pending | mon_op_fence_history | mon_op_fence_connect)
c563b9
 
c563b9
-- 
c563b9
1.8.3.1
c563b9
c563b9
c563b9
From 046516dbe66fb2c52b90f36215cf60c5ad3c269b Mon Sep 17 00:00:00 2001
c563b9
From: Klaus Wenninger <klaus.wenninger@aon.at>
c563b9
Date: Thu, 28 Jan 2021 16:38:22 +0100
c563b9
Subject: [PATCH 4/7] Refactor: crm_mon: do refreshes rather via
c563b9
 refresh_after_event
c563b9
c563b9
---
c563b9
 tools/crm_mon.c | 35 ++++++++++++++---------------------
c563b9
 1 file changed, 14 insertions(+), 21 deletions(-)
c563b9
c563b9
diff --git a/tools/crm_mon.c b/tools/crm_mon.c
c563b9
index 61f070d..195e7b5 100644
c563b9
--- a/tools/crm_mon.c
c563b9
+++ b/tools/crm_mon.c
c563b9
@@ -135,7 +135,7 @@ static int fencing_connect(void);
c563b9
 static int pacemakerd_status(void);
c563b9
 static void mon_st_callback_event(stonith_t * st, stonith_event_t * e);
c563b9
 static void mon_st_callback_display(stonith_t * st, stonith_event_t * e);
c563b9
-static void refresh_after_event(gboolean data_updated);
c563b9
+static void refresh_after_event(gboolean data_updated, gboolean enforce);
c563b9
 
c563b9
 static unsigned int
c563b9
 all_includes(mon_output_format_t fmt) {
c563b9
@@ -694,13 +694,13 @@ reconnect_after_timeout(gpointer data)
c563b9
         fencing_connect();
c563b9
         if (cib_connect(TRUE) == pcmk_rc_ok) {
c563b9
             /* Redraw the screen and reinstall ourselves to get called after another reconnect_msec. */
c563b9
-            mon_refresh_display(NULL);
c563b9
+            refresh_after_event(FALSE, TRUE);
c563b9
             return FALSE;
c563b9
         }
c563b9
     }
c563b9
 
c563b9
     reconnect_timer = g_timeout_add(options.reconnect_msec, reconnect_after_timeout, NULL);
c563b9
-    return TRUE;
c563b9
+    return FALSE;
c563b9
 }
c563b9
 
c563b9
 /* Called from various places when we are disconnected from the CIB or from the
c563b9
@@ -1057,7 +1057,6 @@ static gboolean
c563b9
 detect_user_input(GIOChannel *channel, GIOCondition condition, gpointer user_data)
c563b9
 {
c563b9
     int c;
c563b9
-    int rc;
c563b9
     gboolean config_mode = FALSE;
c563b9
 
c563b9
     while (1) {
c563b9
@@ -1158,16 +1157,7 @@ detect_user_input(GIOChannel *channel, GIOCondition condition, gpointer user_dat
c563b9
     }
c563b9
 
c563b9
 refresh:
c563b9
-    rc = pacemakerd_status();
c563b9
-    if (rc == pcmk_rc_ok) {
c563b9
-        fencing_connect();
c563b9
-        rc = cib_connect(FALSE);
c563b9
-    }
c563b9
-    if (rc == pcmk_rc_ok) {
c563b9
-        mon_refresh_display(NULL);
c563b9
-    } else {
c563b9
-        handle_connection_failures(rc);
c563b9
-    }
c563b9
+    refresh_after_event(FALSE, TRUE);
c563b9
 
c563b9
     return TRUE;
c563b9
 }
c563b9
@@ -2087,7 +2077,7 @@ crm_diff_update(const char *event, xmlNode * msg)
c563b9
     }
c563b9
 
c563b9
     stale = FALSE;
c563b9
-    refresh_after_event(cib_updated);
c563b9
+    refresh_after_event(cib_updated, FALSE);
c563b9
 }
c563b9
 
c563b9
 static int
c563b9
@@ -2246,7 +2236,7 @@ mon_st_callback_event(stonith_t * st, stonith_event_t * e)
c563b9
  * fencing event is received or a CIB diff occurrs.
c563b9
  */
c563b9
 static void
c563b9
-refresh_after_event(gboolean data_updated)
c563b9
+refresh_after_event(gboolean data_updated, gboolean enforce)
c563b9
 {
c563b9
     static int updates = 0;
c563b9
     time_t now = time(NULL);
c563b9
@@ -2259,12 +2249,15 @@ refresh_after_event(gboolean data_updated)
c563b9
         refresh_timer = mainloop_timer_add("refresh", 2000, FALSE, mon_trigger_refresh, NULL);
c563b9
     }
c563b9
 
c563b9
-    if ((now - last_refresh) > (options.reconnect_msec / 1000)) {
c563b9
-        mainloop_set_trigger(refresh_trigger);
c563b9
+    if (reconnect_timer > 0) {
c563b9
+        /* we will receive a refresh request after successful reconnect */
c563b9
         mainloop_timer_stop(refresh_timer);
c563b9
-        updates = 0;
c563b9
+        return;
c563b9
+    }
c563b9
 
c563b9
-    } else if(updates >= 10) {
c563b9
+    if (enforce ||
c563b9
+        now - last_refresh > options.reconnect_msec / 1000 ||
c563b9
+        updates >= 10) {
c563b9
         mainloop_set_trigger(refresh_trigger);
c563b9
         mainloop_timer_stop(refresh_timer);
c563b9
         updates = 0;
c563b9
@@ -2285,7 +2278,7 @@ mon_st_callback_display(stonith_t * st, stonith_event_t * e)
c563b9
         mon_cib_connection_destroy(NULL);
c563b9
     } else {
c563b9
         print_dot(output_format);
c563b9
-        refresh_after_event(TRUE);
c563b9
+        refresh_after_event(TRUE, FALSE);
c563b9
     }
c563b9
 }
c563b9
 
c563b9
-- 
c563b9
1.8.3.1
c563b9
c563b9
c563b9
From a63af2713f96719fc1d5ef594eb033d0f251187f Mon Sep 17 00:00:00 2001
c563b9
From: Klaus Wenninger <klaus.wenninger@aon.at>
c563b9
Date: Thu, 28 Jan 2021 16:52:57 +0100
c563b9
Subject: [PATCH 5/7] Fix: crm_mon: retry fencer connection as not fatal
c563b9
 initially
c563b9
c563b9
and clean up fencer API to not leak memory on multiple reconnects
c563b9
---
c563b9
 tools/crm_mon.c | 8 +++++++-
c563b9
 1 file changed, 7 insertions(+), 1 deletion(-)
c563b9
c563b9
diff --git a/tools/crm_mon.c b/tools/crm_mon.c
c563b9
index 195e7b5..a768ca9 100644
c563b9
--- a/tools/crm_mon.c
c563b9
+++ b/tools/crm_mon.c
c563b9
@@ -798,7 +798,7 @@ fencing_connect(void)
c563b9
             st->cmds->register_notification(st, T_STONITH_NOTIFY_HISTORY, mon_st_callback_display);
c563b9
         }
c563b9
     } else {
c563b9
-        st = NULL;
c563b9
+        clean_up_fencing_connection();
c563b9
     }
c563b9
 
c563b9
     return rc;
c563b9
@@ -2255,6 +2255,12 @@ refresh_after_event(gboolean data_updated, gboolean enforce)
c563b9
         return;
c563b9
     }
c563b9
 
c563b9
+    /* as we're not handling initial failure of fencer-connection as
c563b9
+     * fatal give it a retry here
c563b9
+     * not getting here if cib-reconnection is already on the way
c563b9
+     */
c563b9
+    fencing_connect();
c563b9
+
c563b9
     if (enforce ||
c563b9
         now - last_refresh > options.reconnect_msec / 1000 ||
c563b9
         updates >= 10) {
c563b9
-- 
c563b9
1.8.3.1
c563b9
c563b9
c563b9
From b6f4b5dfc0b5fec8cdc029409fc61252de019415 Mon Sep 17 00:00:00 2001
c563b9
From: Klaus Wenninger <klaus.wenninger@aon.at>
c563b9
Date: Thu, 28 Jan 2021 18:08:43 +0100
c563b9
Subject: [PATCH 6/7] Refactor: crm_mon: have reconnect-timer removed
c563b9
 implicitly
c563b9
c563b9
---
c563b9
 tools/crm_mon.c | 12 ++++--------
c563b9
 1 file changed, 4 insertions(+), 8 deletions(-)
c563b9
c563b9
diff --git a/tools/crm_mon.c b/tools/crm_mon.c
c563b9
index a768ca9..4f73379 100644
c563b9
--- a/tools/crm_mon.c
c563b9
+++ b/tools/crm_mon.c
c563b9
@@ -684,23 +684,19 @@ reconnect_after_timeout(gpointer data)
c563b9
     }
c563b9
 #endif
c563b9
 
c563b9
-    if (reconnect_timer > 0) {
c563b9
-        g_source_remove(reconnect_timer);
c563b9
-        reconnect_timer = 0;
c563b9
-    }
c563b9
-
c563b9
     print_as(output_format, "Reconnecting...\n");
c563b9
     if (pacemakerd_status() == pcmk_rc_ok) {
c563b9
         fencing_connect();
c563b9
         if (cib_connect(TRUE) == pcmk_rc_ok) {
c563b9
-            /* Redraw the screen and reinstall ourselves to get called after another reconnect_msec. */
c563b9
+            /* trigger redrawing the screen (needs reconnect_timer == 0) */
c563b9
+            reconnect_timer = 0;
c563b9
             refresh_after_event(FALSE, TRUE);
c563b9
-            return FALSE;
c563b9
+            return G_SOURCE_REMOVE;
c563b9
         }
c563b9
     }
c563b9
 
c563b9
     reconnect_timer = g_timeout_add(options.reconnect_msec, reconnect_after_timeout, NULL);
c563b9
-    return FALSE;
c563b9
+    return G_SOURCE_REMOVE;
c563b9
 }
c563b9
 
c563b9
 /* Called from various places when we are disconnected from the CIB or from the
c563b9
-- 
c563b9
1.8.3.1
c563b9
c563b9
c563b9
From 586e69ec38d5273b348c42a61b9bc7bbcc2b93b3 Mon Sep 17 00:00:00 2001
c563b9
From: Klaus Wenninger <klaus.wenninger@aon.at>
c563b9
Date: Thu, 28 Jan 2021 21:08:16 +0100
c563b9
Subject: [PATCH 7/7] Fix: crm_mon: suppress pacemakerd-status for non-text
c563b9
 output
c563b9
c563b9
---
c563b9
 tools/crm_mon.c | 53 ++++++++++++++++++++++++++++++++---------------------
c563b9
 1 file changed, 32 insertions(+), 21 deletions(-)
c563b9
c563b9
diff --git a/tools/crm_mon.c b/tools/crm_mon.c
c563b9
index 4f73379..d4d4ac3 100644
c563b9
--- a/tools/crm_mon.c
c563b9
+++ b/tools/crm_mon.c
c563b9
@@ -995,27 +995,38 @@ pacemakerd_status(void)
c563b9
             if (rc == pcmk_rc_ok) {
c563b9
                 pcmk_dispatch_ipc(pacemakerd_api);
c563b9
                 rc = ENOTCONN;
c563b9
-                switch (state) {
c563b9
-                    case pcmk_pacemakerd_state_running:
c563b9
-                        rc = pcmk_rc_ok;
c563b9
-                        break;
c563b9
-                    case pcmk_pacemakerd_state_starting_daemons:
c563b9
-                        print_as(output_format ,"Pacemaker daemons starting ...\n");
c563b9
-                        break;
c563b9
-                    case pcmk_pacemakerd_state_wait_for_ping:
c563b9
-                        print_as(output_format ,"Waiting for startup-trigger from SBD ...\n");
c563b9
-                        break;
c563b9
-                    case pcmk_pacemakerd_state_shutting_down:
c563b9
-                        print_as(output_format ,"Pacemaker daemons shutting down ...\n");
c563b9
-                        break;
c563b9
-                    case pcmk_pacemakerd_state_shutdown_complete:
c563b9
-                        /* assuming pacemakerd doesn't dispatch any pings after entering
c563b9
-                         * that state unless it is waiting for SBD
c563b9
-                         */
c563b9
-                        print_as(output_format ,"Pacemaker daemons shut down - reporting to SBD ...\n");
c563b9
-                        break;
c563b9
-                    default:
c563b9
-                        break;
c563b9
+                if ((output_format == mon_output_console) ||
c563b9
+                    (output_format == mon_output_plain)) {
c563b9
+                    switch (state) {
c563b9
+                        case pcmk_pacemakerd_state_running:
c563b9
+                            rc = pcmk_rc_ok;
c563b9
+                            break;
c563b9
+                        case pcmk_pacemakerd_state_starting_daemons:
c563b9
+                            print_as(output_format ,"Pacemaker daemons starting ...\n");
c563b9
+                            break;
c563b9
+                        case pcmk_pacemakerd_state_wait_for_ping:
c563b9
+                            print_as(output_format ,"Waiting for startup-trigger from SBD ...\n");
c563b9
+                            break;
c563b9
+                        case pcmk_pacemakerd_state_shutting_down:
c563b9
+                            print_as(output_format ,"Pacemaker daemons shutting down ...\n");
c563b9
+                            break;
c563b9
+                        case pcmk_pacemakerd_state_shutdown_complete:
c563b9
+                            /* assuming pacemakerd doesn't dispatch any pings after entering
c563b9
+                            * that state unless it is waiting for SBD
c563b9
+                            */
c563b9
+                            print_as(output_format ,"Pacemaker daemons shut down - reporting to SBD ...\n");
c563b9
+                            break;
c563b9
+                        default:
c563b9
+                            break;
c563b9
+                    }
c563b9
+                } else {
c563b9
+                    switch (state) {
c563b9
+                        case pcmk_pacemakerd_state_running:
c563b9
+                            rc = pcmk_rc_ok;
c563b9
+                            break;
c563b9
+                        default:
c563b9
+                            break;
c563b9
+                    }
c563b9
                 }
c563b9
             }
c563b9
         }
c563b9
-- 
c563b9
1.8.3.1
c563b9