587943
From bc91cc5d8b4257627d09103cf676cd83656bda8c Mon Sep 17 00:00:00 2001
587943
From: Chris Lumens <clumens@redhat.com>
587943
Date: Tue, 12 Jan 2021 10:45:53 -0500
587943
Subject: [PATCH 01/11] Refactor: tools: Split up connection teardown in
587943
 crm_mon.
587943
587943
We don't always want to tear down the fencing and CIB connections at the
587943
same time.  The two new functions can then immediately be used in
587943
mon_refresh_display and do_mon_cib_connection_destroy.
587943
---
587943
 tools/crm_mon.c | 57 +++++++++++++++++++++++++++++++--------------------------
587943
 1 file changed, 31 insertions(+), 26 deletions(-)
587943
587943
diff --git a/tools/crm_mon.c b/tools/crm_mon.c
587943
index 8ec97bb..fc20e4c 100644
587943
--- a/tools/crm_mon.c
587943
+++ b/tools/crm_mon.c
587943
@@ -122,7 +122,8 @@ struct {
587943
     .mon_ops = mon_op_default
587943
 };
587943
 
587943
-static void clean_up_connections(void);
587943
+static void clean_up_cib_connection(void);
587943
+static void clean_up_fencing_connection(void);
587943
 static crm_exit_t clean_up(crm_exit_t exit_code);
587943
 static void crm_diff_update(const char *event, xmlNode * msg);
587943
 static int mon_refresh_display(gpointer user_data);
587943
@@ -712,12 +713,7 @@ do_mon_cib_connection_destroy(gpointer user_data, bool is_error)
587943
         /* the client API won't properly reconnect notifications
587943
          * if they are still in the table - so remove them
587943
          */
587943
-        st->cmds->remove_notification(st, T_STONITH_NOTIFY_DISCONNECT);
587943
-        st->cmds->remove_notification(st, T_STONITH_NOTIFY_FENCE);
587943
-        st->cmds->remove_notification(st, T_STONITH_NOTIFY_HISTORY);
587943
-        if (st->state != stonith_disconnected) {
587943
-            st->cmds->disconnect(st);
587943
-        }
587943
+        clean_up_fencing_connection();
587943
     }
587943
     if (cib) {
587943
         cib->cmds->signoff(cib);
587943
@@ -851,7 +847,8 @@ cib_connect(gboolean full)
587943
 
587943
         if (rc != pcmk_ok) {
587943
             out->err(out, "Notification setup failed, could not monitor CIB actions");
587943
-            clean_up_connections();
587943
+            clean_up_cib_connection();
587943
+            clean_up_fencing_connection();
587943
         }
587943
     }
587943
     return rc;
587943
@@ -1866,9 +1863,7 @@ mon_refresh_display(gpointer user_data)
587943
     last_refresh = time(NULL);
587943
 
587943
     if (cli_config_update(&cib_copy, NULL, FALSE) == FALSE) {
587943
-        if (cib) {
587943
-            cib->cmds->signoff(cib);
587943
-        }
587943
+        clean_up_cib_connection();
587943
         out->err(out, "Upgrade failed: %s", pcmk_strerror(-pcmk_err_schema_validation));
587943
         clean_up(CRM_EX_CONFIG);
587943
         return 0;
587943
@@ -2040,24 +2035,33 @@ mon_st_callback_display(stonith_t * st, stonith_event_t * e)
587943
 }
587943
 
587943
 static void
587943
-clean_up_connections(void)
587943
+clean_up_cib_connection(void)
587943
 {
587943
-    if (cib != NULL) {
587943
-        cib->cmds->signoff(cib);
587943
-        cib_delete(cib);
587943
-        cib = NULL;
587943
+    if (cib == NULL) {
587943
+        return;
587943
     }
587943
 
587943
-    if (st != NULL) {
587943
-        if (st->state != stonith_disconnected) {
587943
-            st->cmds->remove_notification(st, T_STONITH_NOTIFY_DISCONNECT);
587943
-            st->cmds->remove_notification(st, T_STONITH_NOTIFY_FENCE);
587943
-            st->cmds->remove_notification(st, T_STONITH_NOTIFY_HISTORY);
587943
-            st->cmds->disconnect(st);
587943
-        }
587943
-        stonith_api_delete(st);
587943
-        st = NULL;
587943
+    cib->cmds->signoff(cib);
587943
+    cib_delete(cib);
587943
+    cib = NULL;
587943
+}
587943
+
587943
+static void
587943
+clean_up_fencing_connection(void)
587943
+{
587943
+    if (st == NULL) {
587943
+        return;
587943
     }
587943
+
587943
+    if (st->state != stonith_disconnected) {
587943
+        st->cmds->remove_notification(st, T_STONITH_NOTIFY_DISCONNECT);
587943
+        st->cmds->remove_notification(st, T_STONITH_NOTIFY_FENCE);
587943
+        st->cmds->remove_notification(st, T_STONITH_NOTIFY_HISTORY);
587943
+        st->cmds->disconnect(st);
587943
+    }
587943
+
587943
+    stonith_api_delete(st);
587943
+    st = NULL;
587943
 }
587943
 
587943
 /*
587943
@@ -2074,7 +2078,8 @@ clean_up(crm_exit_t exit_code)
587943
     /* Quitting crm_mon is much more complicated than it ought to be. */
587943
 
587943
     /* (1) Close connections, free things, etc. */
587943
-    clean_up_connections();
587943
+    clean_up_cib_connection();
587943
+    clean_up_fencing_connection();
587943
     free(options.pid_file);
587943
     free(options.neg_location_prefix);
587943
     g_slist_free_full(options.includes_excludes, free);
587943
-- 
587943
1.8.3.1
587943
587943
587943
From 28d646ce67c6a933eaa76aca51f9973a65d0ee3c Mon Sep 17 00:00:00 2001
587943
From: Chris Lumens <clumens@redhat.com>
587943
Date: Thu, 7 Jan 2021 17:18:13 -0500
587943
Subject: [PATCH 02/11] Refactor: tools: Split up connection establishment in
587943
 crm_mon.
587943
587943
We don't always want to connect to the CIB and fencing in the same
587943
action.  Note that bringing up the fencing connection needs to happen
587943
first, because mon_refresh_display is called from cib_connect and it
587943
will want a fencing connection.
587943
---
587943
 tools/crm_mon.c | 66 +++++++++++++++++++++++++++++++++------------------------
587943
 1 file changed, 38 insertions(+), 28 deletions(-)
587943
587943
diff --git a/tools/crm_mon.c b/tools/crm_mon.c
587943
index fc20e4c..301a222 100644
587943
--- a/tools/crm_mon.c
587943
+++ b/tools/crm_mon.c
587943
@@ -128,6 +128,7 @@ static crm_exit_t clean_up(crm_exit_t exit_code);
587943
 static void crm_diff_update(const char *event, xmlNode * msg);
587943
 static int mon_refresh_display(gpointer user_data);
587943
 static int cib_connect(gboolean full);
587943
+static int fencing_connect(void);
587943
 static void mon_st_callback_event(stonith_t * st, stonith_event_t * e);
587943
 static void mon_st_callback_display(stonith_t * st, stonith_event_t * e);
587943
 static void kick_refresh(gboolean data_updated);
587943
@@ -668,8 +669,6 @@ static GOptionEntry deprecated_entries[] = {
587943
 static gboolean
587943
 mon_timer_popped(gpointer data)
587943
 {
587943
-    int rc = pcmk_ok;
587943
-
587943
 #if CURSES_ENABLED
587943
     if (output_format == mon_output_console) {
587943
         clear();
587943
@@ -683,9 +682,7 @@ mon_timer_popped(gpointer data)
587943
     }
587943
 
587943
     print_as(output_format, "Reconnecting...\n");
587943
-    rc = cib_connect(TRUE);
587943
-
587943
-    if (rc != pcmk_ok) {
587943
+    if (fencing_connect() == pcmk_ok && cib_connect(TRUE) == pcmk_ok) {
587943
         timer_id = g_timeout_add(options.reconnect_msec, mon_timer_popped, NULL);
587943
     }
587943
     return FALSE;
587943
@@ -767,39 +764,48 @@ mon_winresize(int nsig)
587943
 #endif
587943
 
587943
 static int
587943
-cib_connect(gboolean full)
587943
+fencing_connect(void)
587943
 {
587943
     int rc = pcmk_ok;
587943
-    static gboolean need_pass = TRUE;
587943
-
587943
-    CRM_CHECK(cib != NULL, return -EINVAL);
587943
-
587943
-    if (getenv("CIB_passwd") != NULL) {
587943
-        need_pass = FALSE;
587943
-    }
587943
 
587943
     if (pcmk_is_set(options.mon_ops, mon_op_fence_connect) && (st == NULL)) {
587943
         st = stonith_api_new();
587943
     }
587943
 
587943
-    if (pcmk_is_set(options.mon_ops, mon_op_fence_connect)
587943
-        && (st != NULL) && (st->state == stonith_disconnected)) {
587943
+    if (!pcmk_is_set(options.mon_ops, mon_op_fence_connect) ||
587943
+        st == NULL || st->state != stonith_disconnected) {
587943
+        return rc;
587943
+    }
587943
 
587943
-        rc = st->cmds->connect(st, crm_system_name, NULL);
587943
-        if (rc == pcmk_ok) {
587943
-            crm_trace("Setting up stonith callbacks");
587943
-            if (pcmk_is_set(options.mon_ops, mon_op_watch_fencing)) {
587943
-                st->cmds->register_notification(st, T_STONITH_NOTIFY_DISCONNECT,
587943
-                                                mon_st_callback_event);
587943
-                st->cmds->register_notification(st, T_STONITH_NOTIFY_FENCE, mon_st_callback_event);
587943
-            } else {
587943
-                st->cmds->register_notification(st, T_STONITH_NOTIFY_DISCONNECT,
587943
-                                                mon_st_callback_display);
587943
-                st->cmds->register_notification(st, T_STONITH_NOTIFY_HISTORY, mon_st_callback_display);
587943
-            }
587943
+    rc = st->cmds->connect(st, crm_system_name, NULL);
587943
+    if (rc == pcmk_ok) {
587943
+        crm_trace("Setting up stonith callbacks");
587943
+        if (pcmk_is_set(options.mon_ops, mon_op_watch_fencing)) {
587943
+            st->cmds->register_notification(st, T_STONITH_NOTIFY_DISCONNECT,
587943
+                                            mon_st_callback_event);
587943
+            st->cmds->register_notification(st, T_STONITH_NOTIFY_FENCE, mon_st_callback_event);
587943
+        } else {
587943
+            st->cmds->register_notification(st, T_STONITH_NOTIFY_DISCONNECT,
587943
+                                            mon_st_callback_display);
587943
+            st->cmds->register_notification(st, T_STONITH_NOTIFY_HISTORY, mon_st_callback_display);
587943
         }
587943
     }
587943
 
587943
+    return rc;
587943
+}
587943
+
587943
+static int
587943
+cib_connect(gboolean full)
587943
+{
587943
+    int rc = pcmk_ok;
587943
+    static gboolean need_pass = TRUE;
587943
+
587943
+    CRM_CHECK(cib != NULL, return -EINVAL);
587943
+
587943
+    if (getenv("CIB_passwd") != NULL) {
587943
+        need_pass = FALSE;
587943
+    }
587943
+
587943
     if (cib->state == cib_connected_query || cib->state == cib_connected_command) {
587943
         return rc;
587943
     }
587943
@@ -1373,7 +1379,11 @@ main(int argc, char **argv)
587943
         if (!pcmk_is_set(options.mon_ops, mon_op_one_shot)) {
587943
             print_as(output_format ,"Waiting until cluster is available on this node ...\n");
587943
         }
587943
-        rc = cib_connect(!pcmk_is_set(options.mon_ops, mon_op_one_shot));
587943
+
587943
+        rc = fencing_connect();
587943
+        if (rc == pcmk_ok) {
587943
+            rc = cib_connect(!pcmk_is_set(options.mon_ops, mon_op_one_shot));
587943
+        }
587943
 
587943
         if (pcmk_is_set(options.mon_ops, mon_op_one_shot)) {
587943
             break;
587943
-- 
587943
1.8.3.1
587943
587943
587943
From e12508ffba06b1c5652e7f49a449aae6d89ec420 Mon Sep 17 00:00:00 2001
587943
From: Chris Lumens <clumens@redhat.com>
587943
Date: Tue, 12 Jan 2021 17:01:53 -0500
587943
Subject: [PATCH 03/11] Refactor: tools: Split one shot mode out into its own
587943
 function.
587943
587943
Also, split the connection error handling out into its own function so
587943
it can be reused in both the one-shot and loop cases.
587943
---
587943
 tools/crm_mon.c | 69 +++++++++++++++++++++++++++++++++++----------------------
587943
 1 file changed, 43 insertions(+), 26 deletions(-)
587943
587943
diff --git a/tools/crm_mon.c b/tools/crm_mon.c
587943
index 301a222..b33598b 100644
587943
--- a/tools/crm_mon.c
587943
+++ b/tools/crm_mon.c
587943
@@ -1162,6 +1162,41 @@ reconcile_output_format(pcmk__common_args_t *args) {
587943
     }
587943
 }
587943
 
587943
+static void
587943
+handle_connection_failures(int rc)
587943
+{
587943
+    if (rc == pcmk_ok) {
587943
+        return;
587943
+    }
587943
+
587943
+    if (output_format == mon_output_monitor) {
587943
+        g_set_error(&error, PCMK__EXITC_ERROR, CRM_EX_ERROR, "CLUSTER CRIT: Connection to cluster failed: %s",
587943
+                    pcmk_strerror(rc));
587943
+        rc = MON_STATUS_CRIT;
587943
+    } else if (rc == -ENOTCONN) {
587943
+        g_set_error(&error, PCMK__EXITC_ERROR, CRM_EX_ERROR, "Error: cluster is not available on this node");
587943
+        rc = crm_errno2exit(rc);
587943
+    } else {
587943
+        g_set_error(&error, PCMK__EXITC_ERROR, CRM_EX_ERROR, "Connection to cluster failed: %s", pcmk_strerror(rc));
587943
+        rc = crm_errno2exit(rc);
587943
+    }
587943
+
587943
+    clean_up(rc);
587943
+}
587943
+
587943
+static void
587943
+one_shot()
587943
+{
587943
+    int rc = fencing_connect();
587943
+
587943
+    if (rc == pcmk_rc_ok) {
587943
+        rc = cib_connect(FALSE);
587943
+        handle_connection_failures(rc);
587943
+    }
587943
+
587943
+    clean_up(CRM_EX_OK);
587943
+}
587943
+
587943
 int
587943
 main(int argc, char **argv)
587943
 {
587943
@@ -1375,20 +1410,19 @@ main(int argc, char **argv)
587943
 
587943
     crm_info("Starting %s", crm_system_name);
587943
 
587943
+    if (pcmk_is_set(options.mon_ops, mon_op_one_shot)) {
587943
+        one_shot();
587943
+    }
587943
+
587943
     do {
587943
-        if (!pcmk_is_set(options.mon_ops, mon_op_one_shot)) {
587943
-            print_as(output_format ,"Waiting until cluster is available on this node ...\n");
587943
-        }
587943
+        print_as(output_format ,"Waiting until cluster is available on this node ...\n");
587943
 
587943
         rc = fencing_connect();
587943
         if (rc == pcmk_ok) {
587943
-            rc = cib_connect(!pcmk_is_set(options.mon_ops, mon_op_one_shot));
587943
+            rc = cib_connect(TRUE);
587943
         }
587943
 
587943
-        if (pcmk_is_set(options.mon_ops, mon_op_one_shot)) {
587943
-            break;
587943
-
587943
-        } else if (rc != pcmk_ok) {
587943
+        if (rc != pcmk_ok) {
587943
             sleep(options.reconnect_msec / 1000);
587943
 #if CURSES_ENABLED
587943
             if (output_format == mon_output_console) {
587943
@@ -1402,24 +1436,7 @@ main(int argc, char **argv)
587943
 
587943
     } while (rc == -ENOTCONN);
587943
 
587943
-    if (rc != pcmk_ok) {
587943
-        if (output_format == mon_output_monitor) {
587943
-            g_set_error(&error, PCMK__EXITC_ERROR, CRM_EX_ERROR, "CLUSTER CRIT: Connection to cluster failed: %s",
587943
-                        pcmk_strerror(rc));
587943
-            return clean_up(MON_STATUS_CRIT);
587943
-        } else {
587943
-            if (rc == -ENOTCONN) {
587943
-                g_set_error(&error, PCMK__EXITC_ERROR, CRM_EX_ERROR, "Error: cluster is not available on this node");
587943
-            } else {
587943
-                g_set_error(&error, PCMK__EXITC_ERROR, CRM_EX_ERROR, "Connection to cluster failed: %s", pcmk_strerror(rc));
587943
-            }
587943
-        }
587943
-        return clean_up(crm_errno2exit(rc));
587943
-    }
587943
-
587943
-    if (pcmk_is_set(options.mon_ops, mon_op_one_shot)) {
587943
-        return clean_up(CRM_EX_OK);
587943
-    }
587943
+    handle_connection_failures(rc);
587943
 
587943
     mainloop = g_main_loop_new(NULL, FALSE);
587943
 
587943
-- 
587943
1.8.3.1
587943
587943
587943
From 0eb307a19d57d4a59a4b51a64a3b62dcd0b7cc9a Mon Sep 17 00:00:00 2001
587943
From: Chris Lumens <clumens@redhat.com>
587943
Date: Wed, 13 Jan 2021 12:47:41 -0500
587943
Subject: [PATCH 04/11] Refactor: tools: Don't call mon_refresh_display from
587943
 cib_connect.
587943
587943
---
587943
 tools/crm_mon.c | 6 +++---
587943
 1 file changed, 3 insertions(+), 3 deletions(-)
587943
587943
diff --git a/tools/crm_mon.c b/tools/crm_mon.c
587943
index b33598b..b0daf76 100644
587943
--- a/tools/crm_mon.c
587943
+++ b/tools/crm_mon.c
587943
@@ -683,6 +683,7 @@ mon_timer_popped(gpointer data)
587943
 
587943
     print_as(output_format, "Reconnecting...\n");
587943
     if (fencing_connect() == pcmk_ok && cib_connect(TRUE) == pcmk_ok) {
587943
+        mon_refresh_display(NULL);
587943
         timer_id = g_timeout_add(options.reconnect_msec, mon_timer_popped, NULL);
587943
     }
587943
     return FALSE;
587943
@@ -831,9 +832,6 @@ cib_connect(gboolean full)
587943
     }
587943
 
587943
     rc = cib->cmds->query(cib, NULL, &current_cib, cib_scope_local | cib_sync_call);
587943
-    if (rc == pcmk_ok) {
587943
-        mon_refresh_display(NULL);
587943
-    }
587943
 
587943
     if (rc == pcmk_ok && full) {
587943
         rc = cib->cmds->set_connection_dnotify(cib, mon_cib_connection_destroy_regular);
587943
@@ -1192,6 +1190,7 @@ one_shot()
587943
     if (rc == pcmk_rc_ok) {
587943
         rc = cib_connect(FALSE);
587943
         handle_connection_failures(rc);
587943
+        mon_refresh_display(NULL);
587943
     }
587943
 
587943
     clean_up(CRM_EX_OK);
587943
@@ -1437,6 +1436,7 @@ main(int argc, char **argv)
587943
     } while (rc == -ENOTCONN);
587943
 
587943
     handle_connection_failures(rc);
587943
+    mon_refresh_display(NULL);
587943
 
587943
     mainloop = g_main_loop_new(NULL, FALSE);
587943
 
587943
-- 
587943
1.8.3.1
587943
587943
587943
From 46696d3135e699c58918e41c93c357d951146d5c Mon Sep 17 00:00:00 2001
587943
From: Chris Lumens <clumens@redhat.com>
587943
Date: Wed, 13 Jan 2021 13:52:49 -0500
587943
Subject: [PATCH 05/11] Fix: tools: Report if getting fencing history failed in
587943
 crm_mon.
587943
587943
This just takes history_rc into account in the text and html formatters.
587943
It was already used by the XML formatter.  If we can't get fencing
587943
history, add a message to the output indicating that happened.
587943
---
587943
 tools/crm_mon.c       | 13 +++++----
587943
 tools/crm_mon.h       | 12 ++++-----
587943
 tools/crm_mon_print.c | 74 ++++++++++++++++++++++++++++++++++++++-------------
587943
 3 files changed, 70 insertions(+), 29 deletions(-)
587943
587943
diff --git a/tools/crm_mon.c b/tools/crm_mon.c
587943
index b0daf76..1a68555 100644
587943
--- a/tools/crm_mon.c
587943
+++ b/tools/crm_mon.c
587943
@@ -1943,7 +1943,8 @@ mon_refresh_display(gpointer user_data)
587943
     switch (output_format) {
587943
         case mon_output_html:
587943
         case mon_output_cgi:
587943
-            if (print_html_status(out, mon_data_set, stonith_history, options.mon_ops,
587943
+            if (print_html_status(out, mon_data_set, crm_errno2exit(history_rc),
587943
+                                  stonith_history, options.mon_ops,
587943
                                   show, options.neg_location_prefix,
587943
                                   options.only_node, options.only_rsc) != 0) {
587943
                 g_set_error(&error, PCMK__EXITC_ERROR, CRM_EX_CANTCREAT, "Critical: Unable to output html file");
587943
@@ -1974,15 +1975,17 @@ mon_refresh_display(gpointer user_data)
587943
              */
587943
 #if CURSES_ENABLED
587943
             blank_screen();
587943
-            print_status(out, mon_data_set, stonith_history, options.mon_ops, show,
587943
-                         options.neg_location_prefix, options.only_node, options.only_rsc);
587943
+            print_status(out, mon_data_set, crm_errno2exit(history_rc), stonith_history,
587943
+                         options.mon_ops, show, options.neg_location_prefix,
587943
+                         options.only_node, options.only_rsc);
587943
             refresh();
587943
             break;
587943
 #endif
587943
 
587943
         case mon_output_plain:
587943
-            print_status(out, mon_data_set, stonith_history, options.mon_ops, show,
587943
-                         options.neg_location_prefix, options.only_node, options.only_rsc);
587943
+            print_status(out, mon_data_set, crm_errno2exit(history_rc), stonith_history,
587943
+                         options.mon_ops, show, options.neg_location_prefix,
587943
+                         options.only_node, options.only_rsc);
587943
             break;
587943
 
587943
         case mon_output_unset:
587943
diff --git a/tools/crm_mon.h b/tools/crm_mon.h
587943
index f746507..73c926d 100644
587943
--- a/tools/crm_mon.h
587943
+++ b/tools/crm_mon.h
587943
@@ -95,17 +95,17 @@ typedef enum mon_output_format_e {
587943
 #define mon_op_default              (mon_op_print_pending | mon_op_fence_history | mon_op_fence_connect)
587943
 
587943
 void print_status(pcmk__output_t *out, pe_working_set_t *data_set,
587943
-                  stonith_history_t *stonith_history, unsigned int mon_ops,
587943
-                  unsigned int show, char *prefix, char *only_node,
587943
-                  char *only_rsc);
587943
+                  crm_exit_t history_rc, stonith_history_t *stonith_history,
587943
+                  unsigned int mon_ops, unsigned int show, char *prefix,
587943
+                  char *only_node, char *only_rsc);
587943
 void print_xml_status(pcmk__output_t *out, pe_working_set_t *data_set,
587943
                       crm_exit_t history_rc, stonith_history_t *stonith_history,
587943
                       unsigned int mon_ops, unsigned int show, char *prefix,
587943
                       char *only_node, char *only_rsc);
587943
 int print_html_status(pcmk__output_t *out, pe_working_set_t *data_set,
587943
-                      stonith_history_t *stonith_history, unsigned int mon_ops,
587943
-                      unsigned int show, char *prefix, char *only_node,
587943
-                      char *only_rsc);
587943
+                      crm_exit_t history_rc, stonith_history_t *stonith_history,
587943
+                      unsigned int mon_ops, unsigned int show, char *prefix,
587943
+                      char *only_node, char *only_rsc);
587943
 
587943
 GList *append_attr_list(GList *attr_list, char *name);
587943
 void blank_screen(void);
587943
diff --git a/tools/crm_mon_print.c b/tools/crm_mon_print.c
587943
index 8ae11bf..73406bd 100644
587943
--- a/tools/crm_mon_print.c
587943
+++ b/tools/crm_mon_print.c
587943
@@ -656,6 +656,7 @@ print_failed_actions(pcmk__output_t *out, pe_working_set_t *data_set,
587943
  *
587943
  * \param[in] out             The output functions structure.
587943
  * \param[in] data_set        Cluster state to display.
587943
+ * \param[in] history_rc      Result of getting stonith history
587943
  * \param[in] stonith_history List of stonith actions.
587943
  * \param[in] mon_ops         Bitmask of mon_op_*.
587943
  * \param[in] show            Bitmask of mon_show_*.
587943
@@ -663,14 +664,16 @@ print_failed_actions(pcmk__output_t *out, pe_working_set_t *data_set,
587943
  */
587943
 void
587943
 print_status(pcmk__output_t *out, pe_working_set_t *data_set,
587943
-             stonith_history_t *stonith_history, unsigned int mon_ops,
587943
-             unsigned int show, char *prefix, char *only_node, char *only_rsc)
587943
+             crm_exit_t history_rc, stonith_history_t *stonith_history,
587943
+             unsigned int mon_ops, unsigned int show, char *prefix,
587943
+             char *only_node, char *only_rsc)
587943
 {
587943
     GListPtr unames = NULL;
587943
     GListPtr resources = NULL;
587943
 
587943
     unsigned int print_opts = get_resource_display_options(mon_ops);
587943
     int rc = pcmk_rc_no_output;
587943
+    bool already_printed_failure = false;
587943
 
587943
     CHECK_RC(rc, out->message(out, "cluster-summary", data_set,
587943
                               pcmk_is_set(mon_ops, mon_op_print_clone_detail),
587943
@@ -731,13 +734,23 @@ print_status(pcmk__output_t *out, pe_working_set_t *data_set,
587943
     if (pcmk_is_set(show, mon_show_fence_failed)
587943
         && pcmk_is_set(mon_ops, mon_op_fence_history)) {
587943
 
587943
-        stonith_history_t *hp = stonith__first_matching_event(stonith_history, stonith__event_state_eq,
587943
-                                                              GINT_TO_POINTER(st_failed));
587943
+        if (history_rc == 0) {
587943
+            stonith_history_t *hp = stonith__first_matching_event(stonith_history, stonith__event_state_eq,
587943
+                                                                  GINT_TO_POINTER(st_failed));
587943
+
587943
+            if (hp) {
587943
+                CHECK_RC(rc, out->message(out, "failed-fencing-list", stonith_history, unames,
587943
+                                          pcmk_is_set(mon_ops, mon_op_fence_full_history),
587943
+                                          rc == pcmk_rc_ok));
587943
+            }
587943
+        } else {
587943
+            PCMK__OUTPUT_SPACER_IF(out, rc == pcmk_rc_ok);
587943
+            out->begin_list(out, NULL, NULL, "Failed Fencing Actions");
587943
+            out->list_item(out, NULL, "Failed to get fencing history: %s",
587943
+                           crm_exit_str(history_rc));
587943
+            out->end_list(out);
587943
 
587943
-        if (hp) {
587943
-            CHECK_RC(rc, out->message(out, "failed-fencing-list", stonith_history, unames,
587943
-                                      pcmk_is_set(mon_ops, mon_op_fence_full_history),
587943
-                                      rc == pcmk_rc_ok));
587943
+            already_printed_failure = true;
587943
         }
587943
     }
587943
 
587943
@@ -754,7 +767,15 @@ print_status(pcmk__output_t *out, pe_working_set_t *data_set,
587943
 
587943
     /* Print stonith history */
587943
     if (pcmk_is_set(mon_ops, mon_op_fence_history)) {
587943
-        if (pcmk_is_set(show, mon_show_fence_worked)) {
587943
+        if (history_rc != 0) {
587943
+            if (!already_printed_failure) {
587943
+                PCMK__OUTPUT_SPACER_IF(out, rc == pcmk_rc_ok);
587943
+                out->begin_list(out, NULL, NULL, "Failed Fencing Actions");
587943
+                out->list_item(out, NULL, "Failed to get fencing history: %s",
587943
+                               crm_exit_str(history_rc));
587943
+                out->end_list(out);
587943
+            }
587943
+        } else if (pcmk_is_set(show, mon_show_fence_worked)) {
587943
             stonith_history_t *hp = stonith__first_matching_event(stonith_history, stonith__event_state_neq,
587943
                                                                   GINT_TO_POINTER(st_failed));
587943
 
587943
@@ -783,6 +804,7 @@ print_status(pcmk__output_t *out, pe_working_set_t *data_set,
587943
  *
587943
  * \param[in] out             The output functions structure.
587943
  * \param[in] data_set        Cluster state to display.
587943
+ * \param[in] history_rc      Result of getting stonith history
587943
  * \param[in] stonith_history List of stonith actions.
587943
  * \param[in] mon_ops         Bitmask of mon_op_*.
587943
  * \param[in] show            Bitmask of mon_show_*.
587943
@@ -878,6 +900,7 @@ print_xml_status(pcmk__output_t *out, pe_working_set_t *data_set,
587943
  *
587943
  * \param[in] out             The output functions structure.
587943
  * \param[in] data_set        Cluster state to display.
587943
+ * \param[in] history_rc      Result of getting stonith history
587943
  * \param[in] stonith_history List of stonith actions.
587943
  * \param[in] mon_ops         Bitmask of mon_op_*.
587943
  * \param[in] show            Bitmask of mon_show_*.
587943
@@ -885,14 +908,15 @@ print_xml_status(pcmk__output_t *out, pe_working_set_t *data_set,
587943
  */
587943
 int
587943
 print_html_status(pcmk__output_t *out, pe_working_set_t *data_set,
587943
-                  stonith_history_t *stonith_history, unsigned int mon_ops,
587943
-                  unsigned int show, char *prefix, char *only_node,
587943
-                  char *only_rsc)
587943
+                  crm_exit_t history_rc, stonith_history_t *stonith_history,
587943
+                  unsigned int mon_ops, unsigned int show, char *prefix,
587943
+                  char *only_node, char *only_rsc)
587943
 {
587943
     GListPtr unames = NULL;
587943
     GListPtr resources = NULL;
587943
 
587943
     unsigned int print_opts = get_resource_display_options(mon_ops);
587943
+    bool already_printed_failure = false;
587943
 
587943
     out->message(out, "cluster-summary", data_set,
587943
                  pcmk_is_set(mon_ops, mon_op_print_clone_detail),
587943
@@ -950,18 +974,32 @@ print_html_status(pcmk__output_t *out, pe_working_set_t *data_set,
587943
     if (pcmk_is_set(show, mon_show_fence_failed)
587943
         && pcmk_is_set(mon_ops, mon_op_fence_history)) {
587943
 
587943
-        stonith_history_t *hp = stonith__first_matching_event(stonith_history, stonith__event_state_eq,
587943
-                                                              GINT_TO_POINTER(st_failed));
587943
+        if (history_rc == 0) {
587943
+            stonith_history_t *hp = stonith__first_matching_event(stonith_history, stonith__event_state_eq,
587943
+                                                                  GINT_TO_POINTER(st_failed));
587943
 
587943
-        if (hp) {
587943
-            out->message(out, "failed-fencing-list", stonith_history, unames,
587943
-                         pcmk_is_set(mon_ops, mon_op_fence_full_history), FALSE);
587943
+            if (hp) {
587943
+                out->message(out, "failed-fencing-list", stonith_history, unames,
587943
+                             pcmk_is_set(mon_ops, mon_op_fence_full_history), FALSE);
587943
+            }
587943
+        } else {
587943
+            out->begin_list(out, NULL, NULL, "Failed Fencing Actions");
587943
+            out->list_item(out, NULL, "Failed to get fencing history: %s",
587943
+                           crm_exit_str(history_rc));
587943
+            out->end_list(out);
587943
         }
587943
     }
587943
 
587943
     /* Print stonith history */
587943
     if (pcmk_is_set(mon_ops, mon_op_fence_history)) {
587943
-        if (pcmk_is_set(show, mon_show_fence_worked)) {
587943
+        if (history_rc != 0) {
587943
+            if (!already_printed_failure) {
587943
+                out->begin_list(out, NULL, NULL, "Failed Fencing Actions");
587943
+                out->list_item(out, NULL, "Failed to get fencing history: %s",
587943
+                               crm_exit_str(history_rc));
587943
+                out->end_list(out);
587943
+            }
587943
+        } else if (pcmk_is_set(show, mon_show_fence_worked)) {
587943
             stonith_history_t *hp = stonith__first_matching_event(stonith_history, stonith__event_state_neq,
587943
                                                                   GINT_TO_POINTER(st_failed));
587943
 
587943
-- 
587943
1.8.3.1
587943
587943
587943
From 2e391be6fdbbbccd6aef49b3f109e5c342eb5dcc Mon Sep 17 00:00:00 2001
587943
From: Chris Lumens <clumens@redhat.com>
587943
Date: Mon, 11 Jan 2021 12:54:40 -0500
587943
Subject: [PATCH 06/11] Fix: tools: A lack of stonith history is not fatal in
587943
 crm_mon.
587943
587943
Instead, print out all the rest of the typical output.  This should also
587943
include an error message in the fencing section, if that section was
587943
requested.
587943
587943
See: rhbz#1880426
587943
---
587943
 tools/crm_mon.c | 40 ++++++++++++++++------------------------
587943
 1 file changed, 16 insertions(+), 24 deletions(-)
587943
587943
diff --git a/tools/crm_mon.c b/tools/crm_mon.c
587943
index 1a68555..17b8ee9 100644
587943
--- a/tools/crm_mon.c
587943
+++ b/tools/crm_mon.c
587943
@@ -682,7 +682,8 @@ mon_timer_popped(gpointer data)
587943
     }
587943
 
587943
     print_as(output_format, "Reconnecting...\n");
587943
-    if (fencing_connect() == pcmk_ok && cib_connect(TRUE) == pcmk_ok) {
587943
+    fencing_connect();
587943
+    if (cib_connect(TRUE) == pcmk_ok) {
587943
         mon_refresh_display(NULL);
587943
         timer_id = g_timeout_add(options.reconnect_msec, mon_timer_popped, NULL);
587943
     }
587943
@@ -726,12 +727,6 @@ mon_cib_connection_destroy_regular(gpointer user_data)
587943
     do_mon_cib_connection_destroy(user_data, false);
587943
 }
587943
 
587943
-static void
587943
-mon_cib_connection_destroy_error(gpointer user_data)
587943
-{
587943
-    do_mon_cib_connection_destroy(user_data, true);
587943
-}
587943
-
587943
 /*
587943
  * Mainloop signal handler.
587943
  */
587943
@@ -790,6 +785,8 @@ fencing_connect(void)
587943
                                             mon_st_callback_display);
587943
             st->cmds->register_notification(st, T_STONITH_NOTIFY_HISTORY, mon_st_callback_display);
587943
         }
587943
+    } else {
587943
+        st = NULL;
587943
     }
587943
 
587943
     return rc;
587943
@@ -1185,12 +1182,15 @@ handle_connection_failures(int rc)
587943
 static void
587943
 one_shot()
587943
 {
587943
-    int rc = fencing_connect();
587943
+    int rc;
587943
+
587943
+    fencing_connect();
587943
 
587943
+    rc = cib_connect(FALSE);
587943
     if (rc == pcmk_rc_ok) {
587943
-        rc = cib_connect(FALSE);
587943
-        handle_connection_failures(rc);
587943
         mon_refresh_display(NULL);
587943
+    } else {
587943
+        handle_connection_failures(rc);
587943
     }
587943
 
587943
     clean_up(CRM_EX_OK);
587943
@@ -1416,10 +1416,8 @@ main(int argc, char **argv)
587943
     do {
587943
         print_as(output_format ,"Waiting until cluster is available on this node ...\n");
587943
 
587943
-        rc = fencing_connect();
587943
-        if (rc == pcmk_ok) {
587943
-            rc = cib_connect(TRUE);
587943
-        }
587943
+        fencing_connect();
587943
+        rc = cib_connect(TRUE);
587943
 
587943
         if (rc != pcmk_ok) {
587943
             sleep(options.reconnect_msec / 1000);
587943
@@ -1896,16 +1894,12 @@ mon_refresh_display(gpointer user_data)
587943
         return 0;
587943
     }
587943
 
587943
-    /* get the stonith-history if there is evidence we need it
587943
-     */
587943
+    /* get the stonith-history if there is evidence we need it */
587943
     while (pcmk_is_set(options.mon_ops, mon_op_fence_history)) {
587943
         if (st != NULL) {
587943
             history_rc = st->cmds->history(st, st_opt_sync_call, NULL, &stonith_history, 120);
587943
 
587943
-            if (history_rc != 0) {
587943
-                out->err(out, "Critical: Unable to get stonith-history");
587943
-                mon_cib_connection_destroy_error(NULL);
587943
-            } else {
587943
+            if (history_rc == 0) {
587943
                 stonith_history = stonith__sort_history(stonith_history);
587943
                 if (!pcmk_is_set(options.mon_ops, mon_op_fence_full_history)
587943
                     && (output_format != mon_output_xml)) {
587943
@@ -1915,11 +1909,9 @@ mon_refresh_display(gpointer user_data)
587943
                 break; /* all other cases are errors */
587943
             }
587943
         } else {
587943
-            out->err(out, "Critical: No stonith-API");
587943
+            history_rc = ENOTCONN;
587943
+            break;
587943
         }
587943
-        free_xml(cib_copy);
587943
-        out->err(out, "Reading stonith-history failed");
587943
-        return 0;
587943
     }
587943
 
587943
     if (mon_data_set == NULL) {
587943
-- 
587943
1.8.3.1
587943
587943
587943
From 8abcb2bf0c5c90004a687e27aa86fd6ad1b62eb3 Mon Sep 17 00:00:00 2001
587943
From: Chris Lumens <clumens@redhat.com>
587943
Date: Thu, 14 Jan 2021 14:31:25 -0500
587943
Subject: [PATCH 07/11] Refactor: Split the fencing history code into its own
587943
 function.
587943
587943
---
587943
 tools/crm_mon.c | 46 ++++++++++++++++++++++++++++------------------
587943
 1 file changed, 28 insertions(+), 18 deletions(-)
587943
587943
diff --git a/tools/crm_mon.c b/tools/crm_mon.c
587943
index 17b8ee9..1baba5f 100644
587943
--- a/tools/crm_mon.c
587943
+++ b/tools/crm_mon.c
587943
@@ -1879,6 +1879,33 @@ crm_diff_update(const char *event, xmlNode * msg)
587943
 }
587943
 
587943
 static int
587943
+get_fencing_history(stonith_history_t **stonith_history)
587943
+{
587943
+    int rc = 0;
587943
+
587943
+    while (pcmk_is_set(options.mon_ops, mon_op_fence_history)) {
587943
+        if (st != NULL) {
587943
+            rc = st->cmds->history(st, st_opt_sync_call, NULL, stonith_history, 120);
587943
+
587943
+            if (rc == 0) {
587943
+                *stonith_history = stonith__sort_history(*stonith_history);
587943
+                if (!pcmk_is_set(options.mon_ops, mon_op_fence_full_history)
587943
+                    && (output_format != mon_output_xml)) {
587943
+
587943
+                    *stonith_history = pcmk__reduce_fence_history(*stonith_history);
587943
+                }
587943
+                break; /* all other cases are errors */
587943
+            }
587943
+        } else {
587943
+            rc = ENOTCONN;
587943
+            break;
587943
+        }
587943
+    }
587943
+
587943
+    return rc;
587943
+}
587943
+
587943
+static int
587943
 mon_refresh_display(gpointer user_data)
587943
 {
587943
     xmlNode *cib_copy = copy_xml(current_cib);
587943
@@ -1895,24 +1922,7 @@ mon_refresh_display(gpointer user_data)
587943
     }
587943
 
587943
     /* get the stonith-history if there is evidence we need it */
587943
-    while (pcmk_is_set(options.mon_ops, mon_op_fence_history)) {
587943
-        if (st != NULL) {
587943
-            history_rc = st->cmds->history(st, st_opt_sync_call, NULL, &stonith_history, 120);
587943
-
587943
-            if (history_rc == 0) {
587943
-                stonith_history = stonith__sort_history(stonith_history);
587943
-                if (!pcmk_is_set(options.mon_ops, mon_op_fence_full_history)
587943
-                    && (output_format != mon_output_xml)) {
587943
-
587943
-                    stonith_history = pcmk__reduce_fence_history(stonith_history);
587943
-                }
587943
-                break; /* all other cases are errors */
587943
-            }
587943
-        } else {
587943
-            history_rc = ENOTCONN;
587943
-            break;
587943
-        }
587943
-    }
587943
+    history_rc = get_fencing_history(&stonith_history);
587943
 
587943
     if (mon_data_set == NULL) {
587943
         mon_data_set = pe_new_working_set();
587943
-- 
587943
1.8.3.1
587943
587943
587943
From fa75e884e3c3822e1010ad1d67958e4f1cc5400b Mon Sep 17 00:00:00 2001
587943
From: Chris Lumens <clumens@redhat.com>
587943
Date: Thu, 14 Jan 2021 14:49:09 -0500
587943
Subject: [PATCH 08/11] Refactor: tools: Get rid of
587943
 mon_cib_connection_destroy_regular.
587943
587943
With the _error version removed in a previous commit, there's no need
587943
for this wrapper to exist anymore.  We can just call
587943
mon_cib_connection_destroy directly.
587943
---
587943
 tools/crm_mon.c | 22 ++++++----------------
587943
 1 file changed, 6 insertions(+), 16 deletions(-)
587943
587943
diff --git a/tools/crm_mon.c b/tools/crm_mon.c
587943
index 1baba5f..a0764a5 100644
587943
--- a/tools/crm_mon.c
587943
+++ b/tools/crm_mon.c
587943
@@ -691,13 +691,9 @@ mon_timer_popped(gpointer data)
587943
 }
587943
 
587943
 static void
587943
-do_mon_cib_connection_destroy(gpointer user_data, bool is_error)
587943
+mon_cib_connection_destroy(gpointer user_data)
587943
 {
587943
-    if (is_error) {
587943
-        out->err(out, "Connection to the cluster-daemons terminated");
587943
-    } else {
587943
-        out->info(out, "Connection to the cluster-daemons terminated");
587943
-    }
587943
+    out->info(out, "Connection to the cluster-daemons terminated");
587943
 
587943
     if (refresh_timer != NULL) {
587943
         /* we'll trigger a refresh after reconnect */
587943
@@ -721,12 +717,6 @@ do_mon_cib_connection_destroy(gpointer user_data, bool is_error)
587943
     return;
587943
 }
587943
 
587943
-static void
587943
-mon_cib_connection_destroy_regular(gpointer user_data)
587943
-{
587943
-    do_mon_cib_connection_destroy(user_data, false);
587943
-}
587943
-
587943
 /*
587943
  * Mainloop signal handler.
587943
  */
587943
@@ -831,7 +821,7 @@ cib_connect(gboolean full)
587943
     rc = cib->cmds->query(cib, NULL, &current_cib, cib_scope_local | cib_sync_call);
587943
 
587943
     if (rc == pcmk_ok && full) {
587943
-        rc = cib->cmds->set_connection_dnotify(cib, mon_cib_connection_destroy_regular);
587943
+        rc = cib->cmds->set_connection_dnotify(cib, mon_cib_connection_destroy);
587943
         if (rc == -EPROTONOSUPPORT) {
587943
             print_as
587943
                 (output_format, "Notification setup not supported, won't be able to reconnect after failure");
587943
@@ -890,7 +880,7 @@ detect_user_input(GIOChannel *channel, GIOCondition condition, gpointer user_dat
587943
                     options.mon_ops |= mon_op_fence_history;
587943
                     options.mon_ops |= mon_op_fence_connect;
587943
                     if (st == NULL) {
587943
-                        mon_cib_connection_destroy_regular(NULL);
587943
+                        mon_cib_connection_destroy(NULL);
587943
                     }
587943
                 }
587943
 
587943
@@ -2010,7 +2000,7 @@ mon_st_callback_event(stonith_t * st, stonith_event_t * e)
587943
 {
587943
     if (st->state == stonith_disconnected) {
587943
         /* disconnect cib as well and have everything reconnect */
587943
-        mon_cib_connection_destroy_regular(NULL);
587943
+        mon_cib_connection_destroy(NULL);
587943
     } else if (options.external_agent) {
587943
         char *desc = crm_strdup_printf("Operation %s requested by %s for peer %s: %s (ref=%s)",
587943
                                     e->operation, e->origin, e->target, pcmk_strerror(e->result),
587943
@@ -2059,7 +2049,7 @@ mon_st_callback_display(stonith_t * st, stonith_event_t * e)
587943
 {
587943
     if (st->state == stonith_disconnected) {
587943
         /* disconnect cib as well and have everything reconnect */
587943
-        mon_cib_connection_destroy_regular(NULL);
587943
+        mon_cib_connection_destroy(NULL);
587943
     } else {
587943
         print_dot(output_format);
587943
         kick_refresh(TRUE);
587943
-- 
587943
1.8.3.1
587943
587943
587943
From 009f3aa0caa6d138d4da418297f12c4a1210cf6b Mon Sep 17 00:00:00 2001
587943
From: Chris Lumens <clumens@redhat.com>
587943
Date: Thu, 14 Jan 2021 16:25:37 -0500
587943
Subject: [PATCH 09/11] Refactor: Add comments to connection functions in
587943
 crm_mon.c.
587943
587943
There are an awful lot of these functions, and trying to make sense of
587943
them can be confusing when there are no comments explaining when they
587943
are called.  Hopefully this helps a little.
587943
---
587943
 tools/crm_mon.c | 56 ++++++++++++++++++++++++++++++++++++++++++++++++--------
587943
 1 file changed, 48 insertions(+), 8 deletions(-)
587943
587943
diff --git a/tools/crm_mon.c b/tools/crm_mon.c
587943
index a0764a5..54a7958 100644
587943
--- a/tools/crm_mon.c
587943
+++ b/tools/crm_mon.c
587943
@@ -666,6 +666,10 @@ static GOptionEntry deprecated_entries[] = {
587943
 };
587943
 /* *INDENT-ON* */
587943
 
587943
+/* Reconnect to the CIB and fencing agent after reconnect_msec has passed.  This sounds
587943
+ * like it would be more broadly useful, but only ever happens after a disconnect via
587943
+ * mon_cib_connection_destroy.
587943
+ */
587943
 static gboolean
587943
 mon_timer_popped(gpointer data)
587943
 {
587943
@@ -684,12 +688,17 @@ mon_timer_popped(gpointer data)
587943
     print_as(output_format, "Reconnecting...\n");
587943
     fencing_connect();
587943
     if (cib_connect(TRUE) == pcmk_ok) {
587943
+        /* Redraw the screen and reinstall ourselves to get called after another reconnect_msec. */
587943
         mon_refresh_display(NULL);
587943
         timer_id = g_timeout_add(options.reconnect_msec, mon_timer_popped, NULL);
587943
     }
587943
     return FALSE;
587943
 }
587943
 
587943
+/* Called from various places when we are disconnected from the CIB or from the
587943
+ * fencing agent.  If the CIB connection is still valid, this function will also
587943
+ * attempt to sign off and reconnect.
587943
+ */
587943
 static void
587943
 mon_cib_connection_destroy(gpointer user_data)
587943
 {
587943
@@ -717,9 +726,7 @@ mon_cib_connection_destroy(gpointer user_data)
587943
     return;
587943
 }
587943
 
587943
-/*
587943
- * Mainloop signal handler.
587943
- */
587943
+/* Signal handler installed into the mainloop for normal program shutdown */
587943
 static void
587943
 mon_shutdown(int nsig)
587943
 {
587943
@@ -729,6 +736,10 @@ mon_shutdown(int nsig)
587943
 #if CURSES_ENABLED
587943
 static sighandler_t ncurses_winch_handler;
587943
 
587943
+/* Signal handler installed the regular way (not into the main loop) for when
587943
+ * the screen is resized.  Commonly, this happens when running in an xterm and
587943
+ * the user changes its size.
587943
+ */
587943
 static void
587943
 mon_winresize(int nsig)
587943
 {
587943
@@ -743,6 +754,9 @@ mon_winresize(int nsig)
587943
             (*ncurses_winch_handler) (SIGWINCH);
587943
         getmaxyx(stdscr, lines, cols);
587943
         resizeterm(lines, cols);
587943
+        /* Alert the mainloop code we'd like the refresh_trigger to run next
587943
+         * time the mainloop gets around to checking.
587943
+         */
587943
         mainloop_set_trigger(refresh_trigger);
587943
     }
587943
     not_done--;
587943
@@ -863,6 +877,12 @@ get_option_desc(char c)
587943
 #define print_option_help(output_format, option, condition) \
587943
     out->info(out, "%c %c: \t%s", ((condition)? '*': ' '), option, get_option_desc(option));
587943
 
587943
+/* This function is called from the main loop when there is something to be read
587943
+ * on stdin, like an interactive user's keystroke.  All it does is read the keystroke,
587943
+ * set flags (or show the page showing which keystrokes are valid), and redraw the
587943
+ * screen.  It does not do anything with connections to the CIB or fencing
587943
+ * agent beyond what would happen in mon_refresh_display.
587943
+ */
587943
 static gboolean
587943
 detect_user_input(GIOChannel *channel, GIOCondition condition, gpointer user_data)
587943
 {
587943
@@ -951,6 +971,7 @@ detect_user_input(GIOChannel *channel, GIOCondition condition, gpointer user_dat
587943
                 config_mode = TRUE;
587943
                 break;
587943
             default:
587943
+                /* All other keys just redraw the screen. */
587943
                 goto refresh;
587943
         }
587943
 
587943
@@ -1441,6 +1462,10 @@ main(int argc, char **argv)
587943
         g_io_add_watch(io_channel, G_IO_IN, detect_user_input, NULL);
587943
     }
587943
 #endif
587943
+
587943
+    /* When refresh_trigger->trigger is set to TRUE, call mon_refresh_display.  In
587943
+     * this file, that is anywhere mainloop_set_trigger is called.
587943
+     */
587943
     refresh_trigger = mainloop_add_trigger(G_PRIORITY_LOW, mon_refresh_display, NULL);
587943
 
587943
     g_main_loop_run(mainloop);
587943
@@ -1677,6 +1702,10 @@ handle_rsc_op(xmlNode * xml, const char *node_id)
587943
     free(task);
587943
 }
587943
 
587943
+/* This function is just a wrapper around mainloop_set_trigger so that it can be
587943
+ * called from a mainloop directly.  It's simply another way of ensuring the screen
587943
+ * gets redrawn.
587943
+ */
587943
 static gboolean
587943
 mon_trigger_refresh(gpointer user_data)
587943
 {
587943
@@ -1995,6 +2024,9 @@ mon_refresh_display(gpointer user_data)
587943
     return 1;
587943
 }
587943
 
587943
+/* This function is called for fencing events (see fencing_connect for which ones) when
587943
+ * --watch-fencing is used on the command line.
587943
+ */
587943
 static void
587943
 mon_st_callback_event(stonith_t * st, stonith_event_t * e)
587943
 {
587943
@@ -2010,6 +2042,16 @@ mon_st_callback_event(stonith_t * st, stonith_event_t * e)
587943
     }
587943
 }
587943
 
587943
+/* Cause the screen to be redrawn (via mainloop_set_trigger) when various conditions are met:
587943
+ *
587943
+ * - If the last update occurred more than reconnect_msec ago (defaults to 5s, but can be
587943
+ *   changed via the -i command line option), or
587943
+ * - After every 10 CIB updates, or
587943
+ * - If it's been 2s since the last update
587943
+ *
587943
+ * This function sounds like it would be more broadly useful, but it is only called when a
587943
+ * fencing event is received or a CIB diff occurrs.
587943
+ */
587943
 static void
587943
 kick_refresh(gboolean data_updated)
587943
 {
587943
@@ -2024,11 +2066,6 @@ kick_refresh(gboolean data_updated)
587943
         refresh_timer = mainloop_timer_add("refresh", 2000, FALSE, mon_trigger_refresh, NULL);
587943
     }
587943
 
587943
-    /* Refresh
587943
-     * - immediately if the last update was more than 5s ago
587943
-     * - every 10 cib-updates
587943
-     * - at most 2s after the last update
587943
-     */
587943
     if ((now - last_refresh) > (options.reconnect_msec / 1000)) {
587943
         mainloop_set_trigger(refresh_trigger);
587943
         mainloop_timer_stop(refresh_timer);
587943
@@ -2044,6 +2081,9 @@ kick_refresh(gboolean data_updated)
587943
     }
587943
 }
587943
 
587943
+/* This function is called for fencing events (see fencing_connect for which ones) when
587943
+ * --watch-fencing is NOT used on the command line.
587943
+ */
587943
 static void
587943
 mon_st_callback_display(stonith_t * st, stonith_event_t * e)
587943
 {
587943
-- 
587943
1.8.3.1
587943
587943
587943
From aa328f0788ef0057874aeeeae7261dfb450b9b9e Mon Sep 17 00:00:00 2001
587943
From: Chris Lumens <clumens@redhat.com>
587943
Date: Thu, 14 Jan 2021 16:44:45 -0500
587943
Subject: [PATCH 10/11] Refactor: tools: Rename some connection-related symbols
587943
 in crm_mon.
587943
587943
---
587943
 tools/crm_mon.c | 28 ++++++++++++++--------------
587943
 1 file changed, 14 insertions(+), 14 deletions(-)
587943
587943
diff --git a/tools/crm_mon.c b/tools/crm_mon.c
587943
index 54a7958..89d7ae2 100644
587943
--- a/tools/crm_mon.c
587943
+++ b/tools/crm_mon.c
587943
@@ -66,7 +66,7 @@ static mon_output_format_t output_format = mon_output_unset;
587943
 /* other globals */
587943
 static GIOChannel *io_channel = NULL;
587943
 static GMainLoop *mainloop = NULL;
587943
-static guint timer_id = 0;
587943
+static guint reconnect_timer = 0;
587943
 static mainloop_timer_t *refresh_timer = NULL;
587943
 static pe_working_set_t *mon_data_set = NULL;
587943
 
587943
@@ -131,7 +131,7 @@ static int cib_connect(gboolean full);
587943
 static int fencing_connect(void);
587943
 static void mon_st_callback_event(stonith_t * st, stonith_event_t * e);
587943
 static void mon_st_callback_display(stonith_t * st, stonith_event_t * e);
587943
-static void kick_refresh(gboolean data_updated);
587943
+static void refresh_after_event(gboolean data_updated);
587943
 
587943
 static unsigned int
587943
 all_includes(mon_output_format_t fmt) {
587943
@@ -671,7 +671,7 @@ static GOptionEntry deprecated_entries[] = {
587943
  * mon_cib_connection_destroy.
587943
  */
587943
 static gboolean
587943
-mon_timer_popped(gpointer data)
587943
+reconnect_after_timeout(gpointer data)
587943
 {
587943
 #if CURSES_ENABLED
587943
     if (output_format == mon_output_console) {
587943
@@ -680,9 +680,9 @@ mon_timer_popped(gpointer data)
587943
     }
587943
 #endif
587943
 
587943
-    if (timer_id > 0) {
587943
-        g_source_remove(timer_id);
587943
-        timer_id = 0;
587943
+    if (reconnect_timer > 0) {
587943
+        g_source_remove(reconnect_timer);
587943
+        reconnect_timer = 0;
587943
     }
587943
 
587943
     print_as(output_format, "Reconnecting...\n");
587943
@@ -690,7 +690,7 @@ mon_timer_popped(gpointer data)
587943
     if (cib_connect(TRUE) == pcmk_ok) {
587943
         /* Redraw the screen and reinstall ourselves to get called after another reconnect_msec. */
587943
         mon_refresh_display(NULL);
587943
-        timer_id = g_timeout_add(options.reconnect_msec, mon_timer_popped, NULL);
587943
+        reconnect_timer = g_timeout_add(options.reconnect_msec, reconnect_after_timeout, NULL);
587943
     }
587943
     return FALSE;
587943
 }
587943
@@ -708,10 +708,10 @@ mon_cib_connection_destroy(gpointer user_data)
587943
         /* we'll trigger a refresh after reconnect */
587943
         mainloop_timer_stop(refresh_timer);
587943
     }
587943
-    if (timer_id) {
587943
+    if (reconnect_timer) {
587943
         /* we'll trigger a new reconnect-timeout at the end */
587943
-        g_source_remove(timer_id);
587943
-        timer_id = 0;
587943
+        g_source_remove(reconnect_timer);
587943
+        reconnect_timer = 0;
587943
     }
587943
     if (st) {
587943
         /* the client API won't properly reconnect notifications
587943
@@ -721,7 +721,7 @@ mon_cib_connection_destroy(gpointer user_data)
587943
     }
587943
     if (cib) {
587943
         cib->cmds->signoff(cib);
587943
-        timer_id = g_timeout_add(options.reconnect_msec, mon_timer_popped, NULL);
587943
+        reconnect_timer = g_timeout_add(options.reconnect_msec, reconnect_after_timeout, NULL);
587943
     }
587943
     return;
587943
 }
587943
@@ -1894,7 +1894,7 @@ crm_diff_update(const char *event, xmlNode * msg)
587943
     }
587943
 
587943
     stale = FALSE;
587943
-    kick_refresh(cib_updated);
587943
+    refresh_after_event(cib_updated);
587943
 }
587943
 
587943
 static int
587943
@@ -2053,7 +2053,7 @@ mon_st_callback_event(stonith_t * st, stonith_event_t * e)
587943
  * fencing event is received or a CIB diff occurrs.
587943
  */
587943
 static void
587943
-kick_refresh(gboolean data_updated)
587943
+refresh_after_event(gboolean data_updated)
587943
 {
587943
     static int updates = 0;
587943
     time_t now = time(NULL);
587943
@@ -2092,7 +2092,7 @@ mon_st_callback_display(stonith_t * st, stonith_event_t * e)
587943
         mon_cib_connection_destroy(NULL);
587943
     } else {
587943
         print_dot(output_format);
587943
-        kick_refresh(TRUE);
587943
+        refresh_after_event(TRUE);
587943
     }
587943
 }
587943
 
587943
-- 
587943
1.8.3.1
587943
587943
587943
From 8c51b4980f349e8773681f7ed2882ca639e0e63a Mon Sep 17 00:00:00 2001
587943
From: Chris Lumens <clumens@redhat.com>
587943
Date: Mon, 18 Jan 2021 14:03:39 -0500
587943
Subject: [PATCH 11/11] Fix: tools: Attempt to reestablish connections in
587943
 crm_mon.
587943
587943
If the fencing or CIB connections go away between screen refreshes,
587943
attempt to re-establish those connections.  The functions that do this
587943
should be safe to be called repeatedly.
587943
587943
See: rhbz#1880426, rhbz#1466875
587943
---
587943
 tools/crm_mon.c | 17 ++++++++++++++---
587943
 1 file changed, 14 insertions(+), 3 deletions(-)
587943
587943
diff --git a/tools/crm_mon.c b/tools/crm_mon.c
587943
index 89d7ae2..083b7ae 100644
587943
--- a/tools/crm_mon.c
587943
+++ b/tools/crm_mon.c
587943
@@ -126,6 +126,7 @@ static void clean_up_cib_connection(void);
587943
 static void clean_up_fencing_connection(void);
587943
 static crm_exit_t clean_up(crm_exit_t exit_code);
587943
 static void crm_diff_update(const char *event, xmlNode * msg);
587943
+static void handle_connection_failures(int rc);
587943
 static int mon_refresh_display(gpointer user_data);
587943
 static int cib_connect(gboolean full);
587943
 static int fencing_connect(void);
587943
@@ -690,9 +691,11 @@ reconnect_after_timeout(gpointer data)
587943
     if (cib_connect(TRUE) == pcmk_ok) {
587943
         /* Redraw the screen and reinstall ourselves to get called after another reconnect_msec. */
587943
         mon_refresh_display(NULL);
587943
-        reconnect_timer = g_timeout_add(options.reconnect_msec, reconnect_after_timeout, NULL);
587943
+        return FALSE;
587943
     }
587943
-    return FALSE;
587943
+
587943
+    reconnect_timer = g_timeout_add(options.reconnect_msec, reconnect_after_timeout, NULL);
587943
+    return TRUE;
587943
 }
587943
 
587943
 /* Called from various places when we are disconnected from the CIB or from the
587943
@@ -887,6 +890,7 @@ static gboolean
587943
 detect_user_input(GIOChannel *channel, GIOCondition condition, gpointer user_data)
587943
 {
587943
     int c;
587943
+    int rc;
587943
     gboolean config_mode = FALSE;
587943
 
587943
     while (1) {
587943
@@ -1001,7 +1005,14 @@ detect_user_input(GIOChannel *channel, GIOCondition condition, gpointer user_dat
587943
     }
587943
 
587943
 refresh:
587943
-    mon_refresh_display(NULL);
587943
+    fencing_connect();
587943
+    rc = cib_connect(FALSE);
587943
+    if (rc == pcmk_rc_ok) {
587943
+        mon_refresh_display(NULL);
587943
+    } else {
587943
+        handle_connection_failures(rc);
587943
+    }
587943
+
587943
     return TRUE;
587943
 }
587943
 #endif
587943
-- 
587943
1.8.3.1
587943