c563b9
From bc91cc5d8b4257627d09103cf676cd83656bda8c Mon Sep 17 00:00:00 2001
c563b9
From: Chris Lumens <clumens@redhat.com>
c563b9
Date: Tue, 12 Jan 2021 10:45:53 -0500
c563b9
Subject: [PATCH 01/11] Refactor: tools: Split up connection teardown in
c563b9
 crm_mon.
c563b9
c563b9
We don't necessarily want to tear down the fencing and CIB connections
c563b9
at the same time.  This can then immediately be used in
c563b9
mon_refresh_display and do_mon_cib_connection_destroy.
c563b9
---
c563b9
 tools/crm_mon.c | 57 +++++++++++++++++++++++++++++++--------------------------
c563b9
 1 file changed, 31 insertions(+), 26 deletions(-)
c563b9
c563b9
diff --git a/tools/crm_mon.c b/tools/crm_mon.c
c563b9
index 8ec97bb..fc20e4c 100644
c563b9
--- a/tools/crm_mon.c
c563b9
+++ b/tools/crm_mon.c
c563b9
@@ -122,7 +122,8 @@ struct {
c563b9
     .mon_ops = mon_op_default
c563b9
 };
c563b9
 
c563b9
-static void clean_up_connections(void);
c563b9
+static void clean_up_cib_connection(void);
c563b9
+static void clean_up_fencing_connection(void);
c563b9
 static crm_exit_t clean_up(crm_exit_t exit_code);
c563b9
 static void crm_diff_update(const char *event, xmlNode * msg);
c563b9
 static int mon_refresh_display(gpointer user_data);
c563b9
@@ -712,12 +713,7 @@ do_mon_cib_connection_destroy(gpointer user_data, bool is_error)
c563b9
         /* the client API won't properly reconnect notifications
c563b9
          * if they are still in the table - so remove them
c563b9
          */
c563b9
-        st->cmds->remove_notification(st, T_STONITH_NOTIFY_DISCONNECT);
c563b9
-        st->cmds->remove_notification(st, T_STONITH_NOTIFY_FENCE);
c563b9
-        st->cmds->remove_notification(st, T_STONITH_NOTIFY_HISTORY);
c563b9
-        if (st->state != stonith_disconnected) {
c563b9
-            st->cmds->disconnect(st);
c563b9
-        }
c563b9
+        clean_up_fencing_connection();
c563b9
     }
c563b9
     if (cib) {
c563b9
         cib->cmds->signoff(cib);
c563b9
@@ -851,7 +847,8 @@ cib_connect(gboolean full)
c563b9
 
c563b9
         if (rc != pcmk_ok) {
c563b9
             out->err(out, "Notification setup failed, could not monitor CIB actions");
c563b9
-            clean_up_connections();
c563b9
+            clean_up_cib_connection();
c563b9
+            clean_up_fencing_connection();
c563b9
         }
c563b9
     }
c563b9
     return rc;
c563b9
@@ -1866,9 +1863,7 @@ mon_refresh_display(gpointer user_data)
c563b9
     last_refresh = time(NULL);
c563b9
 
c563b9
     if (cli_config_update(&cib_copy, NULL, FALSE) == FALSE) {
c563b9
-        if (cib) {
c563b9
-            cib->cmds->signoff(cib);
c563b9
-        }
c563b9
+        clean_up_cib_connection();
c563b9
         out->err(out, "Upgrade failed: %s", pcmk_strerror(-pcmk_err_schema_validation));
c563b9
         clean_up(CRM_EX_CONFIG);
c563b9
         return 0;
c563b9
@@ -2040,24 +2035,33 @@ mon_st_callback_display(stonith_t * st, stonith_event_t * e)
c563b9
 }
c563b9
 
c563b9
 static void
c563b9
-clean_up_connections(void)
c563b9
+clean_up_cib_connection(void)
c563b9
 {
c563b9
-    if (cib != NULL) {
c563b9
-        cib->cmds->signoff(cib);
c563b9
-        cib_delete(cib);
c563b9
-        cib = NULL;
c563b9
+    if (cib == NULL) {
c563b9
+        return;
c563b9
     }
c563b9
 
c563b9
-    if (st != NULL) {
c563b9
-        if (st->state != stonith_disconnected) {
c563b9
-            st->cmds->remove_notification(st, T_STONITH_NOTIFY_DISCONNECT);
c563b9
-            st->cmds->remove_notification(st, T_STONITH_NOTIFY_FENCE);
c563b9
-            st->cmds->remove_notification(st, T_STONITH_NOTIFY_HISTORY);
c563b9
-            st->cmds->disconnect(st);
c563b9
-        }
c563b9
-        stonith_api_delete(st);
c563b9
-        st = NULL;
c563b9
+    cib->cmds->signoff(cib);
c563b9
+    cib_delete(cib);
c563b9
+    cib = NULL;
c563b9
+}
c563b9
+
c563b9
+static void
c563b9
+clean_up_fencing_connection(void)
c563b9
+{
c563b9
+    if (st == NULL) {
c563b9
+        return;
c563b9
     }
c563b9
+
c563b9
+    if (st->state != stonith_disconnected) {
c563b9
+        st->cmds->remove_notification(st, T_STONITH_NOTIFY_DISCONNECT);
c563b9
+        st->cmds->remove_notification(st, T_STONITH_NOTIFY_FENCE);
c563b9
+        st->cmds->remove_notification(st, T_STONITH_NOTIFY_HISTORY);
c563b9
+        st->cmds->disconnect(st);
c563b9
+    }
c563b9
+
c563b9
+    stonith_api_delete(st);
c563b9
+    st = NULL;
c563b9
 }
c563b9
 
c563b9
 /*
c563b9
@@ -2074,7 +2078,8 @@ clean_up(crm_exit_t exit_code)
c563b9
     /* Quitting crm_mon is much more complicated than it ought to be. */
c563b9
 
c563b9
     /* (1) Close connections, free things, etc. */
c563b9
-    clean_up_connections();
c563b9
+    clean_up_cib_connection();
c563b9
+    clean_up_fencing_connection();
c563b9
     free(options.pid_file);
c563b9
     free(options.neg_location_prefix);
c563b9
     g_slist_free_full(options.includes_excludes, free);
c563b9
-- 
c563b9
1.8.3.1
c563b9
c563b9
c563b9
From 28d646ce67c6a933eaa76aca51f9973a65d0ee3c Mon Sep 17 00:00:00 2001
c563b9
From: Chris Lumens <clumens@redhat.com>
c563b9
Date: Thu, 7 Jan 2021 17:18:13 -0500
c563b9
Subject: [PATCH 02/11] Refactor: tools: Split up connection establishment in
c563b9
 crm_mon.
c563b9
c563b9
We don't always want to connect to the CIB and fencing in
c563b9
the same action.  Note that bringing up the fencing connection needs to
c563b9
happen first, because mon_refresh_display is called from cib_connect and
c563b9
it will want a fencing connection.
c563b9
---
c563b9
 tools/crm_mon.c | 66 +++++++++++++++++++++++++++++++++------------------------
c563b9
 1 file changed, 38 insertions(+), 28 deletions(-)
c563b9
c563b9
diff --git a/tools/crm_mon.c b/tools/crm_mon.c
c563b9
index fc20e4c..301a222 100644
c563b9
--- a/tools/crm_mon.c
c563b9
+++ b/tools/crm_mon.c
c563b9
@@ -128,6 +128,7 @@ static crm_exit_t clean_up(crm_exit_t exit_code);
c563b9
 static void crm_diff_update(const char *event, xmlNode * msg);
c563b9
 static int mon_refresh_display(gpointer user_data);
c563b9
 static int cib_connect(gboolean full);
c563b9
+static int fencing_connect(void);
c563b9
 static void mon_st_callback_event(stonith_t * st, stonith_event_t * e);
c563b9
 static void mon_st_callback_display(stonith_t * st, stonith_event_t * e);
c563b9
 static void kick_refresh(gboolean data_updated);
c563b9
@@ -668,8 +669,6 @@ static GOptionEntry deprecated_entries[] = {
c563b9
 static gboolean
c563b9
 mon_timer_popped(gpointer data)
c563b9
 {
c563b9
-    int rc = pcmk_ok;
c563b9
-
c563b9
 #if CURSES_ENABLED
c563b9
     if (output_format == mon_output_console) {
c563b9
         clear();
c563b9
@@ -683,9 +682,7 @@ mon_timer_popped(gpointer data)
c563b9
     }
c563b9
 
c563b9
     print_as(output_format, "Reconnecting...\n");
c563b9
-    rc = cib_connect(TRUE);
c563b9
-
c563b9
-    if (rc != pcmk_ok) {
c563b9
+    if (fencing_connect() == pcmk_ok && cib_connect(TRUE) == pcmk_ok) {
c563b9
         timer_id = g_timeout_add(options.reconnect_msec, mon_timer_popped, NULL);
c563b9
     }
c563b9
     return FALSE;
c563b9
@@ -767,39 +764,48 @@ mon_winresize(int nsig)
c563b9
 #endif
c563b9
 
c563b9
 static int
c563b9
-cib_connect(gboolean full)
c563b9
+fencing_connect(void)
c563b9
 {
c563b9
     int rc = pcmk_ok;
c563b9
-    static gboolean need_pass = TRUE;
c563b9
-
c563b9
-    CRM_CHECK(cib != NULL, return -EINVAL);
c563b9
-
c563b9
-    if (getenv("CIB_passwd") != NULL) {
c563b9
-        need_pass = FALSE;
c563b9
-    }
c563b9
 
c563b9
     if (pcmk_is_set(options.mon_ops, mon_op_fence_connect) && (st == NULL)) {
c563b9
         st = stonith_api_new();
c563b9
     }
c563b9
 
c563b9
-    if (pcmk_is_set(options.mon_ops, mon_op_fence_connect)
c563b9
-        && (st != NULL) && (st->state == stonith_disconnected)) {
c563b9
+    if (!pcmk_is_set(options.mon_ops, mon_op_fence_connect) ||
c563b9
+        st == NULL || st->state != stonith_disconnected) {
c563b9
+        return rc;
c563b9
+    }
c563b9
 
c563b9
-        rc = st->cmds->connect(st, crm_system_name, NULL);
c563b9
-        if (rc == pcmk_ok) {
c563b9
-            crm_trace("Setting up stonith callbacks");
c563b9
-            if (pcmk_is_set(options.mon_ops, mon_op_watch_fencing)) {
c563b9
-                st->cmds->register_notification(st, T_STONITH_NOTIFY_DISCONNECT,
c563b9
-                                                mon_st_callback_event);
c563b9
-                st->cmds->register_notification(st, T_STONITH_NOTIFY_FENCE, mon_st_callback_event);
c563b9
-            } else {
c563b9
-                st->cmds->register_notification(st, T_STONITH_NOTIFY_DISCONNECT,
c563b9
-                                                mon_st_callback_display);
c563b9
-                st->cmds->register_notification(st, T_STONITH_NOTIFY_HISTORY, mon_st_callback_display);
c563b9
-            }
c563b9
+    rc = st->cmds->connect(st, crm_system_name, NULL);
c563b9
+    if (rc == pcmk_ok) {
c563b9
+        crm_trace("Setting up stonith callbacks");
c563b9
+        if (pcmk_is_set(options.mon_ops, mon_op_watch_fencing)) {
c563b9
+            st->cmds->register_notification(st, T_STONITH_NOTIFY_DISCONNECT,
c563b9
+                                            mon_st_callback_event);
c563b9
+            st->cmds->register_notification(st, T_STONITH_NOTIFY_FENCE, mon_st_callback_event);
c563b9
+        } else {
c563b9
+            st->cmds->register_notification(st, T_STONITH_NOTIFY_DISCONNECT,
c563b9
+                                            mon_st_callback_display);
c563b9
+            st->cmds->register_notification(st, T_STONITH_NOTIFY_HISTORY, mon_st_callback_display);
c563b9
         }
c563b9
     }
c563b9
 
c563b9
+    return rc;
c563b9
+}
c563b9
+
c563b9
+static int
c563b9
+cib_connect(gboolean full)
c563b9
+{
c563b9
+    int rc = pcmk_ok;
c563b9
+    static gboolean need_pass = TRUE;
c563b9
+
c563b9
+    CRM_CHECK(cib != NULL, return -EINVAL);
c563b9
+
c563b9
+    if (getenv("CIB_passwd") != NULL) {
c563b9
+        need_pass = FALSE;
c563b9
+    }
c563b9
+
c563b9
     if (cib->state == cib_connected_query || cib->state == cib_connected_command) {
c563b9
         return rc;
c563b9
     }
c563b9
@@ -1373,7 +1379,11 @@ main(int argc, char **argv)
c563b9
         if (!pcmk_is_set(options.mon_ops, mon_op_one_shot)) {
c563b9
             print_as(output_format ,"Waiting until cluster is available on this node ...\n");
c563b9
         }
c563b9
-        rc = cib_connect(!pcmk_is_set(options.mon_ops, mon_op_one_shot));
c563b9
+
c563b9
+        rc = fencing_connect();
c563b9
+        if (rc == pcmk_ok) {
c563b9
+            rc = cib_connect(!pcmk_is_set(options.mon_ops, mon_op_one_shot));
c563b9
+        }
c563b9
 
c563b9
         if (pcmk_is_set(options.mon_ops, mon_op_one_shot)) {
c563b9
             break;
c563b9
-- 
c563b9
1.8.3.1
c563b9
c563b9
c563b9
From e12508ffba06b1c5652e7f49a449aae6d89ec420 Mon Sep 17 00:00:00 2001
c563b9
From: Chris Lumens <clumens@redhat.com>
c563b9
Date: Tue, 12 Jan 2021 17:01:53 -0500
c563b9
Subject: [PATCH 03/11] Refactor: tools: Split one shot mode out into its own
c563b9
 function.
c563b9
c563b9
Also, the connection error handling function can get split out on its
c563b9
own to allow it to be reused in both the one shot and loop
c563b9
cases.
c563b9
---
c563b9
 tools/crm_mon.c | 69 +++++++++++++++++++++++++++++++++++----------------------
c563b9
 1 file changed, 43 insertions(+), 26 deletions(-)
c563b9
c563b9
diff --git a/tools/crm_mon.c b/tools/crm_mon.c
c563b9
index 301a222..b33598b 100644
c563b9
--- a/tools/crm_mon.c
c563b9
+++ b/tools/crm_mon.c
c563b9
@@ -1162,6 +1162,41 @@ reconcile_output_format(pcmk__common_args_t *args) {
c563b9
     }
c563b9
 }
c563b9
 
c563b9
+static void
c563b9
+handle_connection_failures(int rc)
c563b9
+{
c563b9
+    if (rc == pcmk_ok) {
c563b9
+        return;
c563b9
+    }
c563b9
+
c563b9
+    if (output_format == mon_output_monitor) {
c563b9
+        g_set_error(&error, PCMK__EXITC_ERROR, CRM_EX_ERROR, "CLUSTER CRIT: Connection to cluster failed: %s",
c563b9
+                    pcmk_strerror(rc));
c563b9
+        rc = MON_STATUS_CRIT;
c563b9
+    } else if (rc == -ENOTCONN) {
c563b9
+        g_set_error(&error, PCMK__EXITC_ERROR, CRM_EX_ERROR, "Error: cluster is not available on this node");
c563b9
+        rc = crm_errno2exit(rc);
c563b9
+    } else {
c563b9
+        g_set_error(&error, PCMK__EXITC_ERROR, CRM_EX_ERROR, "Connection to cluster failed: %s", pcmk_strerror(rc));
c563b9
+        rc = crm_errno2exit(rc);
c563b9
+    }
c563b9
+
c563b9
+    clean_up(rc);
c563b9
+}
c563b9
+
c563b9
+static void
c563b9
+one_shot()
c563b9
+{
c563b9
+    int rc = fencing_connect();
c563b9
+
c563b9
+    if (rc == pcmk_rc_ok) {
c563b9
+        rc = cib_connect(FALSE);
c563b9
+        handle_connection_failures(rc);
c563b9
+    }
c563b9
+
c563b9
+    clean_up(CRM_EX_OK);
c563b9
+}
c563b9
+
c563b9
 int
c563b9
 main(int argc, char **argv)
c563b9
 {
c563b9
@@ -1375,20 +1410,19 @@ main(int argc, char **argv)
c563b9
 
c563b9
     crm_info("Starting %s", crm_system_name);
c563b9
 
c563b9
+    if (pcmk_is_set(options.mon_ops, mon_op_one_shot)) {
c563b9
+        one_shot();
c563b9
+    }
c563b9
+
c563b9
     do {
c563b9
-        if (!pcmk_is_set(options.mon_ops, mon_op_one_shot)) {
c563b9
-            print_as(output_format ,"Waiting until cluster is available on this node ...\n");
c563b9
-        }
c563b9
+        print_as(output_format ,"Waiting until cluster is available on this node ...\n");
c563b9
 
c563b9
         rc = fencing_connect();
c563b9
         if (rc == pcmk_ok) {
c563b9
-            rc = cib_connect(!pcmk_is_set(options.mon_ops, mon_op_one_shot));
c563b9
+            rc = cib_connect(TRUE);
c563b9
         }
c563b9
 
c563b9
-        if (pcmk_is_set(options.mon_ops, mon_op_one_shot)) {
c563b9
-            break;
c563b9
-
c563b9
-        } else if (rc != pcmk_ok) {
c563b9
+        if (rc != pcmk_ok) {
c563b9
             sleep(options.reconnect_msec / 1000);
c563b9
 #if CURSES_ENABLED
c563b9
             if (output_format == mon_output_console) {
c563b9
@@ -1402,24 +1436,7 @@ main(int argc, char **argv)
c563b9
 
c563b9
     } while (rc == -ENOTCONN);
c563b9
 
c563b9
-    if (rc != pcmk_ok) {
c563b9
-        if (output_format == mon_output_monitor) {
c563b9
-            g_set_error(&error, PCMK__EXITC_ERROR, CRM_EX_ERROR, "CLUSTER CRIT: Connection to cluster failed: %s",
c563b9
-                        pcmk_strerror(rc));
c563b9
-            return clean_up(MON_STATUS_CRIT);
c563b9
-        } else {
c563b9
-            if (rc == -ENOTCONN) {
c563b9
-                g_set_error(&error, PCMK__EXITC_ERROR, CRM_EX_ERROR, "Error: cluster is not available on this node");
c563b9
-            } else {
c563b9
-                g_set_error(&error, PCMK__EXITC_ERROR, CRM_EX_ERROR, "Connection to cluster failed: %s", pcmk_strerror(rc));
c563b9
-            }
c563b9
-        }
c563b9
-        return clean_up(crm_errno2exit(rc));
c563b9
-    }
c563b9
-
c563b9
-    if (pcmk_is_set(options.mon_ops, mon_op_one_shot)) {
c563b9
-        return clean_up(CRM_EX_OK);
c563b9
-    }
c563b9
+    handle_connection_failures(rc);
c563b9
 
c563b9
     mainloop = g_main_loop_new(NULL, FALSE);
c563b9
 
c563b9
-- 
c563b9
1.8.3.1
c563b9
c563b9
c563b9
From 0eb307a19d57d4a59a4b51a64a3b62dcd0b7cc9a Mon Sep 17 00:00:00 2001
c563b9
From: Chris Lumens <clumens@redhat.com>
c563b9
Date: Wed, 13 Jan 2021 12:47:41 -0500
c563b9
Subject: [PATCH 04/11] Refactor: tools: Don't call mon_refresh_display from
c563b9
 cib_connect.
c563b9
c563b9
---
c563b9
 tools/crm_mon.c | 6 +++---
c563b9
 1 file changed, 3 insertions(+), 3 deletions(-)
c563b9
c563b9
diff --git a/tools/crm_mon.c b/tools/crm_mon.c
c563b9
index b33598b..b0daf76 100644
c563b9
--- a/tools/crm_mon.c
c563b9
+++ b/tools/crm_mon.c
c563b9
@@ -683,6 +683,7 @@ mon_timer_popped(gpointer data)
c563b9
 
c563b9
     print_as(output_format, "Reconnecting...\n");
c563b9
     if (fencing_connect() == pcmk_ok && cib_connect(TRUE) == pcmk_ok) {
c563b9
+        mon_refresh_display(NULL);
c563b9
         timer_id = g_timeout_add(options.reconnect_msec, mon_timer_popped, NULL);
c563b9
     }
c563b9
     return FALSE;
c563b9
@@ -831,9 +832,6 @@ cib_connect(gboolean full)
c563b9
     }
c563b9
 
c563b9
     rc = cib->cmds->query(cib, NULL, &current_cib, cib_scope_local | cib_sync_call);
c563b9
-    if (rc == pcmk_ok) {
c563b9
-        mon_refresh_display(NULL);
c563b9
-    }
c563b9
 
c563b9
     if (rc == pcmk_ok && full) {
c563b9
         rc = cib->cmds->set_connection_dnotify(cib, mon_cib_connection_destroy_regular);
c563b9
@@ -1192,6 +1190,7 @@ one_shot()
c563b9
     if (rc == pcmk_rc_ok) {
c563b9
         rc = cib_connect(FALSE);
c563b9
         handle_connection_failures(rc);
c563b9
+        mon_refresh_display(NULL);
c563b9
     }
c563b9
 
c563b9
     clean_up(CRM_EX_OK);
c563b9
@@ -1437,6 +1436,7 @@ main(int argc, char **argv)
c563b9
     } while (rc == -ENOTCONN);
c563b9
 
c563b9
     handle_connection_failures(rc);
c563b9
+    mon_refresh_display(NULL);
c563b9
 
c563b9
     mainloop = g_main_loop_new(NULL, FALSE);
c563b9
 
c563b9
-- 
c563b9
1.8.3.1
c563b9
c563b9
c563b9
From 46696d3135e699c58918e41c93c357d951146d5c Mon Sep 17 00:00:00 2001
c563b9
From: Chris Lumens <clumens@redhat.com>
c563b9
Date: Wed, 13 Jan 2021 13:52:49 -0500
c563b9
Subject: [PATCH 05/11] Fix: tools: Report if getting fencing history failed in
c563b9
 crm_mon.
c563b9
c563b9
This just takes history_rc into account in the text and html formatters.
c563b9
It was already used by the XML formatter.  If we can't get fencing
c563b9
history, add a message to the output indicating that happened.
c563b9
---
c563b9
 tools/crm_mon.c       | 13 +++++----
c563b9
 tools/crm_mon.h       | 12 ++++-----
c563b9
 tools/crm_mon_print.c | 74 ++++++++++++++++++++++++++++++++++++++-------------
c563b9
 3 files changed, 70 insertions(+), 29 deletions(-)
c563b9
c563b9
diff --git a/tools/crm_mon.c b/tools/crm_mon.c
c563b9
index b0daf76..1a68555 100644
c563b9
--- a/tools/crm_mon.c
c563b9
+++ b/tools/crm_mon.c
c563b9
@@ -1943,7 +1943,8 @@ mon_refresh_display(gpointer user_data)
c563b9
     switch (output_format) {
c563b9
         case mon_output_html:
c563b9
         case mon_output_cgi:
c563b9
-            if (print_html_status(out, mon_data_set, stonith_history, options.mon_ops,
c563b9
+            if (print_html_status(out, mon_data_set, crm_errno2exit(history_rc),
c563b9
+                                  stonith_history, options.mon_ops,
c563b9
                                   show, options.neg_location_prefix,
c563b9
                                   options.only_node, options.only_rsc) != 0) {
c563b9
                 g_set_error(&error, PCMK__EXITC_ERROR, CRM_EX_CANTCREAT, "Critical: Unable to output html file");
c563b9
@@ -1974,15 +1975,17 @@ mon_refresh_display(gpointer user_data)
c563b9
              */
c563b9
 #if CURSES_ENABLED
c563b9
             blank_screen();
c563b9
-            print_status(out, mon_data_set, stonith_history, options.mon_ops, show,
c563b9
-                         options.neg_location_prefix, options.only_node, options.only_rsc);
c563b9
+            print_status(out, mon_data_set, crm_errno2exit(history_rc), stonith_history,
c563b9
+                         options.mon_ops, show, options.neg_location_prefix,
c563b9
+                         options.only_node, options.only_rsc);
c563b9
             refresh();
c563b9
             break;
c563b9
 #endif
c563b9
 
c563b9
         case mon_output_plain:
c563b9
-            print_status(out, mon_data_set, stonith_history, options.mon_ops, show,
c563b9
-                         options.neg_location_prefix, options.only_node, options.only_rsc);
c563b9
+            print_status(out, mon_data_set, crm_errno2exit(history_rc), stonith_history,
c563b9
+                         options.mon_ops, show, options.neg_location_prefix,
c563b9
+                         options.only_node, options.only_rsc);
c563b9
             break;
c563b9
 
c563b9
         case mon_output_unset:
c563b9
diff --git a/tools/crm_mon.h b/tools/crm_mon.h
c563b9
index f746507..73c926d 100644
c563b9
--- a/tools/crm_mon.h
c563b9
+++ b/tools/crm_mon.h
c563b9
@@ -95,17 +95,17 @@ typedef enum mon_output_format_e {
c563b9
 #define mon_op_default              (mon_op_print_pending | mon_op_fence_history | mon_op_fence_connect)
c563b9
 
c563b9
 void print_status(pcmk__output_t *out, pe_working_set_t *data_set,
c563b9
-                  stonith_history_t *stonith_history, unsigned int mon_ops,
c563b9
-                  unsigned int show, char *prefix, char *only_node,
c563b9
-                  char *only_rsc);
c563b9
+                  crm_exit_t history_rc, stonith_history_t *stonith_history,
c563b9
+                  unsigned int mon_ops, unsigned int show, char *prefix,
c563b9
+                  char *only_node, char *only_rsc);
c563b9
 void print_xml_status(pcmk__output_t *out, pe_working_set_t *data_set,
c563b9
                       crm_exit_t history_rc, stonith_history_t *stonith_history,
c563b9
                       unsigned int mon_ops, unsigned int show, char *prefix,
c563b9
                       char *only_node, char *only_rsc);
c563b9
 int print_html_status(pcmk__output_t *out, pe_working_set_t *data_set,
c563b9
-                      stonith_history_t *stonith_history, unsigned int mon_ops,
c563b9
-                      unsigned int show, char *prefix, char *only_node,
c563b9
-                      char *only_rsc);
c563b9
+                      crm_exit_t history_rc, stonith_history_t *stonith_history,
c563b9
+                      unsigned int mon_ops, unsigned int show, char *prefix,
c563b9
+                      char *only_node, char *only_rsc);
c563b9
 
c563b9
 GList *append_attr_list(GList *attr_list, char *name);
c563b9
 void blank_screen(void);
c563b9
diff --git a/tools/crm_mon_print.c b/tools/crm_mon_print.c
c563b9
index 8ae11bf..73406bd 100644
c563b9
--- a/tools/crm_mon_print.c
c563b9
+++ b/tools/crm_mon_print.c
c563b9
@@ -656,6 +656,7 @@ print_failed_actions(pcmk__output_t *out, pe_working_set_t *data_set,
c563b9
  *
c563b9
  * \param[in] out             The output functions structure.
c563b9
  * \param[in] data_set        Cluster state to display.
c563b9
+ * \param[in] history_rc      Result of getting stonith history
c563b9
  * \param[in] stonith_history List of stonith actions.
c563b9
  * \param[in] mon_ops         Bitmask of mon_op_*.
c563b9
  * \param[in] show            Bitmask of mon_show_*.
c563b9
@@ -663,14 +664,16 @@ print_failed_actions(pcmk__output_t *out, pe_working_set_t *data_set,
c563b9
  */
c563b9
 void
c563b9
 print_status(pcmk__output_t *out, pe_working_set_t *data_set,
c563b9
-             stonith_history_t *stonith_history, unsigned int mon_ops,
c563b9
-             unsigned int show, char *prefix, char *only_node, char *only_rsc)
c563b9
+             crm_exit_t history_rc, stonith_history_t *stonith_history,
c563b9
+             unsigned int mon_ops, unsigned int show, char *prefix,
c563b9
+             char *only_node, char *only_rsc)
c563b9
 {
c563b9
     GListPtr unames = NULL;
c563b9
     GListPtr resources = NULL;
c563b9
 
c563b9
     unsigned int print_opts = get_resource_display_options(mon_ops);
c563b9
     int rc = pcmk_rc_no_output;
c563b9
+    bool already_printed_failure = false;
c563b9
 
c563b9
     CHECK_RC(rc, out->message(out, "cluster-summary", data_set,
c563b9
                               pcmk_is_set(mon_ops, mon_op_print_clone_detail),
c563b9
@@ -731,13 +734,23 @@ print_status(pcmk__output_t *out, pe_working_set_t *data_set,
c563b9
     if (pcmk_is_set(show, mon_show_fence_failed)
c563b9
         && pcmk_is_set(mon_ops, mon_op_fence_history)) {
c563b9
 
c563b9
-        stonith_history_t *hp = stonith__first_matching_event(stonith_history, stonith__event_state_eq,
c563b9
-                                                              GINT_TO_POINTER(st_failed));
c563b9
+        if (history_rc == 0) {
c563b9
+            stonith_history_t *hp = stonith__first_matching_event(stonith_history, stonith__event_state_eq,
c563b9
+                                                                  GINT_TO_POINTER(st_failed));
c563b9
+
c563b9
+            if (hp) {
c563b9
+                CHECK_RC(rc, out->message(out, "failed-fencing-list", stonith_history, unames,
c563b9
+                                          pcmk_is_set(mon_ops, mon_op_fence_full_history),
c563b9
+                                          rc == pcmk_rc_ok));
c563b9
+            }
c563b9
+        } else {
c563b9
+            PCMK__OUTPUT_SPACER_IF(out, rc == pcmk_rc_ok);
c563b9
+            out->begin_list(out, NULL, NULL, "Failed Fencing Actions");
c563b9
+            out->list_item(out, NULL, "Failed to get fencing history: %s",
c563b9
+                           crm_exit_str(history_rc));
c563b9
+            out->end_list(out);
c563b9
 
c563b9
-        if (hp) {
c563b9
-            CHECK_RC(rc, out->message(out, "failed-fencing-list", stonith_history, unames,
c563b9
-                                      pcmk_is_set(mon_ops, mon_op_fence_full_history),
c563b9
-                                      rc == pcmk_rc_ok));
c563b9
+            already_printed_failure = true;
c563b9
         }
c563b9
     }
c563b9
 
c563b9
@@ -754,7 +767,15 @@ print_status(pcmk__output_t *out, pe_working_set_t *data_set,
c563b9
 
c563b9
     /* Print stonith history */
c563b9
     if (pcmk_is_set(mon_ops, mon_op_fence_history)) {
c563b9
-        if (pcmk_is_set(show, mon_show_fence_worked)) {
c563b9
+        if (history_rc != 0) {
c563b9
+            if (!already_printed_failure) {
c563b9
+                PCMK__OUTPUT_SPACER_IF(out, rc == pcmk_rc_ok);
c563b9
+                out->begin_list(out, NULL, NULL, "Failed Fencing Actions");
c563b9
+                out->list_item(out, NULL, "Failed to get fencing history: %s",
c563b9
+                               crm_exit_str(history_rc));
c563b9
+                out->end_list(out);
c563b9
+            }
c563b9
+        } else if (pcmk_is_set(show, mon_show_fence_worked)) {
c563b9
             stonith_history_t *hp = stonith__first_matching_event(stonith_history, stonith__event_state_neq,
c563b9
                                                                   GINT_TO_POINTER(st_failed));
c563b9
 
c563b9
@@ -783,6 +804,7 @@ print_status(pcmk__output_t *out, pe_working_set_t *data_set,
c563b9
  *
c563b9
  * \param[in] out             The output functions structure.
c563b9
  * \param[in] data_set        Cluster state to display.
c563b9
+ * \param[in] history_rc      Result of getting stonith history
c563b9
  * \param[in] stonith_history List of stonith actions.
c563b9
  * \param[in] mon_ops         Bitmask of mon_op_*.
c563b9
  * \param[in] show            Bitmask of mon_show_*.
c563b9
@@ -878,6 +900,7 @@ print_xml_status(pcmk__output_t *out, pe_working_set_t *data_set,
c563b9
  *
c563b9
  * \param[in] out             The output functions structure.
c563b9
  * \param[in] data_set        Cluster state to display.
c563b9
+ * \param[in] history_rc      Result of getting stonith history
c563b9
  * \param[in] stonith_history List of stonith actions.
c563b9
  * \param[in] mon_ops         Bitmask of mon_op_*.
c563b9
  * \param[in] show            Bitmask of mon_show_*.
c563b9
@@ -885,14 +908,15 @@ print_xml_status(pcmk__output_t *out, pe_working_set_t *data_set,
c563b9
  */
c563b9
 int
c563b9
 print_html_status(pcmk__output_t *out, pe_working_set_t *data_set,
c563b9
-                  stonith_history_t *stonith_history, unsigned int mon_ops,
c563b9
-                  unsigned int show, char *prefix, char *only_node,
c563b9
-                  char *only_rsc)
c563b9
+                  crm_exit_t history_rc, stonith_history_t *stonith_history,
c563b9
+                  unsigned int mon_ops, unsigned int show, char *prefix,
c563b9
+                  char *only_node, char *only_rsc)
c563b9
 {
c563b9
     GListPtr unames = NULL;
c563b9
     GListPtr resources = NULL;
c563b9
 
c563b9
     unsigned int print_opts = get_resource_display_options(mon_ops);
c563b9
+    bool already_printed_failure = false;
c563b9
 
c563b9
     out->message(out, "cluster-summary", data_set,
c563b9
                  pcmk_is_set(mon_ops, mon_op_print_clone_detail),
c563b9
@@ -950,18 +974,32 @@ print_html_status(pcmk__output_t *out, pe_working_set_t *data_set,
c563b9
     if (pcmk_is_set(show, mon_show_fence_failed)
c563b9
         && pcmk_is_set(mon_ops, mon_op_fence_history)) {
c563b9
 
c563b9
-        stonith_history_t *hp = stonith__first_matching_event(stonith_history, stonith__event_state_eq,
c563b9
-                                                              GINT_TO_POINTER(st_failed));
c563b9
+        if (history_rc == 0) {
c563b9
+            stonith_history_t *hp = stonith__first_matching_event(stonith_history, stonith__event_state_eq,
c563b9
+                                                                  GINT_TO_POINTER(st_failed));
c563b9
 
c563b9
-        if (hp) {
c563b9
-            out->message(out, "failed-fencing-list", stonith_history, unames,
c563b9
-                         pcmk_is_set(mon_ops, mon_op_fence_full_history), FALSE);
c563b9
+            if (hp) {
c563b9
+                out->message(out, "failed-fencing-list", stonith_history, unames,
c563b9
+                             pcmk_is_set(mon_ops, mon_op_fence_full_history), FALSE);
c563b9
+            }
c563b9
+        } else {
c563b9
+            out->begin_list(out, NULL, NULL, "Failed Fencing Actions");
c563b9
+            out->list_item(out, NULL, "Failed to get fencing history: %s",
c563b9
+                           crm_exit_str(history_rc));
c563b9
+            out->end_list(out);
c563b9
         }
c563b9
     }
c563b9
 
c563b9
     /* Print stonith history */
c563b9
     if (pcmk_is_set(mon_ops, mon_op_fence_history)) {
c563b9
-        if (pcmk_is_set(show, mon_show_fence_worked)) {
c563b9
+        if (history_rc != 0) {
c563b9
+            if (!already_printed_failure) {
c563b9
+                out->begin_list(out, NULL, NULL, "Failed Fencing Actions");
c563b9
+                out->list_item(out, NULL, "Failed to get fencing history: %s",
c563b9
+                               crm_exit_str(history_rc));
c563b9
+                out->end_list(out);
c563b9
+            }
c563b9
+        } else if (pcmk_is_set(show, mon_show_fence_worked)) {
c563b9
             stonith_history_t *hp = stonith__first_matching_event(stonith_history, stonith__event_state_neq,
c563b9
                                                                   GINT_TO_POINTER(st_failed));
c563b9
 
c563b9
-- 
c563b9
1.8.3.1
c563b9
c563b9
c563b9
From 2e391be6fdbbbccd6aef49b3f109e5c342eb5dcc Mon Sep 17 00:00:00 2001
c563b9
From: Chris Lumens <clumens@redhat.com>
c563b9
Date: Mon, 11 Jan 2021 12:54:40 -0500
c563b9
Subject: [PATCH 06/11] Fix: tools: A lack of stonith history is not fatal in
c563b9
 crm_mon.
c563b9
c563b9
Instead, print out all the rest of the typical output.  This should also
c563b9
include an error message in the fencing section, if that section was
c563b9
requested.
c563b9
c563b9
See: rhbz#1880426
c563b9
---
c563b9
 tools/crm_mon.c | 40 ++++++++++++++++------------------------
c563b9
 1 file changed, 16 insertions(+), 24 deletions(-)
c563b9
c563b9
diff --git a/tools/crm_mon.c b/tools/crm_mon.c
c563b9
index 1a68555..17b8ee9 100644
c563b9
--- a/tools/crm_mon.c
c563b9
+++ b/tools/crm_mon.c
c563b9
@@ -682,7 +682,8 @@ mon_timer_popped(gpointer data)
c563b9
     }
c563b9
 
c563b9
     print_as(output_format, "Reconnecting...\n");
c563b9
-    if (fencing_connect() == pcmk_ok && cib_connect(TRUE) == pcmk_ok) {
c563b9
+    fencing_connect();
c563b9
+    if (cib_connect(TRUE) == pcmk_ok) {
c563b9
         mon_refresh_display(NULL);
c563b9
         timer_id = g_timeout_add(options.reconnect_msec, mon_timer_popped, NULL);
c563b9
     }
c563b9
@@ -726,12 +727,6 @@ mon_cib_connection_destroy_regular(gpointer user_data)
c563b9
     do_mon_cib_connection_destroy(user_data, false);
c563b9
 }
c563b9
 
c563b9
-static void
c563b9
-mon_cib_connection_destroy_error(gpointer user_data)
c563b9
-{
c563b9
-    do_mon_cib_connection_destroy(user_data, true);
c563b9
-}
c563b9
-
c563b9
 /*
c563b9
  * Mainloop signal handler.
c563b9
  */
c563b9
@@ -790,6 +785,8 @@ fencing_connect(void)
c563b9
                                             mon_st_callback_display);
c563b9
             st->cmds->register_notification(st, T_STONITH_NOTIFY_HISTORY, mon_st_callback_display);
c563b9
         }
c563b9
+    } else {
c563b9
+        st = NULL;
c563b9
     }
c563b9
 
c563b9
     return rc;
c563b9
@@ -1185,12 +1182,15 @@ handle_connection_failures(int rc)
c563b9
 static void
c563b9
 one_shot()
c563b9
 {
c563b9
-    int rc = fencing_connect();
c563b9
+    int rc;
c563b9
+
c563b9
+    fencing_connect();
c563b9
 
c563b9
+    rc = cib_connect(FALSE);
c563b9
     if (rc == pcmk_rc_ok) {
c563b9
-        rc = cib_connect(FALSE);
c563b9
-        handle_connection_failures(rc);
c563b9
         mon_refresh_display(NULL);
c563b9
+    } else {
c563b9
+        handle_connection_failures(rc);
c563b9
     }
c563b9
 
c563b9
     clean_up(CRM_EX_OK);
c563b9
@@ -1416,10 +1416,8 @@ main(int argc, char **argv)
c563b9
     do {
c563b9
         print_as(output_format ,"Waiting until cluster is available on this node ...\n");
c563b9
 
c563b9
-        rc = fencing_connect();
c563b9
-        if (rc == pcmk_ok) {
c563b9
-            rc = cib_connect(TRUE);
c563b9
-        }
c563b9
+        fencing_connect();
c563b9
+        rc = cib_connect(TRUE);
c563b9
 
c563b9
         if (rc != pcmk_ok) {
c563b9
             sleep(options.reconnect_msec / 1000);
c563b9
@@ -1896,16 +1894,12 @@ mon_refresh_display(gpointer user_data)
c563b9
         return 0;
c563b9
     }
c563b9
 
c563b9
-    /* get the stonith-history if there is evidence we need it
c563b9
-     */
c563b9
+    /* get the stonith-history if there is evidence we need it */
c563b9
     while (pcmk_is_set(options.mon_ops, mon_op_fence_history)) {
c563b9
         if (st != NULL) {
c563b9
             history_rc = st->cmds->history(st, st_opt_sync_call, NULL, &stonith_history, 120);
c563b9
 
c563b9
-            if (history_rc != 0) {
c563b9
-                out->err(out, "Critical: Unable to get stonith-history");
c563b9
-                mon_cib_connection_destroy_error(NULL);
c563b9
-            } else {
c563b9
+            if (history_rc == 0) {
c563b9
                 stonith_history = stonith__sort_history(stonith_history);
c563b9
                 if (!pcmk_is_set(options.mon_ops, mon_op_fence_full_history)
c563b9
                     && (output_format != mon_output_xml)) {
c563b9
@@ -1915,11 +1909,9 @@ mon_refresh_display(gpointer user_data)
c563b9
                 break; /* all other cases are errors */
c563b9
             }
c563b9
         } else {
c563b9
-            out->err(out, "Critical: No stonith-API");
c563b9
+            history_rc = ENOTCONN;
c563b9
+            break;
c563b9
         }
c563b9
-        free_xml(cib_copy);
c563b9
-        out->err(out, "Reading stonith-history failed");
c563b9
-        return 0;
c563b9
     }
c563b9
 
c563b9
     if (mon_data_set == NULL) {
c563b9
-- 
c563b9
1.8.3.1
c563b9
c563b9
c563b9
From 8abcb2bf0c5c90004a687e27aa86fd6ad1b62eb3 Mon Sep 17 00:00:00 2001
c563b9
From: Chris Lumens <clumens@redhat.com>
c563b9
Date: Thu, 14 Jan 2021 14:31:25 -0500
c563b9
Subject: [PATCH 07/11] Refactor: Split the fencing history code into its own
c563b9
 function.
c563b9
c563b9
---
c563b9
 tools/crm_mon.c | 46 ++++++++++++++++++++++++++++------------------
c563b9
 1 file changed, 28 insertions(+), 18 deletions(-)
c563b9
c563b9
diff --git a/tools/crm_mon.c b/tools/crm_mon.c
c563b9
index 17b8ee9..1baba5f 100644
c563b9
--- a/tools/crm_mon.c
c563b9
+++ b/tools/crm_mon.c
c563b9
@@ -1879,6 +1879,33 @@ crm_diff_update(const char *event, xmlNode * msg)
c563b9
 }
c563b9
 
c563b9
 static int
c563b9
+get_fencing_history(stonith_history_t **stonith_history)
c563b9
+{
c563b9
+    int rc = 0;
c563b9
+
c563b9
+    while (pcmk_is_set(options.mon_ops, mon_op_fence_history)) {
c563b9
+        if (st != NULL) {
c563b9
+            rc = st->cmds->history(st, st_opt_sync_call, NULL, stonith_history, 120);
c563b9
+
c563b9
+            if (rc == 0) {
c563b9
+                *stonith_history = stonith__sort_history(*stonith_history);
c563b9
+                if (!pcmk_is_set(options.mon_ops, mon_op_fence_full_history)
c563b9
+                    && (output_format != mon_output_xml)) {
c563b9
+
c563b9
+                    *stonith_history = pcmk__reduce_fence_history(*stonith_history);
c563b9
+                }
c563b9
+                break; /* all other cases are errors */
c563b9
+            }
c563b9
+        } else {
c563b9
+            rc = ENOTCONN;
c563b9
+            break;
c563b9
+        }
c563b9
+    }
c563b9
+
c563b9
+    return rc;
c563b9
+}
c563b9
+
c563b9
+static int
c563b9
 mon_refresh_display(gpointer user_data)
c563b9
 {
c563b9
     xmlNode *cib_copy = copy_xml(current_cib);
c563b9
@@ -1895,24 +1922,7 @@ mon_refresh_display(gpointer user_data)
c563b9
     }
c563b9
 
c563b9
     /* get the stonith-history if there is evidence we need it */
c563b9
-    while (pcmk_is_set(options.mon_ops, mon_op_fence_history)) {
c563b9
-        if (st != NULL) {
c563b9
-            history_rc = st->cmds->history(st, st_opt_sync_call, NULL, &stonith_history, 120);
c563b9
-
c563b9
-            if (history_rc == 0) {
c563b9
-                stonith_history = stonith__sort_history(stonith_history);
c563b9
-                if (!pcmk_is_set(options.mon_ops, mon_op_fence_full_history)
c563b9
-                    && (output_format != mon_output_xml)) {
c563b9
-
c563b9
-                    stonith_history = pcmk__reduce_fence_history(stonith_history);
c563b9
-                }
c563b9
-                break; /* all other cases are errors */
c563b9
-            }
c563b9
-        } else {
c563b9
-            history_rc = ENOTCONN;
c563b9
-            break;
c563b9
-        }
c563b9
-    }
c563b9
+    history_rc = get_fencing_history(&stonith_history);
c563b9
 
c563b9
     if (mon_data_set == NULL) {
c563b9
         mon_data_set = pe_new_working_set();
c563b9
-- 
c563b9
1.8.3.1
c563b9
c563b9
c563b9
From fa75e884e3c3822e1010ad1d67958e4f1cc5400b Mon Sep 17 00:00:00 2001
c563b9
From: Chris Lumens <clumens@redhat.com>
c563b9
Date: Thu, 14 Jan 2021 14:49:09 -0500
c563b9
Subject: [PATCH 08/11] Refactor: tools: Get rid of
c563b9
 mon_cib_connection_destroy_regular.
c563b9
c563b9
With the _error version removed in a previous commit, there's no need
c563b9
for this wrapper to exist anymore.  We can just call
c563b9
mon_cib_connection_destroy directly.
c563b9
---
c563b9
 tools/crm_mon.c | 22 ++++++----------------
c563b9
 1 file changed, 6 insertions(+), 16 deletions(-)
c563b9
c563b9
diff --git a/tools/crm_mon.c b/tools/crm_mon.c
c563b9
index 1baba5f..a0764a5 100644
c563b9
--- a/tools/crm_mon.c
c563b9
+++ b/tools/crm_mon.c
c563b9
@@ -691,13 +691,9 @@ mon_timer_popped(gpointer data)
c563b9
 }
c563b9
 
c563b9
 static void
c563b9
-do_mon_cib_connection_destroy(gpointer user_data, bool is_error)
c563b9
+mon_cib_connection_destroy(gpointer user_data)
c563b9
 {
c563b9
-    if (is_error) {
c563b9
-        out->err(out, "Connection to the cluster-daemons terminated");
c563b9
-    } else {
c563b9
-        out->info(out, "Connection to the cluster-daemons terminated");
c563b9
-    }
c563b9
+    out->info(out, "Connection to the cluster-daemons terminated");
c563b9
 
c563b9
     if (refresh_timer != NULL) {
c563b9
         /* we'll trigger a refresh after reconnect */
c563b9
@@ -721,12 +717,6 @@ do_mon_cib_connection_destroy(gpointer user_data, bool is_error)
c563b9
     return;
c563b9
 }
c563b9
 
c563b9
-static void
c563b9
-mon_cib_connection_destroy_regular(gpointer user_data)
c563b9
-{
c563b9
-    do_mon_cib_connection_destroy(user_data, false);
c563b9
-}
c563b9
-
c563b9
 /*
c563b9
  * Mainloop signal handler.
c563b9
  */
c563b9
@@ -831,7 +821,7 @@ cib_connect(gboolean full)
c563b9
     rc = cib->cmds->query(cib, NULL, &current_cib, cib_scope_local | cib_sync_call);
c563b9
 
c563b9
     if (rc == pcmk_ok && full) {
c563b9
-        rc = cib->cmds->set_connection_dnotify(cib, mon_cib_connection_destroy_regular);
c563b9
+        rc = cib->cmds->set_connection_dnotify(cib, mon_cib_connection_destroy);
c563b9
         if (rc == -EPROTONOSUPPORT) {
c563b9
             print_as
c563b9
                 (output_format, "Notification setup not supported, won't be able to reconnect after failure");
c563b9
@@ -890,7 +880,7 @@ detect_user_input(GIOChannel *channel, GIOCondition condition, gpointer user_dat
c563b9
                     options.mon_ops |= mon_op_fence_history;
c563b9
                     options.mon_ops |= mon_op_fence_connect;
c563b9
                     if (st == NULL) {
c563b9
-                        mon_cib_connection_destroy_regular(NULL);
c563b9
+                        mon_cib_connection_destroy(NULL);
c563b9
                     }
c563b9
                 }
c563b9
 
c563b9
@@ -2010,7 +2000,7 @@ mon_st_callback_event(stonith_t * st, stonith_event_t * e)
c563b9
 {
c563b9
     if (st->state == stonith_disconnected) {
c563b9
         /* disconnect cib as well and have everything reconnect */
c563b9
-        mon_cib_connection_destroy_regular(NULL);
c563b9
+        mon_cib_connection_destroy(NULL);
c563b9
     } else if (options.external_agent) {
c563b9
         char *desc = crm_strdup_printf("Operation %s requested by %s for peer %s: %s (ref=%s)",
c563b9
                                     e->operation, e->origin, e->target, pcmk_strerror(e->result),
c563b9
@@ -2059,7 +2049,7 @@ mon_st_callback_display(stonith_t * st, stonith_event_t * e)
c563b9
 {
c563b9
     if (st->state == stonith_disconnected) {
c563b9
         /* disconnect cib as well and have everything reconnect */
c563b9
-        mon_cib_connection_destroy_regular(NULL);
c563b9
+        mon_cib_connection_destroy(NULL);
c563b9
     } else {
c563b9
         print_dot(output_format);
c563b9
         kick_refresh(TRUE);
c563b9
-- 
c563b9
1.8.3.1
c563b9
c563b9
c563b9
From 009f3aa0caa6d138d4da418297f12c4a1210cf6b Mon Sep 17 00:00:00 2001
c563b9
From: Chris Lumens <clumens@redhat.com>
c563b9
Date: Thu, 14 Jan 2021 16:25:37 -0500
c563b9
Subject: [PATCH 09/11] Refactor: Add comments to connection functions in
c563b9
 crm_mon.c.
c563b9
c563b9
There are an awful lot of these functions, and trying to make sense of
c563b9
them can be confusing when there's no comments explaining when they
c563b9
happen.  Hopefully this helps a little.
c563b9
---
c563b9
 tools/crm_mon.c | 56 ++++++++++++++++++++++++++++++++++++++++++++++++--------
c563b9
 1 file changed, 48 insertions(+), 8 deletions(-)
c563b9
c563b9
diff --git a/tools/crm_mon.c b/tools/crm_mon.c
c563b9
index a0764a5..54a7958 100644
c563b9
--- a/tools/crm_mon.c
c563b9
+++ b/tools/crm_mon.c
c563b9
@@ -666,6 +666,10 @@ static GOptionEntry deprecated_entries[] = {
c563b9
 };
c563b9
 /* *INDENT-ON* */
c563b9
 
c563b9
+/* Reconnect to the CIB and fencing agent after reconnect_msec has passed.  This sounds
c563b9
+ * like it would be more broadly useful, but only ever happens after a disconnect via
c563b9
+ * mon_cib_connection_destroy.
c563b9
+ */
c563b9
 static gboolean
c563b9
 mon_timer_popped(gpointer data)
c563b9
 {
c563b9
@@ -684,12 +688,17 @@ mon_timer_popped(gpointer data)
c563b9
     print_as(output_format, "Reconnecting...\n");
c563b9
     fencing_connect();
c563b9
     if (cib_connect(TRUE) == pcmk_ok) {
c563b9
+        /* Redraw the screen and reinstall ourselves to get called after another reconnect_msec. */
c563b9
         mon_refresh_display(NULL);
c563b9
         timer_id = g_timeout_add(options.reconnect_msec, mon_timer_popped, NULL);
c563b9
     }
c563b9
     return FALSE;
c563b9
 }
c563b9
 
c563b9
+/* Called from various places when we are disconnected from the CIB or from the
c563b9
+ * fencing agent.  If the CIB connection is still valid, this function will also
c563b9
+ * attempt to sign off and reconnect.
c563b9
+ */
c563b9
 static void
c563b9
 mon_cib_connection_destroy(gpointer user_data)
c563b9
 {
c563b9
@@ -717,9 +726,7 @@ mon_cib_connection_destroy(gpointer user_data)
c563b9
     return;
c563b9
 }
c563b9
 
c563b9
-/*
c563b9
- * Mainloop signal handler.
c563b9
- */
c563b9
+/* Signal handler installed into the mainloop for normal program shutdown */
c563b9
 static void
c563b9
 mon_shutdown(int nsig)
c563b9
 {
c563b9
@@ -729,6 +736,10 @@ mon_shutdown(int nsig)
c563b9
 #if CURSES_ENABLED
c563b9
 static sighandler_t ncurses_winch_handler;
c563b9
 
c563b9
+/* Signal handler installed the regular way (not into the main loop) for when
c563b9
+ * the screen is resized.  Commonly, this happens when running in an xterm and
c563b9
+ * the user changes its size.
c563b9
+ */
c563b9
 static void
c563b9
 mon_winresize(int nsig)
c563b9
 {
c563b9
@@ -743,6 +754,9 @@ mon_winresize(int nsig)
c563b9
             (*ncurses_winch_handler) (SIGWINCH);
c563b9
         getmaxyx(stdscr, lines, cols);
c563b9
         resizeterm(lines, cols);
c563b9
+        /* Alert the mainloop code we'd like the refresh_trigger to run next
c563b9
+         * time the mainloop gets around to checking.
c563b9
+         */
c563b9
         mainloop_set_trigger(refresh_trigger);
c563b9
     }
c563b9
     not_done--;
c563b9
@@ -863,6 +877,12 @@ get_option_desc(char c)
c563b9
 #define print_option_help(output_format, option, condition) \
c563b9
     out->info(out, "%c %c: \t%s", ((condition)? '*': ' '), option, get_option_desc(option));
c563b9
 
c563b9
+/* This function is called from the main loop when there is something to be read
c563b9
+ * on stdin, like an interactive user's keystroke.  All it does is read the keystroke,
c563b9
+ * set flags (or show the page showing which keystrokes are valid), and redraw the
c563b9
+ * screen.  It does not do anything with connections to the CIB or fencing agent
c563b9
+ * beyond what would happen in mon_refresh_display.
c563b9
+ */
c563b9
 static gboolean
c563b9
 detect_user_input(GIOChannel *channel, GIOCondition condition, gpointer user_data)
c563b9
 {
c563b9
@@ -951,6 +971,7 @@ detect_user_input(GIOChannel *channel, GIOCondition condition, gpointer user_dat
c563b9
                 config_mode = TRUE;
c563b9
                 break;
c563b9
             default:
c563b9
+                /* All other keys just redraw the screen. */
c563b9
                 goto refresh;
c563b9
         }
c563b9
 
c563b9
@@ -1441,6 +1462,10 @@ main(int argc, char **argv)
c563b9
         g_io_add_watch(io_channel, G_IO_IN, detect_user_input, NULL);
c563b9
     }
c563b9
 #endif
c563b9
+
c563b9
+    /* When refresh_trigger->trigger is set to TRUE, call mon_refresh_display.  In
c563b9
+     * this file, that is anywhere mainloop_set_trigger is called.
c563b9
+     */
c563b9
     refresh_trigger = mainloop_add_trigger(G_PRIORITY_LOW, mon_refresh_display, NULL);
c563b9
 
c563b9
     g_main_loop_run(mainloop);
c563b9
@@ -1677,6 +1702,10 @@ handle_rsc_op(xmlNode * xml, const char *node_id)
c563b9
     free(task);
c563b9
 }
c563b9
 
c563b9
+/* This function is just a wrapper around mainloop_set_trigger so that it can be
c563b9
+ * called from a mainloop directly.  It's simply another way of ensuring the screen
c563b9
+ * gets redrawn.
c563b9
+ */
c563b9
 static gboolean
c563b9
 mon_trigger_refresh(gpointer user_data)
c563b9
 {
c563b9
@@ -1995,6 +2024,9 @@ mon_refresh_display(gpointer user_data)
c563b9
     return 1;
c563b9
 }
c563b9
 
c563b9
+/* This function is called for fencing events (see fencing_connect for which ones) when
c563b9
+ * --watch-fencing is used on the command line.
c563b9
+ */
c563b9
 static void
c563b9
 mon_st_callback_event(stonith_t * st, stonith_event_t * e)
c563b9
 {
c563b9
@@ -2010,6 +2042,16 @@ mon_st_callback_event(stonith_t * st, stonith_event_t * e)
c563b9
     }
c563b9
 }
c563b9
 
c563b9
+/* Cause the screen to be redrawn (via mainloop_set_trigger) when various conditions are met:
c563b9
+ *
c563b9
+ * - If the last update occurred more than reconnect_msec ago (defaults to 5s, but can be
c563b9
+ *   changed via the -i command line option), or
c563b9
+ * - After every 10 CIB updates, or
c563b9
+ * - If it's been 2s since the last update
c563b9
+ *
c563b9
+ * This function sounds like it would be more broadly useful, but it is only called when a
c563b9
+ * fencing event is received or a CIB diff occurrs.
c563b9
+ */
c563b9
 static void
c563b9
 kick_refresh(gboolean data_updated)
c563b9
 {
c563b9
@@ -2024,11 +2066,6 @@ kick_refresh(gboolean data_updated)
c563b9
         refresh_timer = mainloop_timer_add("refresh", 2000, FALSE, mon_trigger_refresh, NULL);
c563b9
     }
c563b9
 
c563b9
-    /* Refresh
c563b9
-     * - immediately if the last update was more than 5s ago
c563b9
-     * - every 10 cib-updates
c563b9
-     * - at most 2s after the last update
c563b9
-     */
c563b9
     if ((now - last_refresh) > (options.reconnect_msec / 1000)) {
c563b9
         mainloop_set_trigger(refresh_trigger);
c563b9
         mainloop_timer_stop(refresh_timer);
c563b9
@@ -2044,6 +2081,9 @@ kick_refresh(gboolean data_updated)
c563b9
     }
c563b9
 }
c563b9
 
c563b9
+/* This function is called for fencing events (see fencing_connect for which ones) when
c563b9
+ * --watch-fencing is NOT used on the command line.
c563b9
+ */
c563b9
 static void
c563b9
 mon_st_callback_display(stonith_t * st, stonith_event_t * e)
c563b9
 {
c563b9
-- 
c563b9
1.8.3.1
c563b9
c563b9
c563b9
From aa328f0788ef0057874aeeeae7261dfb450b9b9e Mon Sep 17 00:00:00 2001
c563b9
From: Chris Lumens <clumens@redhat.com>
c563b9
Date: Thu, 14 Jan 2021 16:44:45 -0500
c563b9
Subject: [PATCH 10/11] Refactor: tools: Rename some connection-related symbols
c563b9
 in crm_mon.
c563b9
c563b9
---
c563b9
 tools/crm_mon.c | 28 ++++++++++++++--------------
c563b9
 1 file changed, 14 insertions(+), 14 deletions(-)
c563b9
c563b9
diff --git a/tools/crm_mon.c b/tools/crm_mon.c
c563b9
index 54a7958..89d7ae2 100644
c563b9
--- a/tools/crm_mon.c
c563b9
+++ b/tools/crm_mon.c
c563b9
@@ -66,7 +66,7 @@ static mon_output_format_t output_format = mon_output_unset;
c563b9
 /* other globals */
c563b9
 static GIOChannel *io_channel = NULL;
c563b9
 static GMainLoop *mainloop = NULL;
c563b9
-static guint timer_id = 0;
c563b9
+static guint reconnect_timer = 0;
c563b9
 static mainloop_timer_t *refresh_timer = NULL;
c563b9
 static pe_working_set_t *mon_data_set = NULL;
c563b9
 
c563b9
@@ -131,7 +131,7 @@ static int cib_connect(gboolean full);
c563b9
 static int fencing_connect(void);
c563b9
 static void mon_st_callback_event(stonith_t * st, stonith_event_t * e);
c563b9
 static void mon_st_callback_display(stonith_t * st, stonith_event_t * e);
c563b9
-static void kick_refresh(gboolean data_updated);
c563b9
+static void refresh_after_event(gboolean data_updated);
c563b9
 
c563b9
 static unsigned int
c563b9
 all_includes(mon_output_format_t fmt) {
c563b9
@@ -671,7 +671,7 @@ static GOptionEntry deprecated_entries[] = {
c563b9
  * mon_cib_connection_destroy.
c563b9
  */
c563b9
 static gboolean
c563b9
-mon_timer_popped(gpointer data)
c563b9
+reconnect_after_timeout(gpointer data)
c563b9
 {
c563b9
 #if CURSES_ENABLED
c563b9
     if (output_format == mon_output_console) {
c563b9
@@ -680,9 +680,9 @@ mon_timer_popped(gpointer data)
c563b9
     }
c563b9
 #endif
c563b9
 
c563b9
-    if (timer_id > 0) {
c563b9
-        g_source_remove(timer_id);
c563b9
-        timer_id = 0;
c563b9
+    if (reconnect_timer > 0) {
c563b9
+        g_source_remove(reconnect_timer);
c563b9
+        reconnect_timer = 0;
c563b9
     }
c563b9
 
c563b9
     print_as(output_format, "Reconnecting...\n");
c563b9
@@ -690,7 +690,7 @@ mon_timer_popped(gpointer data)
c563b9
     if (cib_connect(TRUE) == pcmk_ok) {
c563b9
         /* Redraw the screen and reinstall ourselves to get called after another reconnect_msec. */
c563b9
         mon_refresh_display(NULL);
c563b9
-        timer_id = g_timeout_add(options.reconnect_msec, mon_timer_popped, NULL);
c563b9
+        reconnect_timer = g_timeout_add(options.reconnect_msec, reconnect_after_timeout, NULL);
c563b9
     }
c563b9
     return FALSE;
c563b9
 }
c563b9
@@ -708,10 +708,10 @@ mon_cib_connection_destroy(gpointer user_data)
c563b9
         /* we'll trigger a refresh after reconnect */
c563b9
         mainloop_timer_stop(refresh_timer);
c563b9
     }
c563b9
-    if (timer_id) {
c563b9
+    if (reconnect_timer) {
c563b9
         /* we'll trigger a new reconnect-timeout at the end */
c563b9
-        g_source_remove(timer_id);
c563b9
-        timer_id = 0;
c563b9
+        g_source_remove(reconnect_timer);
c563b9
+        reconnect_timer = 0;
c563b9
     }
c563b9
     if (st) {
c563b9
         /* the client API won't properly reconnect notifications
c563b9
@@ -721,7 +721,7 @@ mon_cib_connection_destroy(gpointer user_data)
c563b9
     }
c563b9
     if (cib) {
c563b9
         cib->cmds->signoff(cib);
c563b9
-        timer_id = g_timeout_add(options.reconnect_msec, mon_timer_popped, NULL);
c563b9
+        reconnect_timer = g_timeout_add(options.reconnect_msec, reconnect_after_timeout, NULL);
c563b9
     }
c563b9
     return;
c563b9
 }
c563b9
@@ -1894,7 +1894,7 @@ crm_diff_update(const char *event, xmlNode * msg)
c563b9
     }
c563b9
 
c563b9
     stale = FALSE;
c563b9
-    kick_refresh(cib_updated);
c563b9
+    refresh_after_event(cib_updated);
c563b9
 }
c563b9
 
c563b9
 static int
c563b9
@@ -2053,7 +2053,7 @@ mon_st_callback_event(stonith_t * st, stonith_event_t * e)
c563b9
  * fencing event is received or a CIB diff occurrs.
c563b9
  */
c563b9
 static void
c563b9
-kick_refresh(gboolean data_updated)
c563b9
+refresh_after_event(gboolean data_updated)
c563b9
 {
c563b9
     static int updates = 0;
c563b9
     time_t now = time(NULL);
c563b9
@@ -2092,7 +2092,7 @@ mon_st_callback_display(stonith_t * st, stonith_event_t * e)
c563b9
         mon_cib_connection_destroy(NULL);
c563b9
     } else {
c563b9
         print_dot(output_format);
c563b9
-        kick_refresh(TRUE);
c563b9
+        refresh_after_event(TRUE);
c563b9
     }
c563b9
 }
c563b9
 
c563b9
-- 
c563b9
1.8.3.1
c563b9
c563b9
c563b9
From 8c51b4980f349e8773681f7ed2882ca639e0e63a Mon Sep 17 00:00:00 2001
c563b9
From: Chris Lumens <clumens@redhat.com>
c563b9
Date: Mon, 18 Jan 2021 14:03:39 -0500
c563b9
Subject: [PATCH 11/11] Fix: tools: Attempt to reestablish connections in
c563b9
 crm_mon.
c563b9
c563b9
If the fencing or CIB connections go away between screen refreshes,
c563b9
attempt to re-establish those connections.  The functions that do this
c563b9
should be safe to be called repeatedly.
c563b9
c563b9
See: rhbz#1880426, rhbz#1466875
c563b9
---
c563b9
 tools/crm_mon.c | 17 ++++++++++++++---
c563b9
 1 file changed, 14 insertions(+), 3 deletions(-)
c563b9
c563b9
diff --git a/tools/crm_mon.c b/tools/crm_mon.c
c563b9
index 89d7ae2..083b7ae 100644
c563b9
--- a/tools/crm_mon.c
c563b9
+++ b/tools/crm_mon.c
c563b9
@@ -126,6 +126,7 @@ static void clean_up_cib_connection(void);
c563b9
 static void clean_up_fencing_connection(void);
c563b9
 static crm_exit_t clean_up(crm_exit_t exit_code);
c563b9
 static void crm_diff_update(const char *event, xmlNode * msg);
c563b9
+static void handle_connection_failures(int rc);
c563b9
 static int mon_refresh_display(gpointer user_data);
c563b9
 static int cib_connect(gboolean full);
c563b9
 static int fencing_connect(void);
c563b9
@@ -690,9 +691,11 @@ reconnect_after_timeout(gpointer data)
c563b9
     if (cib_connect(TRUE) == pcmk_ok) {
c563b9
         /* Redraw the screen and reinstall ourselves to get called after another reconnect_msec. */
c563b9
         mon_refresh_display(NULL);
c563b9
-        reconnect_timer = g_timeout_add(options.reconnect_msec, reconnect_after_timeout, NULL);
c563b9
+        return FALSE;
c563b9
     }
c563b9
-    return FALSE;
c563b9
+
c563b9
+    reconnect_timer = g_timeout_add(options.reconnect_msec, reconnect_after_timeout, NULL);
c563b9
+    return TRUE;
c563b9
 }
c563b9
 
c563b9
 /* Called from various places when we are disconnected from the CIB or from the
c563b9
@@ -887,6 +890,7 @@ static gboolean
c563b9
 detect_user_input(GIOChannel *channel, GIOCondition condition, gpointer user_data)
c563b9
 {
c563b9
     int c;
c563b9
+    int rc;
c563b9
     gboolean config_mode = FALSE;
c563b9
 
c563b9
     while (1) {
c563b9
@@ -1001,7 +1005,14 @@ detect_user_input(GIOChannel *channel, GIOCondition condition, gpointer user_dat
c563b9
     }
c563b9
 
c563b9
 refresh:
c563b9
-    mon_refresh_display(NULL);
c563b9
+    fencing_connect();
c563b9
+    rc = cib_connect(FALSE);
c563b9
+    if (rc == pcmk_rc_ok) {
c563b9
+        mon_refresh_display(NULL);
c563b9
+    } else {
c563b9
+        handle_connection_failures(rc);
c563b9
+    }
c563b9
+
c563b9
     return TRUE;
c563b9
 }
c563b9
 #endif
c563b9
-- 
c563b9
1.8.3.1
c563b9