|
|
587943 |
From 68139dc8ff5efbfd81d3b5e868462e7eaefa2c74 Mon Sep 17 00:00:00 2001
|
|
|
587943 |
From: Klaus Wenninger <klaus.wenninger@aon.at>
|
|
|
587943 |
Date: Mon, 25 Jan 2021 15:35:33 +0100
|
|
|
587943 |
Subject: [PATCH 1/7] Fix: crm_mon: add explicit void to one_shot prototype for
|
|
|
587943 |
compat
|
|
|
587943 |
|
|
|
587943 |
---
|
|
|
587943 |
tools/crm_mon.c | 2 +-
|
|
|
587943 |
1 file changed, 1 insertion(+), 1 deletion(-)
|
|
|
587943 |
|
|
|
587943 |
diff --git a/tools/crm_mon.c b/tools/crm_mon.c
|
|
|
587943 |
index 0981634..1eca1b7 100644
|
|
|
587943 |
--- a/tools/crm_mon.c
|
|
|
587943 |
+++ b/tools/crm_mon.c
|
|
|
587943 |
@@ -1226,7 +1226,7 @@ handle_connection_failures(int rc)
|
|
|
587943 |
}
|
|
|
587943 |
|
|
|
587943 |
static void
|
|
|
587943 |
-one_shot()
|
|
|
587943 |
+one_shot(void)
|
|
|
587943 |
{
|
|
|
587943 |
int rc;
|
|
|
587943 |
|
|
|
587943 |
--
|
|
|
587943 |
1.8.3.1
|
|
|
587943 |
|
|
|
587943 |
|
|
|
587943 |
From 8c7a01f8880efff8457e8421c381082b250d4512 Mon Sep 17 00:00:00 2001
|
|
|
587943 |
From: Klaus Wenninger <klaus.wenninger@aon.at>
|
|
|
587943 |
Date: Mon, 25 Jan 2021 16:26:30 +0100
|
|
|
587943 |
Subject: [PATCH 2/7] Refactor: crm_mon: cib_connect &
|
|
|
587943 |
handle_connection_failures -> new rc
|
|
|
587943 |
|
|
|
587943 |
---
|
|
|
587943 |
tools/crm_mon.c | 62 ++++++++++++++++++++++++++++++++-------------------------
|
|
|
587943 |
1 file changed, 35 insertions(+), 27 deletions(-)
|
|
|
587943 |
|
|
|
587943 |
diff --git a/tools/crm_mon.c b/tools/crm_mon.c
|
|
|
587943 |
index 1eca1b7..3fbac5f 100644
|
|
|
587943 |
--- a/tools/crm_mon.c
|
|
|
587943 |
+++ b/tools/crm_mon.c
|
|
|
587943 |
@@ -690,7 +690,7 @@ reconnect_after_timeout(gpointer data)
|
|
|
587943 |
|
|
|
587943 |
print_as(output_format, "Reconnecting...\n");
|
|
|
587943 |
fencing_connect();
|
|
|
587943 |
- if (cib_connect(TRUE) == pcmk_ok) {
|
|
|
587943 |
+ if (cib_connect(TRUE) == pcmk_rc_ok) {
|
|
|
587943 |
/* Redraw the screen and reinstall ourselves to get called after another reconnect_msec. */
|
|
|
587943 |
mon_refresh_display(NULL);
|
|
|
587943 |
return FALSE;
|
|
|
587943 |
@@ -804,16 +804,17 @@ fencing_connect(void)
|
|
|
587943 |
static int
|
|
|
587943 |
cib_connect(gboolean full)
|
|
|
587943 |
{
|
|
|
587943 |
- int rc = pcmk_ok;
|
|
|
587943 |
+ int rc = pcmk_rc_ok;
|
|
|
587943 |
static gboolean need_pass = TRUE;
|
|
|
587943 |
|
|
|
587943 |
- CRM_CHECK(cib != NULL, return -EINVAL);
|
|
|
587943 |
+ CRM_CHECK(cib != NULL, return EINVAL);
|
|
|
587943 |
|
|
|
587943 |
if (getenv("CIB_passwd") != NULL) {
|
|
|
587943 |
need_pass = FALSE;
|
|
|
587943 |
}
|
|
|
587943 |
|
|
|
587943 |
- if (cib->state == cib_connected_query || cib->state == cib_connected_command) {
|
|
|
587943 |
+ if (cib->state == cib_connected_query ||
|
|
|
587943 |
+ cib->state == cib_connected_command) {
|
|
|
587943 |
return rc;
|
|
|
587943 |
}
|
|
|
587943 |
|
|
|
587943 |
@@ -825,37 +826,44 @@ cib_connect(gboolean full)
|
|
|
587943 |
* @TODO Add a password prompt (maybe including input) function to
|
|
|
587943 |
* pcmk__output_t and use it in libcib.
|
|
|
587943 |
*/
|
|
|
587943 |
- if ((output_format == mon_output_console) && need_pass && (cib->variant == cib_remote)) {
|
|
|
587943 |
+ if ((output_format == mon_output_console) &&
|
|
|
587943 |
+ need_pass &&
|
|
|
587943 |
+ (cib->variant == cib_remote)) {
|
|
|
587943 |
need_pass = FALSE;
|
|
|
587943 |
print_as(output_format, "Password:");
|
|
|
587943 |
}
|
|
|
587943 |
|
|
|
587943 |
- rc = cib->cmds->signon(cib, crm_system_name, cib_query);
|
|
|
587943 |
- if (rc != pcmk_ok) {
|
|
|
587943 |
+ rc = pcmk_legacy2rc(cib->cmds->signon(cib, crm_system_name, cib_query));
|
|
|
587943 |
+ if (rc != pcmk_rc_ok) {
|
|
|
587943 |
out->err(out, "Could not connect to the CIB: %s",
|
|
|
587943 |
- pcmk_strerror(rc));
|
|
|
587943 |
+ pcmk_rc_str(rc));
|
|
|
587943 |
return rc;
|
|
|
587943 |
}
|
|
|
587943 |
|
|
|
587943 |
- rc = cib->cmds->query(cib, NULL, ¤t_cib, cib_scope_local | cib_sync_call);
|
|
|
587943 |
+ rc = pcmk_legacy2rc(cib->cmds->query(cib, NULL, ¤t_cib,
|
|
|
587943 |
+ cib_scope_local | cib_sync_call));
|
|
|
587943 |
|
|
|
587943 |
- if (rc == pcmk_ok && full) {
|
|
|
587943 |
- rc = cib->cmds->set_connection_dnotify(cib, mon_cib_connection_destroy);
|
|
|
587943 |
- if (rc == -EPROTONOSUPPORT) {
|
|
|
587943 |
- print_as
|
|
|
587943 |
- (output_format, "Notification setup not supported, won't be able to reconnect after failure");
|
|
|
587943 |
+ if (rc == pcmk_rc_ok && full) {
|
|
|
587943 |
+ rc = pcmk_legacy2rc(cib->cmds->set_connection_dnotify(cib,
|
|
|
587943 |
+ mon_cib_connection_destroy));
|
|
|
587943 |
+ if (rc == EPROTONOSUPPORT) {
|
|
|
587943 |
+ print_as(output_format,
|
|
|
587943 |
+ "Notification setup not supported, won't be "
|
|
|
587943 |
+ "able to reconnect after failure");
|
|
|
587943 |
if (output_format == mon_output_console) {
|
|
|
587943 |
sleep(2);
|
|
|
587943 |
}
|
|
|
587943 |
- rc = pcmk_ok;
|
|
|
587943 |
+ rc = pcmk_rc_ok;
|
|
|
587943 |
}
|
|
|
587943 |
|
|
|
587943 |
- if (rc == pcmk_ok) {
|
|
|
587943 |
- cib->cmds->del_notify_callback(cib, T_CIB_DIFF_NOTIFY, crm_diff_update);
|
|
|
587943 |
- rc = cib->cmds->add_notify_callback(cib, T_CIB_DIFF_NOTIFY, crm_diff_update);
|
|
|
587943 |
+ if (rc == pcmk_rc_ok) {
|
|
|
587943 |
+ cib->cmds->del_notify_callback(cib, T_CIB_DIFF_NOTIFY,
|
|
|
587943 |
+ crm_diff_update);
|
|
|
587943 |
+ rc = pcmk_legacy2rc(cib->cmds->add_notify_callback(cib,
|
|
|
587943 |
+ T_CIB_DIFF_NOTIFY, crm_diff_update));
|
|
|
587943 |
}
|
|
|
587943 |
|
|
|
587943 |
- if (rc != pcmk_ok) {
|
|
|
587943 |
+ if (rc != pcmk_rc_ok) {
|
|
|
587943 |
out->err(out, "Notification setup failed, could not monitor CIB actions");
|
|
|
587943 |
clean_up_cib_connection();
|
|
|
587943 |
clean_up_fencing_connection();
|
|
|
587943 |
@@ -1206,20 +1214,20 @@ reconcile_output_format(pcmk__common_args_t *args) {
|
|
|
587943 |
static void
|
|
|
587943 |
handle_connection_failures(int rc)
|
|
|
587943 |
{
|
|
|
587943 |
- if (rc == pcmk_ok) {
|
|
|
587943 |
+ if (rc == pcmk_rc_ok) {
|
|
|
587943 |
return;
|
|
|
587943 |
}
|
|
|
587943 |
|
|
|
587943 |
if (output_format == mon_output_monitor) {
|
|
|
587943 |
g_set_error(&error, PCMK__EXITC_ERROR, CRM_EX_ERROR, "CLUSTER CRIT: Connection to cluster failed: %s",
|
|
|
587943 |
- pcmk_strerror(rc));
|
|
|
587943 |
+ pcmk_rc_str(rc));
|
|
|
587943 |
rc = MON_STATUS_CRIT;
|
|
|
587943 |
- } else if (rc == -ENOTCONN) {
|
|
|
587943 |
+ } else if (rc == ENOTCONN) {
|
|
|
587943 |
g_set_error(&error, PCMK__EXITC_ERROR, CRM_EX_ERROR, "Error: cluster is not available on this node");
|
|
|
587943 |
- rc = crm_errno2exit(rc);
|
|
|
587943 |
+ rc = pcmk_rc2exitc(rc);
|
|
|
587943 |
} else {
|
|
|
587943 |
- g_set_error(&error, PCMK__EXITC_ERROR, CRM_EX_ERROR, "Connection to cluster failed: %s", pcmk_strerror(rc));
|
|
|
587943 |
- rc = crm_errno2exit(rc);
|
|
|
587943 |
+ g_set_error(&error, PCMK__EXITC_ERROR, CRM_EX_ERROR, "Connection to cluster failed: %s", pcmk_rc_str(rc));
|
|
|
587943 |
+ rc = pcmk_rc2exitc(rc);
|
|
|
587943 |
}
|
|
|
587943 |
|
|
|
587943 |
clean_up(rc);
|
|
|
587943 |
@@ -1478,7 +1486,7 @@ main(int argc, char **argv)
|
|
|
587943 |
fencing_connect();
|
|
|
587943 |
rc = cib_connect(TRUE);
|
|
|
587943 |
|
|
|
587943 |
- if (rc != pcmk_ok) {
|
|
|
587943 |
+ if (rc != pcmk_rc_ok) {
|
|
|
587943 |
sleep(options.reconnect_msec / 1000);
|
|
|
587943 |
#if CURSES_ENABLED
|
|
|
587943 |
if (output_format == mon_output_console) {
|
|
|
587943 |
@@ -1490,7 +1498,7 @@ main(int argc, char **argv)
|
|
|
587943 |
printf("Writing html to %s ...\n", args->output_dest);
|
|
|
587943 |
}
|
|
|
587943 |
|
|
|
587943 |
- } while (rc == -ENOTCONN);
|
|
|
587943 |
+ } while (rc == ENOTCONN);
|
|
|
587943 |
|
|
|
587943 |
handle_connection_failures(rc);
|
|
|
587943 |
set_fencing_options(interactive_fence_level);
|
|
|
587943 |
--
|
|
|
587943 |
1.8.3.1
|
|
|
587943 |
|
|
|
587943 |
|
|
|
587943 |
From 9b8fb7b608280f65a3b76d66a99b575a4da70944 Mon Sep 17 00:00:00 2001
|
|
|
587943 |
From: Klaus Wenninger <klaus.wenninger@aon.at>
|
|
|
587943 |
Date: Mon, 25 Jan 2021 18:26:04 +0100
|
|
|
587943 |
Subject: [PATCH 3/7] Fix: tools: Report pacemakerd in state waiting for sbd
|
|
|
587943 |
|
|
|
587943 |
Waiting for pacemakerd to report that all subdaemons are started
|
|
|
587943 |
before trying to connect to cib and fencer should remove the
|
|
|
587943 |
potential race introduced by making fencer connection failure
|
|
|
587943 |
non-fatal when cib is faster to come up.
|
|
|
587943 |
---
|
|
|
587943 |
tools/crm_mon.c | 158 ++++++++++++++++++++++++++++++++++++++++++++++++++++----
|
|
|
587943 |
tools/crm_mon.h | 1 +
|
|
|
587943 |
2 files changed, 148 insertions(+), 11 deletions(-)
|
|
|
587943 |
|
|
|
587943 |
diff --git a/tools/crm_mon.c b/tools/crm_mon.c
|
|
|
587943 |
index 3fbac5f..61f070d 100644
|
|
|
587943 |
--- a/tools/crm_mon.c
|
|
|
587943 |
+++ b/tools/crm_mon.c
|
|
|
587943 |
@@ -132,6 +132,7 @@ static void handle_connection_failures(int rc);
|
|
|
587943 |
static int mon_refresh_display(gpointer user_data);
|
|
|
587943 |
static int cib_connect(gboolean full);
|
|
|
587943 |
static int fencing_connect(void);
|
|
|
587943 |
+static int pacemakerd_status(void);
|
|
|
587943 |
static void mon_st_callback_event(stonith_t * st, stonith_event_t * e);
|
|
|
587943 |
static void mon_st_callback_display(stonith_t * st, stonith_event_t * e);
|
|
|
587943 |
static void refresh_after_event(gboolean data_updated);
|
|
|
587943 |
@@ -689,11 +690,13 @@ reconnect_after_timeout(gpointer data)
|
|
|
587943 |
}
|
|
|
587943 |
|
|
|
587943 |
print_as(output_format, "Reconnecting...\n");
|
|
|
587943 |
- fencing_connect();
|
|
|
587943 |
- if (cib_connect(TRUE) == pcmk_rc_ok) {
|
|
|
587943 |
- /* Redraw the screen and reinstall ourselves to get called after another reconnect_msec. */
|
|
|
587943 |
- mon_refresh_display(NULL);
|
|
|
587943 |
- return FALSE;
|
|
|
587943 |
+ if (pacemakerd_status() == pcmk_rc_ok) {
|
|
|
587943 |
+ fencing_connect();
|
|
|
587943 |
+ if (cib_connect(TRUE) == pcmk_rc_ok) {
|
|
|
587943 |
+ /* Redraw the screen and reinstall ourselves to get called after another reconnect_msec. */
|
|
|
587943 |
+ mon_refresh_display(NULL);
|
|
|
587943 |
+ return FALSE;
|
|
|
587943 |
+ }
|
|
|
587943 |
}
|
|
|
587943 |
|
|
|
587943 |
reconnect_timer = g_timeout_add(options.reconnect_msec, reconnect_after_timeout, NULL);
|
|
|
587943 |
@@ -840,6 +843,13 @@ cib_connect(gboolean full)
|
|
|
587943 |
return rc;
|
|
|
587943 |
}
|
|
|
587943 |
|
|
|
587943 |
+#if CURSES_ENABLED
|
|
|
587943 |
+ /* just show this if refresh is gonna remove all traces */
|
|
|
587943 |
+ if (output_format == mon_output_console) {
|
|
|
587943 |
+ print_as(output_format ,"Waiting for CIB ...\n");
|
|
|
587943 |
+ }
|
|
|
587943 |
+#endif
|
|
|
587943 |
+
|
|
|
587943 |
rc = pcmk_legacy2rc(cib->cmds->query(cib, NULL, ¤t_cib,
|
|
|
587943 |
cib_scope_local | cib_sync_call));
|
|
|
587943 |
|
|
|
587943 |
@@ -904,6 +914,121 @@ set_fencing_options(int level)
|
|
|
587943 |
}
|
|
|
587943 |
}
|
|
|
587943 |
|
|
|
587943 |
+/* Before trying to connect to fencer or cib check for state of
|
|
|
587943 |
+ pacemakerd - just no sense in trying till pacemakerd has
|
|
|
587943 |
+ taken care of starting all the sub-processes
|
|
|
587943 |
+
|
|
|
587943 |
+ Only noteworthy thing to show here is when pacemakerd is
|
|
|
587943 |
+ waiting for startup-trigger from SBD.
|
|
|
587943 |
+ */
|
|
|
587943 |
+static void
|
|
|
587943 |
+pacemakerd_event_cb(pcmk_ipc_api_t *pacemakerd_api,
|
|
|
587943 |
+ enum pcmk_ipc_event event_type, crm_exit_t status,
|
|
|
587943 |
+ void *event_data, void *user_data)
|
|
|
587943 |
+{
|
|
|
587943 |
+ pcmk_pacemakerd_api_reply_t *reply = event_data;
|
|
|
587943 |
+ enum pcmk_pacemakerd_state *state =
|
|
|
587943 |
+ (enum pcmk_pacemakerd_state *) user_data;
|
|
|
587943 |
+
|
|
|
587943 |
+ /* we are just interested in the latest reply */
|
|
|
587943 |
+ *state = pcmk_pacemakerd_state_invalid;
|
|
|
587943 |
+
|
|
|
587943 |
+ switch (event_type) {
|
|
|
587943 |
+ case pcmk_ipc_event_reply:
|
|
|
587943 |
+ break;
|
|
|
587943 |
+
|
|
|
587943 |
+ default:
|
|
|
587943 |
+ return;
|
|
|
587943 |
+ }
|
|
|
587943 |
+
|
|
|
587943 |
+ if (status != CRM_EX_OK) {
|
|
|
587943 |
+ out->err(out, "Bad reply from pacemakerd: %s",
|
|
|
587943 |
+ crm_exit_str(status));
|
|
|
587943 |
+ return;
|
|
|
587943 |
+ }
|
|
|
587943 |
+
|
|
|
587943 |
+ if (reply->reply_type != pcmk_pacemakerd_reply_ping) {
|
|
|
587943 |
+ out->err(out, "Unknown reply type %d from pacemakerd",
|
|
|
587943 |
+ reply->reply_type);
|
|
|
587943 |
+ } else {
|
|
|
587943 |
+ if ((reply->data.ping.last_good != (time_t) 0) &&
|
|
|
587943 |
+ (reply->data.ping.status == pcmk_rc_ok)) {
|
|
|
587943 |
+ *state = reply->data.ping.state;
|
|
|
587943 |
+ }
|
|
|
587943 |
+ }
|
|
|
587943 |
+}
|
|
|
587943 |
+
|
|
|
587943 |
+static int
|
|
|
587943 |
+pacemakerd_status(void)
|
|
|
587943 |
+{
|
|
|
587943 |
+ int rc = pcmk_rc_ok;
|
|
|
587943 |
+ pcmk_ipc_api_t *pacemakerd_api = NULL;
|
|
|
587943 |
+ enum pcmk_pacemakerd_state state = pcmk_pacemakerd_state_invalid;
|
|
|
587943 |
+
|
|
|
587943 |
+ if (!pcmk_is_set(options.mon_ops, mon_op_cib_native)) {
|
|
|
587943 |
+ /* we don't need fully functional pacemakerd otherwise */
|
|
|
587943 |
+ return rc;
|
|
|
587943 |
+ }
|
|
|
587943 |
+ if (cib != NULL &&
|
|
|
587943 |
+ (cib->state == cib_connected_query ||
|
|
|
587943 |
+ cib->state == cib_connected_command)) {
|
|
|
587943 |
+ /* As long as we have a cib-connection let's go with
|
|
|
587943 |
+ * that to fetch further cluster-status and avoid
|
|
|
587943 |
+ * unnecessary pings to pacemakerd.
|
|
|
587943 |
+ * If cluster is going down and fencer is down already
|
|
|
587943 |
+ * this will lead to a silently failing fencer reconnect.
|
|
|
587943 |
+ * On cluster startup we shouldn't see this situation
|
|
|
587943 |
+ * as first we do is wait for pacemakerd to report all
|
|
|
587943 |
+ * daemons running.
|
|
|
587943 |
+ */
|
|
|
587943 |
+ return rc;
|
|
|
587943 |
+ }
|
|
|
587943 |
+ rc = pcmk_new_ipc_api(&pacemakerd_api, pcmk_ipc_pacemakerd);
|
|
|
587943 |
+ if (pacemakerd_api == NULL) {
|
|
|
587943 |
+ out->err(out, "Could not connect to pacemakerd: %s",
|
|
|
587943 |
+ pcmk_rc_str(rc));
|
|
|
587943 |
+ /* this is unrecoverable so return with rc we have */
|
|
|
587943 |
+ return rc;
|
|
|
587943 |
+ }
|
|
|
587943 |
+ pcmk_register_ipc_callback(pacemakerd_api, pacemakerd_event_cb, (void *) &state);
|
|
|
587943 |
+ rc = pcmk_connect_ipc(pacemakerd_api, pcmk_ipc_dispatch_poll);
|
|
|
587943 |
+ if (rc == pcmk_rc_ok) {
|
|
|
587943 |
+ rc = pcmk_pacemakerd_api_ping(pacemakerd_api, crm_system_name);
|
|
|
587943 |
+ if (rc == pcmk_rc_ok) {
|
|
|
587943 |
+ rc = pcmk_poll_ipc(pacemakerd_api, options.reconnect_msec/2);
|
|
|
587943 |
+ if (rc == pcmk_rc_ok) {
|
|
|
587943 |
+ pcmk_dispatch_ipc(pacemakerd_api);
|
|
|
587943 |
+ rc = ENOTCONN;
|
|
|
587943 |
+ switch (state) {
|
|
|
587943 |
+ case pcmk_pacemakerd_state_running:
|
|
|
587943 |
+ rc = pcmk_rc_ok;
|
|
|
587943 |
+ break;
|
|
|
587943 |
+ case pcmk_pacemakerd_state_starting_daemons:
|
|
|
587943 |
+ print_as(output_format ,"Pacemaker daemons starting ...\n");
|
|
|
587943 |
+ break;
|
|
|
587943 |
+ case pcmk_pacemakerd_state_wait_for_ping:
|
|
|
587943 |
+ print_as(output_format ,"Waiting for startup-trigger from SBD ...\n");
|
|
|
587943 |
+ break;
|
|
|
587943 |
+ case pcmk_pacemakerd_state_shutting_down:
|
|
|
587943 |
+ print_as(output_format ,"Pacemaker daemons shutting down ...\n");
|
|
|
587943 |
+ break;
|
|
|
587943 |
+ case pcmk_pacemakerd_state_shutdown_complete:
|
|
|
587943 |
+ /* assuming pacemakerd doesn't dispatch any pings after entering
|
|
|
587943 |
+ * that state unless it is waiting for SBD
|
|
|
587943 |
+ */
|
|
|
587943 |
+ print_as(output_format ,"Pacemaker daemons shut down - reporting to SBD ...\n");
|
|
|
587943 |
+ break;
|
|
|
587943 |
+ default:
|
|
|
587943 |
+ break;
|
|
|
587943 |
+ }
|
|
|
587943 |
+ }
|
|
|
587943 |
+ }
|
|
|
587943 |
+ }
|
|
|
587943 |
+ pcmk_free_ipc_api(pacemakerd_api);
|
|
|
587943 |
+ /* returning with ENOTCONN triggers a retry */
|
|
|
587943 |
+ return (rc == pcmk_rc_ok)?rc:ENOTCONN;
|
|
|
587943 |
+}
|
|
|
587943 |
+
|
|
|
587943 |
#if CURSES_ENABLED
|
|
|
587943 |
static const char *
|
|
|
587943 |
get_option_desc(char c)
|
|
|
587943 |
@@ -1033,8 +1158,11 @@ detect_user_input(GIOChannel *channel, GIOCondition condition, gpointer user_dat
|
|
|
587943 |
}
|
|
|
587943 |
|
|
|
587943 |
refresh:
|
|
|
587943 |
- fencing_connect();
|
|
|
587943 |
- rc = cib_connect(FALSE);
|
|
|
587943 |
+ rc = pacemakerd_status();
|
|
|
587943 |
+ if (rc == pcmk_rc_ok) {
|
|
|
587943 |
+ fencing_connect();
|
|
|
587943 |
+ rc = cib_connect(FALSE);
|
|
|
587943 |
+ }
|
|
|
587943 |
if (rc == pcmk_rc_ok) {
|
|
|
587943 |
mon_refresh_display(NULL);
|
|
|
587943 |
} else {
|
|
|
587943 |
@@ -1238,9 +1366,13 @@ one_shot(void)
|
|
|
587943 |
{
|
|
|
587943 |
int rc;
|
|
|
587943 |
|
|
|
587943 |
- fencing_connect();
|
|
|
587943 |
+ rc = pacemakerd_status();
|
|
|
587943 |
+
|
|
|
587943 |
+ if (rc == pcmk_rc_ok) {
|
|
|
587943 |
+ fencing_connect();
|
|
|
587943 |
+ rc = cib_connect(FALSE);
|
|
|
587943 |
+ }
|
|
|
587943 |
|
|
|
587943 |
- rc = cib_connect(FALSE);
|
|
|
587943 |
if (rc == pcmk_rc_ok) {
|
|
|
587943 |
mon_refresh_display(NULL);
|
|
|
587943 |
} else {
|
|
|
587943 |
@@ -1316,6 +1448,7 @@ main(int argc, char **argv)
|
|
|
587943 |
|
|
|
587943 |
case cib_native:
|
|
|
587943 |
/* cib & fencing - everything available */
|
|
|
587943 |
+ options.mon_ops |= mon_op_cib_native;
|
|
|
587943 |
break;
|
|
|
587943 |
|
|
|
587943 |
case cib_file:
|
|
|
587943 |
@@ -1483,8 +1616,11 @@ main(int argc, char **argv)
|
|
|
587943 |
do {
|
|
|
587943 |
print_as(output_format ,"Waiting until cluster is available on this node ...\n");
|
|
|
587943 |
|
|
|
587943 |
- fencing_connect();
|
|
|
587943 |
- rc = cib_connect(TRUE);
|
|
|
587943 |
+ rc = pacemakerd_status();
|
|
|
587943 |
+ if (rc == pcmk_rc_ok) {
|
|
|
587943 |
+ fencing_connect();
|
|
|
587943 |
+ rc = cib_connect(TRUE);
|
|
|
587943 |
+ }
|
|
|
587943 |
|
|
|
587943 |
if (rc != pcmk_rc_ok) {
|
|
|
587943 |
sleep(options.reconnect_msec / 1000);
|
|
|
587943 |
diff --git a/tools/crm_mon.h b/tools/crm_mon.h
|
|
|
587943 |
index 73c926d..b556913 100644
|
|
|
587943 |
--- a/tools/crm_mon.h
|
|
|
587943 |
+++ b/tools/crm_mon.h
|
|
|
587943 |
@@ -91,6 +91,7 @@ typedef enum mon_output_format_e {
|
|
|
587943 |
#define mon_op_print_brief (0x0200U)
|
|
|
587943 |
#define mon_op_print_pending (0x0400U)
|
|
|
587943 |
#define mon_op_print_clone_detail (0x0800U)
|
|
|
587943 |
+#define mon_op_cib_native (0x1000U)
|
|
|
587943 |
|
|
|
587943 |
#define mon_op_default (mon_op_print_pending | mon_op_fence_history | mon_op_fence_connect)
|
|
|
587943 |
|
|
|
587943 |
--
|
|
|
587943 |
1.8.3.1
|
|
|
587943 |
|
|
|
587943 |
|
|
|
587943 |
From 046516dbe66fb2c52b90f36215cf60c5ad3c269b Mon Sep 17 00:00:00 2001
|
|
|
587943 |
From: Klaus Wenninger <klaus.wenninger@aon.at>
|
|
|
587943 |
Date: Thu, 28 Jan 2021 16:38:22 +0100
|
|
|
587943 |
Subject: [PATCH 4/7] Refactor: crm_mon: do refreshes rather via
|
|
|
587943 |
refresh_after_event
|
|
|
587943 |
|
|
|
587943 |
---
|
|
|
587943 |
tools/crm_mon.c | 35 ++++++++++++++---------------------
|
|
|
587943 |
1 file changed, 14 insertions(+), 21 deletions(-)
|
|
|
587943 |
|
|
|
587943 |
diff --git a/tools/crm_mon.c b/tools/crm_mon.c
|
|
|
587943 |
index 61f070d..195e7b5 100644
|
|
|
587943 |
--- a/tools/crm_mon.c
|
|
|
587943 |
+++ b/tools/crm_mon.c
|
|
|
587943 |
@@ -135,7 +135,7 @@ static int fencing_connect(void);
|
|
|
587943 |
static int pacemakerd_status(void);
|
|
|
587943 |
static void mon_st_callback_event(stonith_t * st, stonith_event_t * e);
|
|
|
587943 |
static void mon_st_callback_display(stonith_t * st, stonith_event_t * e);
|
|
|
587943 |
-static void refresh_after_event(gboolean data_updated);
|
|
|
587943 |
+static void refresh_after_event(gboolean data_updated, gboolean enforce);
|
|
|
587943 |
|
|
|
587943 |
static unsigned int
|
|
|
587943 |
all_includes(mon_output_format_t fmt) {
|
|
|
587943 |
@@ -694,13 +694,13 @@ reconnect_after_timeout(gpointer data)
|
|
|
587943 |
fencing_connect();
|
|
|
587943 |
if (cib_connect(TRUE) == pcmk_rc_ok) {
|
|
|
587943 |
/* Redraw the screen and reinstall ourselves to get called after another reconnect_msec. */
|
|
|
587943 |
- mon_refresh_display(NULL);
|
|
|
587943 |
+ refresh_after_event(FALSE, TRUE);
|
|
|
587943 |
return FALSE;
|
|
|
587943 |
}
|
|
|
587943 |
}
|
|
|
587943 |
|
|
|
587943 |
reconnect_timer = g_timeout_add(options.reconnect_msec, reconnect_after_timeout, NULL);
|
|
|
587943 |
- return TRUE;
|
|
|
587943 |
+ return FALSE;
|
|
|
587943 |
}
|
|
|
587943 |
|
|
|
587943 |
/* Called from various places when we are disconnected from the CIB or from the
|
|
|
587943 |
@@ -1057,7 +1057,6 @@ static gboolean
|
|
|
587943 |
detect_user_input(GIOChannel *channel, GIOCondition condition, gpointer user_data)
|
|
|
587943 |
{
|
|
|
587943 |
int c;
|
|
|
587943 |
- int rc;
|
|
|
587943 |
gboolean config_mode = FALSE;
|
|
|
587943 |
|
|
|
587943 |
while (1) {
|
|
|
587943 |
@@ -1158,16 +1157,7 @@ detect_user_input(GIOChannel *channel, GIOCondition condition, gpointer user_dat
|
|
|
587943 |
}
|
|
|
587943 |
|
|
|
587943 |
refresh:
|
|
|
587943 |
- rc = pacemakerd_status();
|
|
|
587943 |
- if (rc == pcmk_rc_ok) {
|
|
|
587943 |
- fencing_connect();
|
|
|
587943 |
- rc = cib_connect(FALSE);
|
|
|
587943 |
- }
|
|
|
587943 |
- if (rc == pcmk_rc_ok) {
|
|
|
587943 |
- mon_refresh_display(NULL);
|
|
|
587943 |
- } else {
|
|
|
587943 |
- handle_connection_failures(rc);
|
|
|
587943 |
- }
|
|
|
587943 |
+ refresh_after_event(FALSE, TRUE);
|
|
|
587943 |
|
|
|
587943 |
return TRUE;
|
|
|
587943 |
}
|
|
|
587943 |
@@ -2087,7 +2077,7 @@ crm_diff_update(const char *event, xmlNode * msg)
|
|
|
587943 |
}
|
|
|
587943 |
|
|
|
587943 |
stale = FALSE;
|
|
|
587943 |
- refresh_after_event(cib_updated);
|
|
|
587943 |
+ refresh_after_event(cib_updated, FALSE);
|
|
|
587943 |
}
|
|
|
587943 |
|
|
|
587943 |
static int
|
|
|
587943 |
@@ -2246,7 +2236,7 @@ mon_st_callback_event(stonith_t * st, stonith_event_t * e)
|
|
|
587943 |
* fencing event is received or a CIB diff occurrs.
|
|
|
587943 |
*/
|
|
|
587943 |
static void
|
|
|
587943 |
-refresh_after_event(gboolean data_updated)
|
|
|
587943 |
+refresh_after_event(gboolean data_updated, gboolean enforce)
|
|
|
587943 |
{
|
|
|
587943 |
static int updates = 0;
|
|
|
587943 |
time_t now = time(NULL);
|
|
|
587943 |
@@ -2259,12 +2249,15 @@ refresh_after_event(gboolean data_updated)
|
|
|
587943 |
refresh_timer = mainloop_timer_add("refresh", 2000, FALSE, mon_trigger_refresh, NULL);
|
|
|
587943 |
}
|
|
|
587943 |
|
|
|
587943 |
- if ((now - last_refresh) > (options.reconnect_msec / 1000)) {
|
|
|
587943 |
- mainloop_set_trigger(refresh_trigger);
|
|
|
587943 |
+ if (reconnect_timer > 0) {
|
|
|
587943 |
+ /* we will receive a refresh request after successful reconnect */
|
|
|
587943 |
mainloop_timer_stop(refresh_timer);
|
|
|
587943 |
- updates = 0;
|
|
|
587943 |
+ return;
|
|
|
587943 |
+ }
|
|
|
587943 |
|
|
|
587943 |
- } else if(updates >= 10) {
|
|
|
587943 |
+ if (enforce ||
|
|
|
587943 |
+ now - last_refresh > options.reconnect_msec / 1000 ||
|
|
|
587943 |
+ updates >= 10) {
|
|
|
587943 |
mainloop_set_trigger(refresh_trigger);
|
|
|
587943 |
mainloop_timer_stop(refresh_timer);
|
|
|
587943 |
updates = 0;
|
|
|
587943 |
@@ -2285,7 +2278,7 @@ mon_st_callback_display(stonith_t * st, stonith_event_t * e)
|
|
|
587943 |
mon_cib_connection_destroy(NULL);
|
|
|
587943 |
} else {
|
|
|
587943 |
print_dot(output_format);
|
|
|
587943 |
- refresh_after_event(TRUE);
|
|
|
587943 |
+ refresh_after_event(TRUE, FALSE);
|
|
|
587943 |
}
|
|
|
587943 |
}
|
|
|
587943 |
|
|
|
587943 |
--
|
|
|
587943 |
1.8.3.1
|
|
|
587943 |
|
|
|
587943 |
|
|
|
587943 |
From a63af2713f96719fc1d5ef594eb033d0f251187f Mon Sep 17 00:00:00 2001
|
|
|
587943 |
From: Klaus Wenninger <klaus.wenninger@aon.at>
|
|
|
587943 |
Date: Thu, 28 Jan 2021 16:52:57 +0100
|
|
|
587943 |
Subject: [PATCH 5/7] Fix: crm_mon: retry fencer connection as not fatal
|
|
|
587943 |
initially
|
|
|
587943 |
|
|
|
587943 |
and clean up fencer api to not leak memory on multiple reconnects
|
|
|
587943 |
---
|
|
|
587943 |
tools/crm_mon.c | 8 +++++++-
|
|
|
587943 |
1 file changed, 7 insertions(+), 1 deletion(-)
|
|
|
587943 |
|
|
|
587943 |
diff --git a/tools/crm_mon.c b/tools/crm_mon.c
|
|
|
587943 |
index 195e7b5..a768ca9 100644
|
|
|
587943 |
--- a/tools/crm_mon.c
|
|
|
587943 |
+++ b/tools/crm_mon.c
|
|
|
587943 |
@@ -798,7 +798,7 @@ fencing_connect(void)
|
|
|
587943 |
st->cmds->register_notification(st, T_STONITH_NOTIFY_HISTORY, mon_st_callback_display);
|
|
|
587943 |
}
|
|
|
587943 |
} else {
|
|
|
587943 |
- st = NULL;
|
|
|
587943 |
+ clean_up_fencing_connection();
|
|
|
587943 |
}
|
|
|
587943 |
|
|
|
587943 |
return rc;
|
|
|
587943 |
@@ -2255,6 +2255,12 @@ refresh_after_event(gboolean data_updated, gboolean enforce)
|
|
|
587943 |
return;
|
|
|
587943 |
}
|
|
|
587943 |
|
|
|
587943 |
+ /* as we're not handling initial failure of fencer-connection as
|
|
|
587943 |
+ * fatal give it a retry here
|
|
|
587943 |
+ * not getting here if cib-reconnection is already on the way
|
|
|
587943 |
+ */
|
|
|
587943 |
+ fencing_connect();
|
|
|
587943 |
+
|
|
|
587943 |
if (enforce ||
|
|
|
587943 |
now - last_refresh > options.reconnect_msec / 1000 ||
|
|
|
587943 |
updates >= 10) {
|
|
|
587943 |
--
|
|
|
587943 |
1.8.3.1
|
|
|
587943 |
|
|
|
587943 |
|
|
|
587943 |
From b6f4b5dfc0b5fec8cdc029409fc61252de019415 Mon Sep 17 00:00:00 2001
|
|
|
587943 |
From: Klaus Wenninger <klaus.wenninger@aon.at>
|
|
|
587943 |
Date: Thu, 28 Jan 2021 18:08:43 +0100
|
|
|
587943 |
Subject: [PATCH 6/7] Refactor: crm_mon: have reconnect-timer removed
|
|
|
587943 |
implicitly
|
|
|
587943 |
|
|
|
587943 |
---
|
|
|
587943 |
tools/crm_mon.c | 12 ++++--------
|
|
|
587943 |
1 file changed, 4 insertions(+), 8 deletions(-)
|
|
|
587943 |
|
|
|
587943 |
diff --git a/tools/crm_mon.c b/tools/crm_mon.c
|
|
|
587943 |
index a768ca9..4f73379 100644
|
|
|
587943 |
--- a/tools/crm_mon.c
|
|
|
587943 |
+++ b/tools/crm_mon.c
|
|
|
587943 |
@@ -684,23 +684,19 @@ reconnect_after_timeout(gpointer data)
|
|
|
587943 |
}
|
|
|
587943 |
#endif
|
|
|
587943 |
|
|
|
587943 |
- if (reconnect_timer > 0) {
|
|
|
587943 |
- g_source_remove(reconnect_timer);
|
|
|
587943 |
- reconnect_timer = 0;
|
|
|
587943 |
- }
|
|
|
587943 |
-
|
|
|
587943 |
print_as(output_format, "Reconnecting...\n");
|
|
|
587943 |
if (pacemakerd_status() == pcmk_rc_ok) {
|
|
|
587943 |
fencing_connect();
|
|
|
587943 |
if (cib_connect(TRUE) == pcmk_rc_ok) {
|
|
|
587943 |
- /* Redraw the screen and reinstall ourselves to get called after another reconnect_msec. */
|
|
|
587943 |
+ /* trigger redrawing the screen (needs reconnect_timer == 0) */
|
|
|
587943 |
+ reconnect_timer = 0;
|
|
|
587943 |
refresh_after_event(FALSE, TRUE);
|
|
|
587943 |
- return FALSE;
|
|
|
587943 |
+ return G_SOURCE_REMOVE;
|
|
|
587943 |
}
|
|
|
587943 |
}
|
|
|
587943 |
|
|
|
587943 |
reconnect_timer = g_timeout_add(options.reconnect_msec, reconnect_after_timeout, NULL);
|
|
|
587943 |
- return FALSE;
|
|
|
587943 |
+ return G_SOURCE_REMOVE;
|
|
|
587943 |
}
|
|
|
587943 |
|
|
|
587943 |
/* Called from various places when we are disconnected from the CIB or from the
|
|
|
587943 |
--
|
|
|
587943 |
1.8.3.1
|
|
|
587943 |
|
|
|
587943 |
|
|
|
587943 |
From 586e69ec38d5273b348c42a61b9bc7bbcc2b93b3 Mon Sep 17 00:00:00 2001
|
|
|
587943 |
From: Klaus Wenninger <klaus.wenninger@aon.at>
|
|
|
587943 |
Date: Thu, 28 Jan 2021 21:08:16 +0100
|
|
|
587943 |
Subject: [PATCH 7/7] Fix: crm_mon: suppress pacemakerd-status for non-text
|
|
|
587943 |
output
|
|
|
587943 |
|
|
|
587943 |
---
|
|
|
587943 |
tools/crm_mon.c | 53 ++++++++++++++++++++++++++++++++---------------------
|
|
|
587943 |
1 file changed, 32 insertions(+), 21 deletions(-)
|
|
|
587943 |
|
|
|
587943 |
diff --git a/tools/crm_mon.c b/tools/crm_mon.c
|
|
|
587943 |
index 4f73379..d4d4ac3 100644
|
|
|
587943 |
--- a/tools/crm_mon.c
|
|
|
587943 |
+++ b/tools/crm_mon.c
|
|
|
587943 |
@@ -995,27 +995,38 @@ pacemakerd_status(void)
|
|
|
587943 |
if (rc == pcmk_rc_ok) {
|
|
|
587943 |
pcmk_dispatch_ipc(pacemakerd_api);
|
|
|
587943 |
rc = ENOTCONN;
|
|
|
587943 |
- switch (state) {
|
|
|
587943 |
- case pcmk_pacemakerd_state_running:
|
|
|
587943 |
- rc = pcmk_rc_ok;
|
|
|
587943 |
- break;
|
|
|
587943 |
- case pcmk_pacemakerd_state_starting_daemons:
|
|
|
587943 |
- print_as(output_format ,"Pacemaker daemons starting ...\n");
|
|
|
587943 |
- break;
|
|
|
587943 |
- case pcmk_pacemakerd_state_wait_for_ping:
|
|
|
587943 |
- print_as(output_format ,"Waiting for startup-trigger from SBD ...\n");
|
|
|
587943 |
- break;
|
|
|
587943 |
- case pcmk_pacemakerd_state_shutting_down:
|
|
|
587943 |
- print_as(output_format ,"Pacemaker daemons shutting down ...\n");
|
|
|
587943 |
- break;
|
|
|
587943 |
- case pcmk_pacemakerd_state_shutdown_complete:
|
|
|
587943 |
- /* assuming pacemakerd doesn't dispatch any pings after entering
|
|
|
587943 |
- * that state unless it is waiting for SBD
|
|
|
587943 |
- */
|
|
|
587943 |
- print_as(output_format ,"Pacemaker daemons shut down - reporting to SBD ...\n");
|
|
|
587943 |
- break;
|
|
|
587943 |
- default:
|
|
|
587943 |
- break;
|
|
|
587943 |
+ if ((output_format == mon_output_console) ||
|
|
|
587943 |
+ (output_format == mon_output_plain)) {
|
|
|
587943 |
+ switch (state) {
|
|
|
587943 |
+ case pcmk_pacemakerd_state_running:
|
|
|
587943 |
+ rc = pcmk_rc_ok;
|
|
|
587943 |
+ break;
|
|
|
587943 |
+ case pcmk_pacemakerd_state_starting_daemons:
|
|
|
587943 |
+ print_as(output_format ,"Pacemaker daemons starting ...\n");
|
|
|
587943 |
+ break;
|
|
|
587943 |
+ case pcmk_pacemakerd_state_wait_for_ping:
|
|
|
587943 |
+ print_as(output_format ,"Waiting for startup-trigger from SBD ...\n");
|
|
|
587943 |
+ break;
|
|
|
587943 |
+ case pcmk_pacemakerd_state_shutting_down:
|
|
|
587943 |
+ print_as(output_format ,"Pacemaker daemons shutting down ...\n");
|
|
|
587943 |
+ break;
|
|
|
587943 |
+ case pcmk_pacemakerd_state_shutdown_complete:
|
|
|
587943 |
+ /* assuming pacemakerd doesn't dispatch any pings after entering
|
|
|
587943 |
+ * that state unless it is waiting for SBD
|
|
|
587943 |
+ */
|
|
|
587943 |
+ print_as(output_format ,"Pacemaker daemons shut down - reporting to SBD ...\n");
|
|
|
587943 |
+ break;
|
|
|
587943 |
+ default:
|
|
|
587943 |
+ break;
|
|
|
587943 |
+ }
|
|
|
587943 |
+ } else {
|
|
|
587943 |
+ switch (state) {
|
|
|
587943 |
+ case pcmk_pacemakerd_state_running:
|
|
|
587943 |
+ rc = pcmk_rc_ok;
|
|
|
587943 |
+ break;
|
|
|
587943 |
+ default:
|
|
|
587943 |
+ break;
|
|
|
587943 |
+ }
|
|
|
587943 |
}
|
|
|
587943 |
}
|
|
|
587943 |
}
|
|
|
587943 |
--
|
|
|
587943 |
1.8.3.1
|
|
|
587943 |
|