|
|
305287 |
From 0a884f325e1049febc28bf0419ab307dd0bce5af Mon Sep 17 00:00:00 2001
|
|
|
305287 |
From: Ken Gaillot <kgaillot@redhat.com>
|
|
|
305287 |
Date: Thu, 16 May 2019 20:04:57 -0500
|
|
|
305287 |
Subject: [PATCH] Log: various: improve fencer connection messages
|
|
|
305287 |
|
|
|
305287 |
Previously, log messages around fencer connections were inconsistent.
|
|
|
305287 |
|
|
|
305287 |
This attempts to make them more consistent by: having stonith_api_signon() log
|
|
|
305287 |
only at debug level, letting the callers log at a level appropriate to the
|
|
|
305287 |
situation using the return code; factoring connection retries into a function; and
|
|
|
305287 |
using similar wording across clients.
|
|
|
305287 |
|
|
|
305287 |
This also does a bit of refactoring for better error checking and improved
|
|
|
305287 |
efficiency.
|
|
|
305287 |
---
|
|
|
305287 |
daemons/controld/controld_control.c | 7 +-
|
|
|
305287 |
daemons/controld/controld_te_utils.c | 59 ++++++-----
|
|
|
305287 |
daemons/execd/pacemaker-execd.c | 28 ++---
|
|
|
305287 |
daemons/fenced/cts-fence-helper.c | 38 +++----
|
|
|
305287 |
include/crm/stonith-ng.h | 4 +
|
|
|
305287 |
lib/fencing/st_client.c | 195 ++++++++++++++++++++---------------
|
|
|
305287 |
tools/crm_mon.c | 1 -
|
|
|
305287 |
tools/stonith_admin.c | 29 +-----
|
|
|
305287 |
8 files changed, 181 insertions(+), 180 deletions(-)
|
|
|
305287 |
|
|
|
305287 |
diff --git a/daemons/controld/controld_control.c b/daemons/controld/controld_control.c
|
|
|
305287 |
index 89b5b5d..6d9f335 100644
|
|
|
305287 |
--- a/daemons/controld/controld_control.c
|
|
|
305287 |
+++ b/daemons/controld/controld_control.c
|
|
|
305287 |
@@ -628,10 +628,11 @@ do_started(long long action,
|
|
|
305287 |
register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
|
|
|
305287 |
}
|
|
|
305287 |
|
|
|
305287 |
+ // Try connecting to fencer (retrying later in mainloop if failed)
|
|
|
305287 |
if (stonith_reconnect == NULL) {
|
|
|
305287 |
- int dummy;
|
|
|
305287 |
-
|
|
|
305287 |
- stonith_reconnect = mainloop_add_trigger(G_PRIORITY_LOW, te_connect_stonith, &dummy);
|
|
|
305287 |
+ stonith_reconnect = mainloop_add_trigger(G_PRIORITY_LOW,
|
|
|
305287 |
+ te_connect_stonith,
|
|
|
305287 |
+ GINT_TO_POINTER(TRUE));
|
|
|
305287 |
}
|
|
|
305287 |
set_bit(fsa_input_register, R_ST_REQUIRED);
|
|
|
305287 |
mainloop_set_trigger(stonith_reconnect);
|
|
|
305287 |
diff --git a/daemons/controld/controld_te_utils.c b/daemons/controld/controld_te_utils.c
|
|
|
305287 |
index 5606ed6..22f83ad 100644
|
|
|
305287 |
--- a/daemons/controld/controld_te_utils.c
|
|
|
305287 |
+++ b/daemons/controld/controld_te_utils.c
|
|
|
305287 |
@@ -1,5 +1,5 @@
|
|
|
305287 |
/*
|
|
|
305287 |
- * Copyright 2004-2018 Andrew Beekhof <andrew@beekhof.net>
|
|
|
305287 |
+ * Copyright 2004-2019 the Pacemaker project contributors
|
|
|
305287 |
*
|
|
|
305287 |
* This source code is licensed under the GNU General Public License version 2
|
|
|
305287 |
* or later (GPLv2+) WITHOUT ANY WARRANTY.
|
|
|
305287 |
@@ -385,10 +385,18 @@ te_trigger_stonith_history_sync(void)
|
|
|
305287 |
mainloop_timer_start(stonith_history_sync_timer);
|
|
|
305287 |
}
|
|
|
305287 |
|
|
|
305287 |
+/*!
|
|
|
305287 |
+ * \brief Connect to fencer
|
|
|
305287 |
+ *
|
|
|
305287 |
+ * \param[in] user_data If NULL, retry failures now, otherwise retry in main loop
|
|
|
305287 |
+ *
|
|
|
305287 |
+ * \return TRUE
|
|
|
305287 |
+ * \note If user_data is NULL, this will wait 2s between attempts, for up to
|
|
|
305287 |
+ * 30 attempts, meaning the controller could be blocked as long as 58s.
|
|
|
305287 |
+ */
|
|
|
305287 |
gboolean
|
|
|
305287 |
te_connect_stonith(gpointer user_data)
|
|
|
305287 |
{
|
|
|
305287 |
- int lpc = 0;
|
|
|
305287 |
int rc = pcmk_ok;
|
|
|
305287 |
|
|
|
305287 |
if (stonith_api == NULL) {
|
|
|
305287 |
@@ -396,42 +404,41 @@ te_connect_stonith(gpointer user_data)
|
|
|
305287 |
}
|
|
|
305287 |
|
|
|
305287 |
if (stonith_api->state != stonith_disconnected) {
|
|
|
305287 |
- crm_trace("Still connected");
|
|
|
305287 |
+ crm_trace("Already connected to fencer, no need to retry");
|
|
|
305287 |
return TRUE;
|
|
|
305287 |
}
|
|
|
305287 |
|
|
|
305287 |
- for (lpc = 0; lpc < 30; lpc++) {
|
|
|
305287 |
- crm_debug("Attempting connection to fencing daemon...");
|
|
|
305287 |
-
|
|
|
305287 |
- sleep(1);
|
|
|
305287 |
- rc = stonith_api->cmds->connect(stonith_api, crm_system_name, NULL);
|
|
|
305287 |
-
|
|
|
305287 |
- if (rc == pcmk_ok) {
|
|
|
305287 |
- break;
|
|
|
305287 |
+ if (user_data == NULL) {
|
|
|
305287 |
+ // Blocking (retry failures now, for up to 30 attempts)
|
|
|
305287 |
+ rc = stonith_api_connect_retry(stonith_api, crm_system_name, 30);
|
|
|
305287 |
+ if (rc != pcmk_ok) {
|
|
|
305287 |
+ crm_err("Could not connect to fencer in 30 attempts: %s "
|
|
|
305287 |
+ CRM_XS " rc=%d", pcmk_strerror(rc), rc);
|
|
|
305287 |
}
|
|
|
305287 |
-
|
|
|
305287 |
- if (user_data != NULL) {
|
|
|
305287 |
+ } else {
|
|
|
305287 |
+ // Non-blocking (retry failures later in main loop)
|
|
|
305287 |
+ rc = stonith_api->cmds->connect(stonith_api, crm_system_name, NULL);
|
|
|
305287 |
+ if (rc != pcmk_ok) {
|
|
|
305287 |
if (is_set(fsa_input_register, R_ST_REQUIRED)) {
|
|
|
305287 |
- crm_err("Sign-in failed: triggered a retry");
|
|
|
305287 |
+ crm_err("Fencer connection failed (will retry): %s "
|
|
|
305287 |
+ CRM_XS " rc=%d", pcmk_strerror(rc), rc);
|
|
|
305287 |
mainloop_set_trigger(stonith_reconnect);
|
|
|
305287 |
} else {
|
|
|
305287 |
- crm_info("Sign-in failed, but no longer required");
|
|
|
305287 |
+ crm_info("Fencer connection failed (ignoring because no longer required): %s "
|
|
|
305287 |
+ CRM_XS " rc=%d", pcmk_strerror(rc), rc);
|
|
|
305287 |
}
|
|
|
305287 |
return TRUE;
|
|
|
305287 |
}
|
|
|
305287 |
-
|
|
|
305287 |
- crm_err("Sign-in failed: pausing and trying again in 2s...");
|
|
|
305287 |
- sleep(1);
|
|
|
305287 |
}
|
|
|
305287 |
|
|
|
305287 |
- CRM_CHECK(rc == pcmk_ok, return TRUE); /* If not, we failed 30 times... just get out */
|
|
|
305287 |
- stonith_api->cmds->register_notification(stonith_api, T_STONITH_NOTIFY_DISCONNECT,
|
|
|
305287 |
- tengine_stonith_connection_destroy);
|
|
|
305287 |
-
|
|
|
305287 |
- stonith_api->cmds->register_notification(stonith_api, T_STONITH_NOTIFY_FENCE,
|
|
|
305287 |
- tengine_stonith_notify);
|
|
|
305287 |
-
|
|
|
305287 |
- crm_trace("Connected");
|
|
|
305287 |
+ if (rc == pcmk_ok) {
|
|
|
305287 |
+ stonith_api->cmds->register_notification(stonith_api,
|
|
|
305287 |
+ T_STONITH_NOTIFY_DISCONNECT,
|
|
|
305287 |
+ tengine_stonith_connection_destroy);
|
|
|
305287 |
+ stonith_api->cmds->register_notification(stonith_api,
|
|
|
305287 |
+ T_STONITH_NOTIFY_FENCE,
|
|
|
305287 |
+ tengine_stonith_notify);
|
|
|
305287 |
+ }
|
|
|
305287 |
return TRUE;
|
|
|
305287 |
}
|
|
|
305287 |
|
|
|
305287 |
diff --git a/daemons/execd/pacemaker-execd.c b/daemons/execd/pacemaker-execd.c
|
|
|
305287 |
index 21bb0ed..e2fdfca 100644
|
|
|
305287 |
--- a/daemons/execd/pacemaker-execd.c
|
|
|
305287 |
+++ b/daemons/execd/pacemaker-execd.c
|
|
|
305287 |
@@ -65,28 +65,20 @@ get_stonith_connection(void)
|
|
|
305287 |
stonith_api = NULL;
|
|
|
305287 |
}
|
|
|
305287 |
|
|
|
305287 |
- if (!stonith_api) {
|
|
|
305287 |
- int rc = 0;
|
|
|
305287 |
- int tries = 10;
|
|
|
305287 |
+ if (stonith_api == NULL) {
|
|
|
305287 |
+ int rc = pcmk_ok;
|
|
|
305287 |
|
|
|
305287 |
stonith_api = stonith_api_new();
|
|
|
305287 |
- do {
|
|
|
305287 |
- rc = stonith_api->cmds->connect(stonith_api, "pacemaker-execd", NULL);
|
|
|
305287 |
- if (rc == pcmk_ok) {
|
|
|
305287 |
- stonith_api->cmds->register_notification(stonith_api,
|
|
|
305287 |
- T_STONITH_NOTIFY_DISCONNECT,
|
|
|
305287 |
- stonith_connection_destroy_cb);
|
|
|
305287 |
- break;
|
|
|
305287 |
- }
|
|
|
305287 |
- sleep(1);
|
|
|
305287 |
- tries--;
|
|
|
305287 |
- } while (tries);
|
|
|
305287 |
-
|
|
|
305287 |
- if (rc) {
|
|
|
305287 |
- crm_err("Unable to connect to stonith daemon to execute command. error: %s",
|
|
|
305287 |
- pcmk_strerror(rc));
|
|
|
305287 |
+ rc = stonith_api_connect_retry(stonith_api, crm_system_name, 10);
|
|
|
305287 |
+ if (rc != pcmk_ok) {
|
|
|
305287 |
+ crm_err("Could not connect to fencer in 10 attempts: %s "
|
|
|
305287 |
+ CRM_XS " rc=%d", pcmk_strerror(rc), rc);
|
|
|
305287 |
stonith_api_delete(stonith_api);
|
|
|
305287 |
stonith_api = NULL;
|
|
|
305287 |
+ } else {
|
|
|
305287 |
+ stonith_api->cmds->register_notification(stonith_api,
|
|
|
305287 |
+ T_STONITH_NOTIFY_DISCONNECT,
|
|
|
305287 |
+ stonith_connection_destroy_cb);
|
|
|
305287 |
}
|
|
|
305287 |
}
|
|
|
305287 |
return stonith_api;
|
|
|
305287 |
diff --git a/daemons/fenced/cts-fence-helper.c b/daemons/fenced/cts-fence-helper.c
|
|
|
305287 |
index c5ce1ab..4552fc1 100644
|
|
|
305287 |
--- a/daemons/fenced/cts-fence-helper.c
|
|
|
305287 |
+++ b/daemons/fenced/cts-fence-helper.c
|
|
|
305287 |
@@ -1,5 +1,5 @@
|
|
|
305287 |
/*
|
|
|
305287 |
- * Copyright 2009-2018 Andrew Beekhof <andrew@beekhof.net>
|
|
|
305287 |
+ * Copyright 2009-2019 the Pacemaker project contributors
|
|
|
305287 |
*
|
|
|
305287 |
* This source code is licensed under the GNU General Public License version 2
|
|
|
305287 |
* or later (GPLv2+) WITHOUT ANY WARRANTY.
|
|
|
305287 |
@@ -124,8 +124,10 @@ passive_test(void)
|
|
|
305287 |
int rc = 0;
|
|
|
305287 |
|
|
|
305287 |
rc = st->cmds->connect(st, crm_system_name, &pollfd.fd);
|
|
|
305287 |
- crm_debug("Connect: %d", rc);
|
|
|
305287 |
-
|
|
|
305287 |
+ if (rc != pcmk_ok) {
|
|
|
305287 |
+ stonith_api_delete(st);
|
|
|
305287 |
+ crm_exit(CRM_EX_DISCONNECT);
|
|
|
305287 |
+ }
|
|
|
305287 |
st->cmds->register_notification(st, T_STONITH_NOTIFY_DISCONNECT, st_callback);
|
|
|
305287 |
st->cmds->register_notification(st, T_STONITH_NOTIFY_FENCE, st_callback);
|
|
|
305287 |
st->cmds->register_notification(st, STONITH_OP_DEVICE_ADD, st_callback);
|
|
|
305287 |
@@ -271,8 +273,10 @@ sanity_tests(void)
|
|
|
305287 |
int rc = 0;
|
|
|
305287 |
|
|
|
305287 |
rc = st->cmds->connect(st, crm_system_name, &pollfd.fd);
|
|
|
305287 |
- crm_debug("Connect: %d", rc);
|
|
|
305287 |
-
|
|
|
305287 |
+ if (rc != pcmk_ok) {
|
|
|
305287 |
+ stonith_api_delete(st);
|
|
|
305287 |
+ crm_exit(CRM_EX_DISCONNECT);
|
|
|
305287 |
+ }
|
|
|
305287 |
st->cmds->register_notification(st, T_STONITH_NOTIFY_DISCONNECT, st_callback);
|
|
|
305287 |
st->cmds->register_notification(st, T_STONITH_NOTIFY_FENCE, st_callback);
|
|
|
305287 |
st->cmds->register_notification(st, STONITH_OP_DEVICE_ADD, st_callback);
|
|
|
305287 |
@@ -295,7 +299,10 @@ standard_dev_test(void)
|
|
|
305287 |
stonith_key_value_t *params = NULL;
|
|
|
305287 |
|
|
|
305287 |
rc = st->cmds->connect(st, crm_system_name, &pollfd.fd);
|
|
|
305287 |
- crm_debug("Connect: %d", rc);
|
|
|
305287 |
+ if (rc != pcmk_ok) {
|
|
|
305287 |
+ stonith_api_delete(st);
|
|
|
305287 |
+ crm_exit(CRM_EX_DISCONNECT);
|
|
|
305287 |
+ }
|
|
|
305287 |
|
|
|
305287 |
params = stonith_key_value_add(params, "pcmk_host_map", "some-host=pcmk-7 true_1_node1=3,4");
|
|
|
305287 |
|
|
|
305287 |
@@ -502,23 +509,12 @@ test_register_async_devices(int check_event)
|
|
|
305287 |
static void
|
|
|
305287 |
try_mainloop_connect(int check_event)
|
|
|
305287 |
{
|
|
|
305287 |
- int tries = 10;
|
|
|
305287 |
- int i = 0;
|
|
|
305287 |
- int rc = 0;
|
|
|
305287 |
+ int rc = stonith_api_connect_retry(st, crm_system_name, 10);
|
|
|
305287 |
|
|
|
305287 |
- for (i = 0; i < tries; i++) {
|
|
|
305287 |
- rc = st->cmds->connect(st, crm_system_name, NULL);
|
|
|
305287 |
-
|
|
|
305287 |
- if (!rc) {
|
|
|
305287 |
- crm_info("stonith client connection established");
|
|
|
305287 |
- mainloop_test_done(TRUE);
|
|
|
305287 |
- return;
|
|
|
305287 |
- } else {
|
|
|
305287 |
- crm_info("stonith client connection failed");
|
|
|
305287 |
- }
|
|
|
305287 |
- sleep(1);
|
|
|
305287 |
+ if (rc == pcmk_ok) {
|
|
|
305287 |
+ mainloop_test_done(TRUE);
|
|
|
305287 |
+ return;
|
|
|
305287 |
}
|
|
|
305287 |
-
|
|
|
305287 |
crm_err("API CONNECTION FAILURE");
|
|
|
305287 |
mainloop_test_done(FALSE);
|
|
|
305287 |
}
|
|
|
305287 |
diff --git a/include/crm/stonith-ng.h b/include/crm/stonith-ng.h
|
|
|
305287 |
index b7365a9..b640732 100644
|
|
|
305287 |
--- a/include/crm/stonith-ng.h
|
|
|
305287 |
+++ b/include/crm/stonith-ng.h
|
|
|
305287 |
@@ -430,6 +430,10 @@ void stonith_key_value_freeall(stonith_key_value_t * kvp, int keys, int values);
|
|
|
305287 |
|
|
|
305287 |
void stonith_history_free(stonith_history_t *history);
|
|
|
305287 |
|
|
|
305287 |
+// Convenience functions
|
|
|
305287 |
+int stonith_api_connect_retry(stonith_t *st, const char *name,
|
|
|
305287 |
+ int max_attempts);
|
|
|
305287 |
+
|
|
|
305287 |
/* Basic helpers that allows nodes to be fenced and the history to be
|
|
|
305287 |
* queried without mainloop or the caller understanding the full API
|
|
|
305287 |
*
|
|
|
305287 |
diff --git a/lib/fencing/st_client.c b/lib/fencing/st_client.c
|
|
|
305287 |
index 270ef8d..ceee944 100644
|
|
|
305287 |
--- a/lib/fencing/st_client.c
|
|
|
305287 |
+++ b/lib/fencing/st_client.c
|
|
|
305287 |
@@ -1,5 +1,5 @@
|
|
|
305287 |
/*
|
|
|
305287 |
- * Copyright 2004-2018 Andrew Beekhof <andrew@beekhof.net>
|
|
|
305287 |
+ * Copyright 2004-2019 the Pacemaker project contributors
|
|
|
305287 |
*
|
|
|
305287 |
* This source code is licensed under the GNU Lesser General Public License
|
|
|
305287 |
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
|
|
|
305287 |
@@ -1415,14 +1415,21 @@ static int
|
|
|
305287 |
stonith_api_signon(stonith_t * stonith, const char *name, int *stonith_fd)
|
|
|
305287 |
{
|
|
|
305287 |
int rc = pcmk_ok;
|
|
|
305287 |
- stonith_private_t *native = stonith->st_private;
|
|
|
305287 |
+ stonith_private_t *native = NULL;
|
|
|
305287 |
+ const char *display_name = name? name : "client";
|
|
|
305287 |
|
|
|
305287 |
static struct ipc_client_callbacks st_callbacks = {
|
|
|
305287 |
.dispatch = stonith_dispatch_internal,
|
|
|
305287 |
.destroy = stonith_connection_destroy
|
|
|
305287 |
};
|
|
|
305287 |
|
|
|
305287 |
- crm_trace("Connecting command channel");
|
|
|
305287 |
+ CRM_CHECK(stonith != NULL, return -EINVAL);
|
|
|
305287 |
+
|
|
|
305287 |
+ native = stonith->st_private;
|
|
|
305287 |
+ CRM_ASSERT(native != NULL);
|
|
|
305287 |
+
|
|
|
305287 |
+ crm_debug("Attempting fencer connection by %s with%s mainloop",
|
|
|
305287 |
+ display_name, (stonith_fd? "out" : ""));
|
|
|
305287 |
|
|
|
305287 |
stonith->state = stonith_connected_command;
|
|
|
305287 |
if (stonith_fd) {
|
|
|
305287 |
@@ -1432,8 +1439,9 @@ stonith_api_signon(stonith_t * stonith, const char *name, int *stonith_fd)
|
|
|
305287 |
if (native->ipc && crm_ipc_connect(native->ipc)) {
|
|
|
305287 |
*stonith_fd = crm_ipc_get_fd(native->ipc);
|
|
|
305287 |
} else if (native->ipc) {
|
|
|
305287 |
- crm_perror(LOG_ERR, "Connection to fencer failed");
|
|
|
305287 |
- rc = -ENOTCONN;
|
|
|
305287 |
+ crm_ipc_close(native->ipc);
|
|
|
305287 |
+ crm_ipc_destroy(native->ipc);
|
|
|
305287 |
+ native->ipc = NULL;
|
|
|
305287 |
}
|
|
|
305287 |
|
|
|
305287 |
} else {
|
|
|
305287 |
@@ -1444,11 +1452,8 @@ stonith_api_signon(stonith_t * stonith, const char *name, int *stonith_fd)
|
|
|
305287 |
}
|
|
|
305287 |
|
|
|
305287 |
if (native->ipc == NULL) {
|
|
|
305287 |
- crm_debug("Could not connect to the Stonith API");
|
|
|
305287 |
rc = -ENOTCONN;
|
|
|
305287 |
- }
|
|
|
305287 |
-
|
|
|
305287 |
- if (rc == pcmk_ok) {
|
|
|
305287 |
+ } else {
|
|
|
305287 |
xmlNode *reply = NULL;
|
|
|
305287 |
xmlNode *hello = create_xml_node(NULL, "stonith_command");
|
|
|
305287 |
|
|
|
305287 |
@@ -1458,11 +1463,12 @@ stonith_api_signon(stonith_t * stonith, const char *name, int *stonith_fd)
|
|
|
305287 |
rc = crm_ipc_send(native->ipc, hello, crm_ipc_client_response, -1, &reply);
|
|
|
305287 |
|
|
|
305287 |
if (rc < 0) {
|
|
|
305287 |
- crm_perror(LOG_DEBUG, "Couldn't complete registration with the fencing API: %d", rc);
|
|
|
305287 |
+ crm_debug("Couldn't register with the fencer: %s "
|
|
|
305287 |
+ CRM_XS " rc=%d", pcmk_strerror(rc), rc);
|
|
|
305287 |
rc = -ECOMM;
|
|
|
305287 |
|
|
|
305287 |
} else if (reply == NULL) {
|
|
|
305287 |
- crm_err("Did not receive registration reply");
|
|
|
305287 |
+ crm_debug("Couldn't register with the fencer: no reply");
|
|
|
305287 |
rc = -EPROTO;
|
|
|
305287 |
|
|
|
305287 |
} else {
|
|
|
305287 |
@@ -1470,18 +1476,23 @@ stonith_api_signon(stonith_t * stonith, const char *name, int *stonith_fd)
|
|
|
305287 |
const char *tmp_ticket = crm_element_value(reply, F_STONITH_CLIENTID);
|
|
|
305287 |
|
|
|
305287 |
if (safe_str_neq(msg_type, CRM_OP_REGISTER)) {
|
|
|
305287 |
- crm_err("Invalid registration message: %s", msg_type);
|
|
|
305287 |
- crm_log_xml_err(reply, "Bad reply");
|
|
|
305287 |
+ crm_debug("Couldn't register with the fencer: invalid reply type '%s'",
|
|
|
305287 |
+ (msg_type? msg_type : "(missing)"));
|
|
|
305287 |
+ crm_log_xml_debug(reply, "Invalid fencer reply");
|
|
|
305287 |
rc = -EPROTO;
|
|
|
305287 |
|
|
|
305287 |
} else if (tmp_ticket == NULL) {
|
|
|
305287 |
- crm_err("No registration token provided");
|
|
|
305287 |
- crm_log_xml_err(reply, "Bad reply");
|
|
|
305287 |
+ crm_debug("Couldn't register with the fencer: no token in reply");
|
|
|
305287 |
+ crm_log_xml_debug(reply, "Invalid fencer reply");
|
|
|
305287 |
rc = -EPROTO;
|
|
|
305287 |
|
|
|
305287 |
} else {
|
|
|
305287 |
- crm_trace("Obtained registration token: %s", tmp_ticket);
|
|
|
305287 |
native->token = strdup(tmp_ticket);
|
|
|
305287 |
+#if HAVE_MSGFROMIPC_TIMEOUT
|
|
|
305287 |
+ stonith->call_timeout = MAX_IPC_DELAY;
|
|
|
305287 |
+#endif
|
|
|
305287 |
+ crm_debug("Connection to fencer by %s succeeded (registration token: %s)",
|
|
|
305287 |
+ display_name, native->token);
|
|
|
305287 |
rc = pcmk_ok;
|
|
|
305287 |
}
|
|
|
305287 |
}
|
|
|
305287 |
@@ -1490,16 +1501,11 @@ stonith_api_signon(stonith_t * stonith, const char *name, int *stonith_fd)
|
|
|
305287 |
free_xml(hello);
|
|
|
305287 |
}
|
|
|
305287 |
|
|
|
305287 |
- if (rc == pcmk_ok) {
|
|
|
305287 |
-#if HAVE_MSGFROMIPC_TIMEOUT
|
|
|
305287 |
- stonith->call_timeout = MAX_IPC_DELAY;
|
|
|
305287 |
-#endif
|
|
|
305287 |
- crm_debug("Connection to fencer successful");
|
|
|
305287 |
- return pcmk_ok;
|
|
|
305287 |
+ if (rc != pcmk_ok) {
|
|
|
305287 |
+ crm_debug("Connection attempt to fencer by %s failed: %s "
|
|
|
305287 |
+ CRM_XS " rc=%d", display_name, pcmk_strerror(rc), rc);
|
|
|
305287 |
+ stonith->cmds->disconnect(stonith);
|
|
|
305287 |
}
|
|
|
305287 |
-
|
|
|
305287 |
- crm_debug("Connection to fencer failed: %s", pcmk_strerror(rc));
|
|
|
305287 |
- stonith->cmds->disconnect(stonith);
|
|
|
305287 |
return rc;
|
|
|
305287 |
}
|
|
|
305287 |
|
|
|
305287 |
@@ -2071,6 +2077,36 @@ stonith_api_new(void)
|
|
|
305287 |
return new_stonith;
|
|
|
305287 |
}
|
|
|
305287 |
|
|
|
305287 |
+/*!
|
|
|
305287 |
+ * \brief Make a blocking connection attempt to the fencer
|
|
|
305287 |
+ *
|
|
|
305287 |
+ * \param[in,out] st Fencer API object
|
|
|
305287 |
+ * \param[in] name Client name to use with fencer
|
|
|
305287 |
+ * \param[in] max_attempts Return error if this many attempts fail
|
|
|
305287 |
+ *
|
|
|
305287 |
+ * \return pcmk_ok on success, -EINVAL if max_attempts < 1, else last attempt's result
|
|
|
305287 |
+ */
|
|
|
305287 |
+int
|
|
|
305287 |
+stonith_api_connect_retry(stonith_t *st, const char *name, int max_attempts)
|
|
|
305287 |
+{
|
|
|
305287 |
+ int rc = -EINVAL; // if max_attempts is not positive
|
|
|
305287 |
+
|
|
|
305287 |
+ for (int attempt = 1; attempt <= max_attempts; attempt++) {
|
|
|
305287 |
+ rc = st->cmds->connect(st, name, NULL);
|
|
|
305287 |
+ if (rc == pcmk_ok) {
|
|
|
305287 |
+ return pcmk_ok;
|
|
|
305287 |
+ } else if (attempt < max_attempts) {
|
|
|
305287 |
+ crm_notice("Fencer connection attempt %d of %d failed (retrying in 2s): %s "
|
|
|
305287 |
+ CRM_XS " rc=%d",
|
|
|
305287 |
+ attempt, max_attempts, pcmk_strerror(rc), rc);
|
|
|
305287 |
+ sleep(2);
|
|
|
305287 |
+ }
|
|
|
305287 |
+ }
|
|
|
305287 |
+ crm_notice("Could not connect to fencer: %s " CRM_XS " rc=%d",
|
|
|
305287 |
+ pcmk_strerror(rc), rc);
|
|
|
305287 |
+ return rc;
|
|
|
305287 |
+}
|
|
|
305287 |
+
|
|
|
305287 |
stonith_key_value_t *
|
|
|
305287 |
stonith_key_value_add(stonith_key_value_t * head, const char *key, const char *value)
|
|
|
305287 |
{
|
|
|
305287 |
@@ -2122,85 +2158,78 @@ stonith_key_value_freeall(stonith_key_value_t * head, int keys, int values)
|
|
|
305287 |
int
|
|
|
305287 |
stonith_api_kick(uint32_t nodeid, const char *uname, int timeout, bool off)
|
|
|
305287 |
{
|
|
|
305287 |
- char *name = NULL;
|
|
|
305287 |
- const char *action = "reboot";
|
|
|
305287 |
-
|
|
|
305287 |
- int rc = -EPROTO;
|
|
|
305287 |
- stonith_t *st = NULL;
|
|
|
305287 |
- enum stonith_call_options opts = st_opt_sync_call | st_opt_allow_suicide;
|
|
|
305287 |
+ int rc = pcmk_ok;
|
|
|
305287 |
+ stonith_t *st = stonith_api_new();
|
|
|
305287 |
+ const char *action = off? "off" : "reboot";
|
|
|
305287 |
|
|
|
305287 |
api_log_open();
|
|
|
305287 |
- st = stonith_api_new();
|
|
|
305287 |
- if (st) {
|
|
|
305287 |
- rc = st->cmds->connect(st, "stonith-api", NULL);
|
|
|
305287 |
- if(rc != pcmk_ok) {
|
|
|
305287 |
- api_log(LOG_ERR, "Connection failed, could not kick (%s) node %u/%s : %s (%d)", action, nodeid, uname, pcmk_strerror(rc), rc);
|
|
|
305287 |
- }
|
|
|
305287 |
+ if (st == NULL) {
|
|
|
305287 |
+ api_log(LOG_ERR, "API initialization failed, could not kick (%s) node %u/%s",
|
|
|
305287 |
+ action, nodeid, uname);
|
|
|
305287 |
+ return -EPROTO;
|
|
|
305287 |
}
|
|
|
305287 |
|
|
|
305287 |
- if (uname != NULL) {
|
|
|
305287 |
- name = strdup(uname);
|
|
|
305287 |
-
|
|
|
305287 |
- } else if (nodeid > 0) {
|
|
|
305287 |
- opts |= st_opt_cs_nodeid;
|
|
|
305287 |
- name = crm_itoa(nodeid);
|
|
|
305287 |
- }
|
|
|
305287 |
-
|
|
|
305287 |
- if (off) {
|
|
|
305287 |
- action = "off";
|
|
|
305287 |
- }
|
|
|
305287 |
-
|
|
|
305287 |
- if (rc == pcmk_ok) {
|
|
|
305287 |
+ rc = st->cmds->connect(st, "stonith-api", NULL);
|
|
|
305287 |
+ if (rc != pcmk_ok) {
|
|
|
305287 |
+ api_log(LOG_ERR, "Connection failed, could not kick (%s) node %u/%s : %s (%d)",
|
|
|
305287 |
+ action, nodeid, uname, pcmk_strerror(rc), rc);
|
|
|
305287 |
+ } else {
|
|
|
305287 |
+ char *name = NULL;
|
|
|
305287 |
+ enum stonith_call_options opts = st_opt_sync_call | st_opt_allow_suicide;
|
|
|
305287 |
+
|
|
|
305287 |
+ if (uname != NULL) {
|
|
|
305287 |
+ name = strdup(uname);
|
|
|
305287 |
+ } else if (nodeid > 0) {
|
|
|
305287 |
+ opts |= st_opt_cs_nodeid;
|
|
|
305287 |
+ name = crm_itoa(nodeid);
|
|
|
305287 |
+ }
|
|
|
305287 |
rc = st->cmds->fence(st, opts, name, action, timeout, 0);
|
|
|
305287 |
- if(rc != pcmk_ok) {
|
|
|
305287 |
- api_log(LOG_ERR, "Could not kick (%s) node %u/%s : %s (%d)", action, nodeid, uname, pcmk_strerror(rc), rc);
|
|
|
305287 |
+ free(name);
|
|
|
305287 |
+
|
|
|
305287 |
+ if (rc != pcmk_ok) {
|
|
|
305287 |
+ api_log(LOG_ERR, "Could not kick (%s) node %u/%s : %s (%d)",
|
|
|
305287 |
+ action, nodeid, uname, pcmk_strerror(rc), rc);
|
|
|
305287 |
} else {
|
|
|
305287 |
- api_log(LOG_NOTICE, "Node %u/%s kicked: %s ", nodeid, uname, action);
|
|
|
305287 |
+ api_log(LOG_NOTICE, "Node %u/%s kicked: %s", nodeid, uname, action);
|
|
|
305287 |
}
|
|
|
305287 |
}
|
|
|
305287 |
|
|
|
305287 |
- if (st) {
|
|
|
305287 |
- st->cmds->disconnect(st);
|
|
|
305287 |
- stonith_api_delete(st);
|
|
|
305287 |
- }
|
|
|
305287 |
-
|
|
|
305287 |
- free(name);
|
|
|
305287 |
+ stonith_api_delete(st);
|
|
|
305287 |
return rc;
|
|
|
305287 |
}
|
|
|
305287 |
|
|
|
305287 |
time_t
|
|
|
305287 |
stonith_api_time(uint32_t nodeid, const char *uname, bool in_progress)
|
|
|
305287 |
{
|
|
|
305287 |
- int rc = 0;
|
|
|
305287 |
- char *name = NULL;
|
|
|
305287 |
-
|
|
|
305287 |
+ int rc = pcmk_ok;
|
|
|
305287 |
time_t when = 0;
|
|
|
305287 |
- stonith_t *st = NULL;
|
|
|
305287 |
+ stonith_t *st = stonith_api_new();
|
|
|
305287 |
stonith_history_t *history = NULL, *hp = NULL;
|
|
|
305287 |
- enum stonith_call_options opts = st_opt_sync_call;
|
|
|
305287 |
-
|
|
|
305287 |
- st = stonith_api_new();
|
|
|
305287 |
- if (st) {
|
|
|
305287 |
- rc = st->cmds->connect(st, "stonith-api", NULL);
|
|
|
305287 |
- if(rc != pcmk_ok) {
|
|
|
305287 |
- api_log(LOG_NOTICE, "Connection failed: %s (%d)", pcmk_strerror(rc), rc);
|
|
|
305287 |
- }
|
|
|
305287 |
- }
|
|
|
305287 |
-
|
|
|
305287 |
- if (uname != NULL) {
|
|
|
305287 |
- name = strdup(uname);
|
|
|
305287 |
|
|
|
305287 |
- } else if (nodeid > 0) {
|
|
|
305287 |
- opts |= st_opt_cs_nodeid;
|
|
|
305287 |
- name = crm_itoa(nodeid);
|
|
|
305287 |
+ if (st == NULL) {
|
|
|
305287 |
+ api_log(LOG_ERR, "Could not retrieve fence history for %u/%s: "
|
|
|
305287 |
+ "API initialization failed", nodeid, uname);
|
|
|
305287 |
+ return when;
|
|
|
305287 |
}
|
|
|
305287 |
|
|
|
305287 |
- if (st && rc == pcmk_ok) {
|
|
|
305287 |
+ rc = st->cmds->connect(st, "stonith-api", NULL);
|
|
|
305287 |
+ if (rc != pcmk_ok) {
|
|
|
305287 |
+ api_log(LOG_NOTICE, "Connection failed: %s (%d)", pcmk_strerror(rc), rc);
|
|
|
305287 |
+ } else {
|
|
|
305287 |
int entries = 0;
|
|
|
305287 |
int progress = 0;
|
|
|
305287 |
int completed = 0;
|
|
|
305287 |
-
|
|
|
305287 |
+ char *name = NULL;
|
|
|
305287 |
+ enum stonith_call_options opts = st_opt_sync_call;
|
|
|
305287 |
+
|
|
|
305287 |
+ if (uname != NULL) {
|
|
|
305287 |
+ name = strdup(uname);
|
|
|
305287 |
+ } else if (nodeid > 0) {
|
|
|
305287 |
+ opts |= st_opt_cs_nodeid;
|
|
|
305287 |
+ name = crm_itoa(nodeid);
|
|
|
305287 |
+ }
|
|
|
305287 |
rc = st->cmds->history(st, opts, name, &history, 120);
|
|
|
305287 |
+ free(name);
|
|
|
305287 |
|
|
|
305287 |
for (hp = history; hp; hp = hp->next) {
|
|
|
305287 |
entries++;
|
|
|
305287 |
@@ -2227,15 +2256,11 @@ stonith_api_time(uint32_t nodeid, const char *uname, bool in_progress)
|
|
|
305287 |
}
|
|
|
305287 |
}
|
|
|
305287 |
|
|
|
305287 |
- if (st) {
|
|
|
305287 |
- st->cmds->disconnect(st);
|
|
|
305287 |
- stonith_api_delete(st);
|
|
|
305287 |
- }
|
|
|
305287 |
+ stonith_api_delete(st);
|
|
|
305287 |
|
|
|
305287 |
if(when) {
|
|
|
305287 |
api_log(LOG_INFO, "Node %u/%s last kicked at: %ld", nodeid, uname, (long int)when);
|
|
|
305287 |
}
|
|
|
305287 |
- free(name);
|
|
|
305287 |
return when;
|
|
|
305287 |
}
|
|
|
305287 |
|
|
|
305287 |
diff --git a/tools/crm_mon.c b/tools/crm_mon.c
|
|
|
305287 |
index e101b62..bed0796 100644
|
|
|
305287 |
--- a/tools/crm_mon.c
|
|
|
305287 |
+++ b/tools/crm_mon.c
|
|
|
305287 |
@@ -298,7 +298,6 @@ cib_connect(gboolean full)
|
|
|
305287 |
}
|
|
|
305287 |
|
|
|
305287 |
if ((fence_connect) && (st->state == stonith_disconnected)) {
|
|
|
305287 |
- crm_trace("Connecting to stonith");
|
|
|
305287 |
rc = st->cmds->connect(st, crm_system_name, NULL);
|
|
|
305287 |
if (rc == pcmk_ok) {
|
|
|
305287 |
crm_trace("Setting up stonith callbacks");
|
|
|
305287 |
diff --git a/tools/stonith_admin.c b/tools/stonith_admin.c
|
|
|
305287 |
index d960fb1..6be66c6 100644
|
|
|
305287 |
--- a/tools/stonith_admin.c
|
|
|
305287 |
+++ b/tools/stonith_admin.c
|
|
|
305287 |
@@ -198,31 +198,6 @@ struct {
|
|
|
305287 |
int rc;
|
|
|
305287 |
} async_fence_data;
|
|
|
305287 |
|
|
|
305287 |
-static int
|
|
|
305287 |
-try_mainloop_connect(void)
|
|
|
305287 |
-{
|
|
|
305287 |
- stonith_t *st = async_fence_data.st;
|
|
|
305287 |
- int tries = 10;
|
|
|
305287 |
- int i = 0;
|
|
|
305287 |
- int rc = 0;
|
|
|
305287 |
-
|
|
|
305287 |
- for (i = 0; i < tries; i++) {
|
|
|
305287 |
- crm_debug("Connecting as %s", async_fence_data.name);
|
|
|
305287 |
- rc = st->cmds->connect(st, async_fence_data.name, NULL);
|
|
|
305287 |
-
|
|
|
305287 |
- if (!rc) {
|
|
|
305287 |
- crm_debug("stonith client connection established");
|
|
|
305287 |
- return 0;
|
|
|
305287 |
- } else {
|
|
|
305287 |
- crm_debug("stonith client connection failed");
|
|
|
305287 |
- }
|
|
|
305287 |
- sleep(1);
|
|
|
305287 |
- }
|
|
|
305287 |
-
|
|
|
305287 |
- crm_err("Could not connect to the fencer");
|
|
|
305287 |
- return -1;
|
|
|
305287 |
-}
|
|
|
305287 |
-
|
|
|
305287 |
static void
|
|
|
305287 |
notify_callback(stonith_t * st, stonith_event_t * e)
|
|
|
305287 |
{
|
|
|
305287 |
@@ -251,8 +226,10 @@ async_fence_helper(gpointer user_data)
|
|
|
305287 |
{
|
|
|
305287 |
stonith_t *st = async_fence_data.st;
|
|
|
305287 |
int call_id = 0;
|
|
|
305287 |
+ int rc = stonith_api_connect_retry(st, async_fence_data.name, 10);
|
|
|
305287 |
|
|
|
305287 |
- if (try_mainloop_connect()) {
|
|
|
305287 |
+ if (rc != pcmk_ok) {
|
|
|
305287 |
+ fprintf(stderr, "Could not connect to fencer: %s\n", pcmk_strerror(rc));
|
|
|
305287 |
g_main_loop_quit(mainloop);
|
|
|
305287 |
return TRUE;
|
|
|
305287 |
}
|
|
|
305287 |
--
|
|
|
305287 |
1.8.3.1
|
|
|
305287 |
|