Blame SOURCES/003-fencer-logs.patch

4c8e44
From 0a884f325e1049febc28bf0419ab307dd0bce5af Mon Sep 17 00:00:00 2001
4c8e44
From: Ken Gaillot <kgaillot@redhat.com>
4c8e44
Date: Thu, 16 May 2019 20:04:57 -0500
4c8e44
Subject: [PATCH] Log: various: improve fencer connection messages
4c8e44
4c8e44
Previously, log messages around fencer connections were inconsistent.
4c8e44
4c8e44
This attempts to make them more consistent by: having stonith_api_signon() log
4c8e44
only at debug level, letting the callers log at a level appropriate to the
4c8e44
situation using the return code; moving connection retries into a function; and
4c8e44
using similar wording across clients.
4c8e44
4c8e44
This also does a bit of refactoring for better error checking and improved
4c8e44
efficiency.
4c8e44
---
4c8e44
 daemons/controld/controld_control.c  |   7 +-
4c8e44
 daemons/controld/controld_te_utils.c |  59 ++++++-----
4c8e44
 daemons/execd/pacemaker-execd.c      |  28 ++---
4c8e44
 daemons/fenced/cts-fence-helper.c    |  38 +++----
4c8e44
 include/crm/stonith-ng.h             |   4 +
4c8e44
 lib/fencing/st_client.c              | 195 ++++++++++++++++++++---------------
4c8e44
 tools/crm_mon.c                      |   1 -
4c8e44
 tools/stonith_admin.c                |  29 +-----
4c8e44
 8 files changed, 181 insertions(+), 180 deletions(-)
4c8e44
4c8e44
diff --git a/daemons/controld/controld_control.c b/daemons/controld/controld_control.c
4c8e44
index 89b5b5d..6d9f335 100644
4c8e44
--- a/daemons/controld/controld_control.c
4c8e44
+++ b/daemons/controld/controld_control.c
4c8e44
@@ -628,10 +628,11 @@ do_started(long long action,
4c8e44
         register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
4c8e44
     }
4c8e44
 
4c8e44
+    // Try connecting to fencer (retrying later in mainloop if failed)
4c8e44
     if (stonith_reconnect == NULL) {
4c8e44
-        int dummy;
4c8e44
-
4c8e44
-        stonith_reconnect = mainloop_add_trigger(G_PRIORITY_LOW, te_connect_stonith, &dummy);
4c8e44
+        stonith_reconnect = mainloop_add_trigger(G_PRIORITY_LOW,
4c8e44
+                                                 te_connect_stonith,
4c8e44
+                                                 GINT_TO_POINTER(TRUE));
4c8e44
     }
4c8e44
     set_bit(fsa_input_register, R_ST_REQUIRED);
4c8e44
     mainloop_set_trigger(stonith_reconnect);
4c8e44
diff --git a/daemons/controld/controld_te_utils.c b/daemons/controld/controld_te_utils.c
4c8e44
index 5606ed6..22f83ad 100644
4c8e44
--- a/daemons/controld/controld_te_utils.c
4c8e44
+++ b/daemons/controld/controld_te_utils.c
4c8e44
@@ -1,5 +1,5 @@
4c8e44
 /*
4c8e44
- * Copyright 2004-2018 Andrew Beekhof <andrew@beekhof.net>
4c8e44
+ * Copyright 2004-2019 the Pacemaker project contributors
4c8e44
  *
4c8e44
  * This source code is licensed under the GNU General Public License version 2
4c8e44
  * or later (GPLv2+) WITHOUT ANY WARRANTY.
4c8e44
@@ -385,10 +385,18 @@ te_trigger_stonith_history_sync(void)
4c8e44
     mainloop_timer_start(stonith_history_sync_timer);
4c8e44
 }
4c8e44
 
4c8e44
+/*!
4c8e44
+ * \brief Connect to fencer
4c8e44
+ *
4c8e44
+ * \param[in] user_data  If NULL, retry failures now, otherwise retry in main loop
4c8e44
+ *
4c8e44
+ * \return TRUE
4c8e44
+ * \note If user_data is NULL, this will wait 2s between attempts, for up to
4c8e44
+ *       30 attempts, meaning the controller could be blocked as long as 58s.
4c8e44
+ */
4c8e44
 gboolean
4c8e44
 te_connect_stonith(gpointer user_data)
4c8e44
 {
4c8e44
-    int lpc = 0;
4c8e44
     int rc = pcmk_ok;
4c8e44
 
4c8e44
     if (stonith_api == NULL) {
4c8e44
@@ -396,42 +404,41 @@ te_connect_stonith(gpointer user_data)
4c8e44
     }
4c8e44
 
4c8e44
     if (stonith_api->state != stonith_disconnected) {
4c8e44
-        crm_trace("Still connected");
4c8e44
+        crm_trace("Already connected to fencer, no need to retry");
4c8e44
         return TRUE;
4c8e44
     }
4c8e44
 
4c8e44
-    for (lpc = 0; lpc < 30; lpc++) {
4c8e44
-        crm_debug("Attempting connection to fencing daemon...");
4c8e44
-
4c8e44
-        sleep(1);
4c8e44
-        rc = stonith_api->cmds->connect(stonith_api, crm_system_name, NULL);
4c8e44
-
4c8e44
-        if (rc == pcmk_ok) {
4c8e44
-            break;
4c8e44
+    if (user_data == NULL) {
4c8e44
+        // Blocking (retry failures now, up to 30 attempts)
4c8e44
+        rc = stonith_api_connect_retry(stonith_api, crm_system_name, 30);
4c8e44
+        if (rc != pcmk_ok) {
4c8e44
+            crm_err("Could not connect to fencer in 30 attempts: %s "
4c8e44
+                    CRM_XS " rc=%d", pcmk_strerror(rc), rc);
4c8e44
         }
4c8e44
-
4c8e44
-        if (user_data != NULL) {
4c8e44
+    } else {
4c8e44
+        // Non-blocking (retry failures later in main loop)
4c8e44
+        rc = stonith_api->cmds->connect(stonith_api, crm_system_name, NULL);
4c8e44
+        if (rc != pcmk_ok) {
4c8e44
             if (is_set(fsa_input_register, R_ST_REQUIRED)) {
4c8e44
-                crm_err("Sign-in failed: triggered a retry");
4c8e44
+                crm_err("Fencer connection failed (will retry): %s "
4c8e44
+                        CRM_XS " rc=%d", pcmk_strerror(rc), rc);
4c8e44
                 mainloop_set_trigger(stonith_reconnect);
4c8e44
             } else {
4c8e44
-                crm_info("Sign-in failed, but no longer required");
4c8e44
+                crm_info("Fencer connection failed (ignoring because no longer required): %s "
4c8e44
+                         CRM_XS " rc=%d", pcmk_strerror(rc), rc);
4c8e44
             }
4c8e44
             return TRUE;
4c8e44
         }
4c8e44
-
4c8e44
-        crm_err("Sign-in failed: pausing and trying again in 2s...");
4c8e44
-        sleep(1);
4c8e44
     }
4c8e44
 
4c8e44
-    CRM_CHECK(rc == pcmk_ok, return TRUE);      /* If not, we failed 30 times... just get out */
4c8e44
-    stonith_api->cmds->register_notification(stonith_api, T_STONITH_NOTIFY_DISCONNECT,
4c8e44
-                                             tengine_stonith_connection_destroy);
4c8e44
-
4c8e44
-    stonith_api->cmds->register_notification(stonith_api, T_STONITH_NOTIFY_FENCE,
4c8e44
-                                             tengine_stonith_notify);
4c8e44
-
4c8e44
-    crm_trace("Connected");
4c8e44
+    if (rc == pcmk_ok) {
4c8e44
+        stonith_api->cmds->register_notification(stonith_api,
4c8e44
+                                                 T_STONITH_NOTIFY_DISCONNECT,
4c8e44
+                                                 tengine_stonith_connection_destroy);
4c8e44
+        stonith_api->cmds->register_notification(stonith_api,
4c8e44
+                                                 T_STONITH_NOTIFY_FENCE,
4c8e44
+                                                 tengine_stonith_notify);
4c8e44
+    }
4c8e44
     return TRUE;
4c8e44
 }
4c8e44
 
4c8e44
diff --git a/daemons/execd/pacemaker-execd.c b/daemons/execd/pacemaker-execd.c
4c8e44
index 21bb0ed..e2fdfca 100644
4c8e44
--- a/daemons/execd/pacemaker-execd.c
4c8e44
+++ b/daemons/execd/pacemaker-execd.c
4c8e44
@@ -65,28 +65,20 @@ get_stonith_connection(void)
4c8e44
         stonith_api = NULL;
4c8e44
     }
4c8e44
 
4c8e44
-    if (!stonith_api) {
4c8e44
-        int rc = 0;
4c8e44
-        int tries = 10;
4c8e44
+    if (stonith_api == NULL) {
4c8e44
+        int rc = pcmk_ok;
4c8e44
 
4c8e44
         stonith_api = stonith_api_new();
4c8e44
-        do {
4c8e44
-            rc = stonith_api->cmds->connect(stonith_api, "pacemaker-execd", NULL);
4c8e44
-            if (rc == pcmk_ok) {
4c8e44
-                stonith_api->cmds->register_notification(stonith_api,
4c8e44
-                                                         T_STONITH_NOTIFY_DISCONNECT,
4c8e44
-                                                         stonith_connection_destroy_cb);
4c8e44
-                break;
4c8e44
-            }
4c8e44
-            sleep(1);
4c8e44
-            tries--;
4c8e44
-        } while (tries);
4c8e44
-
4c8e44
-        if (rc) {
4c8e44
-            crm_err("Unable to connect to stonith daemon to execute command. error: %s",
4c8e44
-                    pcmk_strerror(rc));
4c8e44
+        rc = stonith_api_connect_retry(stonith_api, crm_system_name, 10);
4c8e44
+        if (rc != pcmk_ok) {
4c8e44
+            crm_err("Could not connect to fencer in 10 attempts: %s "
4c8e44
+                    CRM_XS " rc=%d", pcmk_strerror(rc), rc);
4c8e44
             stonith_api_delete(stonith_api);
4c8e44
             stonith_api = NULL;
4c8e44
+        } else {
4c8e44
+            stonith_api->cmds->register_notification(stonith_api,
4c8e44
+                                                     T_STONITH_NOTIFY_DISCONNECT,
4c8e44
+                                                     stonith_connection_destroy_cb);
4c8e44
         }
4c8e44
     }
4c8e44
     return stonith_api;
4c8e44
diff --git a/daemons/fenced/cts-fence-helper.c b/daemons/fenced/cts-fence-helper.c
4c8e44
index c5ce1ab..4552fc1 100644
4c8e44
--- a/daemons/fenced/cts-fence-helper.c
4c8e44
+++ b/daemons/fenced/cts-fence-helper.c
4c8e44
@@ -1,5 +1,5 @@
4c8e44
 /*
4c8e44
- * Copyright 2009-2018 Andrew Beekhof <andrew@beekhof.net>
4c8e44
+ * Copyright 2009-2019 the Pacemaker project contributors
4c8e44
  *
4c8e44
  * This source code is licensed under the GNU General Public License version 2
4c8e44
  * or later (GPLv2+) WITHOUT ANY WARRANTY.
4c8e44
@@ -124,8 +124,10 @@ passive_test(void)
4c8e44
     int rc = 0;
4c8e44
 
4c8e44
     rc = st->cmds->connect(st, crm_system_name, &pollfd.fd);
4c8e44
-    crm_debug("Connect: %d", rc);
4c8e44
-
4c8e44
+    if (rc != pcmk_ok) {
4c8e44
+        stonith_api_delete(st);
4c8e44
+        crm_exit(CRM_EX_DISCONNECT);
4c8e44
+    }
4c8e44
     st->cmds->register_notification(st, T_STONITH_NOTIFY_DISCONNECT, st_callback);
4c8e44
     st->cmds->register_notification(st, T_STONITH_NOTIFY_FENCE, st_callback);
4c8e44
     st->cmds->register_notification(st, STONITH_OP_DEVICE_ADD, st_callback);
4c8e44
@@ -271,8 +273,10 @@ sanity_tests(void)
4c8e44
     int rc = 0;
4c8e44
 
4c8e44
     rc = st->cmds->connect(st, crm_system_name, &pollfd.fd);
4c8e44
-    crm_debug("Connect: %d", rc);
4c8e44
-
4c8e44
+    if (rc != pcmk_ok) {
4c8e44
+        stonith_api_delete(st);
4c8e44
+        crm_exit(CRM_EX_DISCONNECT);
4c8e44
+    }
4c8e44
     st->cmds->register_notification(st, T_STONITH_NOTIFY_DISCONNECT, st_callback);
4c8e44
     st->cmds->register_notification(st, T_STONITH_NOTIFY_FENCE, st_callback);
4c8e44
     st->cmds->register_notification(st, STONITH_OP_DEVICE_ADD, st_callback);
4c8e44
@@ -295,7 +299,10 @@ standard_dev_test(void)
4c8e44
     stonith_key_value_t *params = NULL;
4c8e44
 
4c8e44
     rc = st->cmds->connect(st, crm_system_name, &pollfd.fd);
4c8e44
-    crm_debug("Connect: %d", rc);
4c8e44
+    if (rc != pcmk_ok) {
4c8e44
+        stonith_api_delete(st);
4c8e44
+        crm_exit(CRM_EX_DISCONNECT);
4c8e44
+    }
4c8e44
 
4c8e44
     params = stonith_key_value_add(params, "pcmk_host_map", "some-host=pcmk-7 true_1_node1=3,4");
4c8e44
 
4c8e44
@@ -502,23 +509,12 @@ test_register_async_devices(int check_event)
4c8e44
 static void
4c8e44
 try_mainloop_connect(int check_event)
4c8e44
 {
4c8e44
-    int tries = 10;
4c8e44
-    int i = 0;
4c8e44
-    int rc = 0;
4c8e44
+    int rc = stonith_api_connect_retry(st, crm_system_name, 10);
4c8e44
 
4c8e44
-    for (i = 0; i < tries; i++) {
4c8e44
-        rc = st->cmds->connect(st, crm_system_name, NULL);
4c8e44
-
4c8e44
-        if (!rc) {
4c8e44
-            crm_info("stonith client connection established");
4c8e44
-            mainloop_test_done(TRUE);
4c8e44
-            return;
4c8e44
-        } else {
4c8e44
-            crm_info("stonith client connection failed");
4c8e44
-        }
4c8e44
-        sleep(1);
4c8e44
+    if (rc == pcmk_ok) {
4c8e44
+        mainloop_test_done(TRUE);
4c8e44
+        return;
4c8e44
     }
4c8e44
-
4c8e44
     crm_err("API CONNECTION FAILURE");
4c8e44
     mainloop_test_done(FALSE);
4c8e44
 }
4c8e44
diff --git a/include/crm/stonith-ng.h b/include/crm/stonith-ng.h
4c8e44
index b7365a9..b640732 100644
4c8e44
--- a/include/crm/stonith-ng.h
4c8e44
+++ b/include/crm/stonith-ng.h
4c8e44
@@ -430,6 +430,10 @@ void stonith_key_value_freeall(stonith_key_value_t * kvp, int keys, int values);
4c8e44
 
4c8e44
 void stonith_history_free(stonith_history_t *history);
4c8e44
 
4c8e44
+// Convenience functions
4c8e44
+int stonith_api_connect_retry(stonith_t *st, const char *name,
4c8e44
+                              int max_attempts);
4c8e44
+
4c8e44
 /* Basic helpers that allows nodes to be fenced and the history to be
4c8e44
  * queried without mainloop or the caller understanding the full API
4c8e44
  *
4c8e44
diff --git a/lib/fencing/st_client.c b/lib/fencing/st_client.c
4c8e44
index 270ef8d..ceee944 100644
4c8e44
--- a/lib/fencing/st_client.c
4c8e44
+++ b/lib/fencing/st_client.c
4c8e44
@@ -1,5 +1,5 @@
4c8e44
 /*
4c8e44
- * Copyright 2004-2018 Andrew Beekhof <andrew@beekhof.net>
4c8e44
+ * Copyright 2004-2019 the Pacemaker project contributors
4c8e44
  *
4c8e44
  * This source code is licensed under the GNU Lesser General Public License
4c8e44
  * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
4c8e44
@@ -1415,14 +1415,21 @@ static int
4c8e44
 stonith_api_signon(stonith_t * stonith, const char *name, int *stonith_fd)
4c8e44
 {
4c8e44
     int rc = pcmk_ok;
4c8e44
-    stonith_private_t *native = stonith->st_private;
4c8e44
+    stonith_private_t *native = NULL;
4c8e44
+    const char *display_name = name? name : "client";
4c8e44
 
4c8e44
     static struct ipc_client_callbacks st_callbacks = {
4c8e44
         .dispatch = stonith_dispatch_internal,
4c8e44
         .destroy = stonith_connection_destroy
4c8e44
     };
4c8e44
 
4c8e44
-    crm_trace("Connecting command channel");
4c8e44
+    CRM_CHECK(stonith != NULL, return -EINVAL);
4c8e44
+
4c8e44
+    native = stonith->st_private;
4c8e44
+    CRM_ASSERT(native != NULL);
4c8e44
+
4c8e44
+    crm_debug("Attempting fencer connection by %s with%s mainloop",
4c8e44
+              display_name, (stonith_fd? "out" : ""));
4c8e44
 
4c8e44
     stonith->state = stonith_connected_command;
4c8e44
     if (stonith_fd) {
4c8e44
@@ -1432,8 +1439,9 @@ stonith_api_signon(stonith_t * stonith, const char *name, int *stonith_fd)
4c8e44
         if (native->ipc && crm_ipc_connect(native->ipc)) {
4c8e44
             *stonith_fd = crm_ipc_get_fd(native->ipc);
4c8e44
         } else if (native->ipc) {
4c8e44
-            crm_perror(LOG_ERR, "Connection to fencer failed");
4c8e44
-            rc = -ENOTCONN;
4c8e44
+            crm_ipc_close(native->ipc);
4c8e44
+            crm_ipc_destroy(native->ipc);
4c8e44
+            native->ipc = NULL;
4c8e44
         }
4c8e44
 
4c8e44
     } else {
4c8e44
@@ -1444,11 +1452,8 @@ stonith_api_signon(stonith_t * stonith, const char *name, int *stonith_fd)
4c8e44
     }
4c8e44
 
4c8e44
     if (native->ipc == NULL) {
4c8e44
-        crm_debug("Could not connect to the Stonith API");
4c8e44
         rc = -ENOTCONN;
4c8e44
-    }
4c8e44
-
4c8e44
-    if (rc == pcmk_ok) {
4c8e44
+    } else {
4c8e44
         xmlNode *reply = NULL;
4c8e44
         xmlNode *hello = create_xml_node(NULL, "stonith_command");
4c8e44
 
4c8e44
@@ -1458,11 +1463,12 @@ stonith_api_signon(stonith_t * stonith, const char *name, int *stonith_fd)
4c8e44
         rc = crm_ipc_send(native->ipc, hello, crm_ipc_client_response, -1, &reply);
4c8e44
 
4c8e44
         if (rc < 0) {
4c8e44
-            crm_perror(LOG_DEBUG, "Couldn't complete registration with the fencing API: %d", rc);
4c8e44
+            crm_debug("Couldn't register with the fencer: %s "
4c8e44
+                      CRM_XS " rc=%d", pcmk_strerror(rc), rc);
4c8e44
             rc = -ECOMM;
4c8e44
 
4c8e44
         } else if (reply == NULL) {
4c8e44
-            crm_err("Did not receive registration reply");
4c8e44
+            crm_debug("Couldn't register with the fencer: no reply");
4c8e44
             rc = -EPROTO;
4c8e44
 
4c8e44
         } else {
4c8e44
@@ -1470,18 +1476,23 @@ stonith_api_signon(stonith_t * stonith, const char *name, int *stonith_fd)
4c8e44
             const char *tmp_ticket = crm_element_value(reply, F_STONITH_CLIENTID);
4c8e44
 
4c8e44
             if (safe_str_neq(msg_type, CRM_OP_REGISTER)) {
4c8e44
-                crm_err("Invalid registration message: %s", msg_type);
4c8e44
-                crm_log_xml_err(reply, "Bad reply");
4c8e44
+                crm_debug("Couldn't register with the fencer: invalid reply type '%s'",
4c8e44
+                          (msg_type? msg_type : "(missing)"));
4c8e44
+                crm_log_xml_debug(reply, "Invalid fencer reply");
4c8e44
                 rc = -EPROTO;
4c8e44
 
4c8e44
             } else if (tmp_ticket == NULL) {
4c8e44
-                crm_err("No registration token provided");
4c8e44
-                crm_log_xml_err(reply, "Bad reply");
4c8e44
+                crm_debug("Couldn't register with the fencer: no token in reply");
4c8e44
+                crm_log_xml_debug(reply, "Invalid fencer reply");
4c8e44
                 rc = -EPROTO;
4c8e44
 
4c8e44
             } else {
4c8e44
-                crm_trace("Obtained registration token: %s", tmp_ticket);
4c8e44
                 native->token = strdup(tmp_ticket);
4c8e44
+#if HAVE_MSGFROMIPC_TIMEOUT
4c8e44
+                stonith->call_timeout = MAX_IPC_DELAY;
4c8e44
+#endif
4c8e44
+                crm_debug("Connection to fencer by %s succeeded (registration token: %s)",
4c8e44
+                          display_name, native->token);
4c8e44
                 rc = pcmk_ok;
4c8e44
             }
4c8e44
         }
4c8e44
@@ -1490,16 +1501,11 @@ stonith_api_signon(stonith_t * stonith, const char *name, int *stonith_fd)
4c8e44
         free_xml(hello);
4c8e44
     }
4c8e44
 
4c8e44
-    if (rc == pcmk_ok) {
4c8e44
-#if HAVE_MSGFROMIPC_TIMEOUT
4c8e44
-        stonith->call_timeout = MAX_IPC_DELAY;
4c8e44
-#endif
4c8e44
-        crm_debug("Connection to fencer successful");
4c8e44
-        return pcmk_ok;
4c8e44
+    if (rc != pcmk_ok) {
4c8e44
+        crm_debug("Connection attempt to fencer by %s failed: %s "
4c8e44
+                  CRM_XS " rc=%d", display_name, pcmk_strerror(rc), rc);
4c8e44
+        stonith->cmds->disconnect(stonith);
4c8e44
     }
4c8e44
-
4c8e44
-    crm_debug("Connection to fencer failed: %s", pcmk_strerror(rc));
4c8e44
-    stonith->cmds->disconnect(stonith);
4c8e44
     return rc;
4c8e44
 }
4c8e44
 
4c8e44
@@ -2071,6 +2077,36 @@ stonith_api_new(void)
4c8e44
     return new_stonith;
4c8e44
 }
4c8e44
 
4c8e44
+/*!
4c8e44
+ * \brief Make a blocking connection attempt to the fencer
4c8e44
+ *
4c8e44
+ * \param[in,out] st            Fencer API object
4c8e44
+ * \param[in]     name          Client name to use with fencer
4c8e44
+ * \param[in]     max_attempts  Return error if this many attempts fail
4c8e44
+ *
4c8e44
+ * \return pcmk_ok on success, result of last attempt otherwise
4c8e44
+ */
4c8e44
+int
4c8e44
+stonith_api_connect_retry(stonith_t *st, const char *name, int max_attempts)
4c8e44
+{
4c8e44
+    int rc = -EINVAL; // if max_attempts is not positive
4c8e44
+
4c8e44
+    for (int attempt = 1; attempt <= max_attempts; attempt++) {
4c8e44
+        rc = st->cmds->connect(st, name, NULL);
4c8e44
+        if (rc == pcmk_ok) {
4c8e44
+            return pcmk_ok;
4c8e44
+        } else if (attempt < max_attempts) {
4c8e44
+            crm_notice("Fencer connection attempt %d of %d failed (retrying in 2s): %s "
4c8e44
+                       CRM_XS " rc=%d",
4c8e44
+                       attempt, max_attempts, pcmk_strerror(rc), rc);
4c8e44
+            sleep(2);
4c8e44
+        }
4c8e44
+    }
4c8e44
+    crm_notice("Could not connect to fencer: %s " CRM_XS " rc=%d",
4c8e44
+               pcmk_strerror(rc), rc);
4c8e44
+    return rc;
4c8e44
+}
4c8e44
+
4c8e44
 stonith_key_value_t *
4c8e44
 stonith_key_value_add(stonith_key_value_t * head, const char *key, const char *value)
4c8e44
 {
4c8e44
@@ -2122,85 +2158,78 @@ stonith_key_value_freeall(stonith_key_value_t * head, int keys, int values)
4c8e44
 int
4c8e44
 stonith_api_kick(uint32_t nodeid, const char *uname, int timeout, bool off)
4c8e44
 {
4c8e44
-    char *name = NULL;
4c8e44
-    const char *action = "reboot";
4c8e44
-
4c8e44
-    int rc = -EPROTO;
4c8e44
-    stonith_t *st = NULL;
4c8e44
-    enum stonith_call_options opts = st_opt_sync_call | st_opt_allow_suicide;
4c8e44
+    int rc = pcmk_ok;
4c8e44
+    stonith_t *st = stonith_api_new();
4c8e44
+    const char *action = off? "off" : "reboot";
4c8e44
 
4c8e44
     api_log_open();
4c8e44
-    st = stonith_api_new();
4c8e44
-    if (st) {
4c8e44
-        rc = st->cmds->connect(st, "stonith-api", NULL);
4c8e44
-        if(rc != pcmk_ok) {
4c8e44
-            api_log(LOG_ERR, "Connection failed, could not kick (%s) node %u/%s : %s (%d)", action, nodeid, uname, pcmk_strerror(rc), rc);
4c8e44
-        }
4c8e44
+    if (st == NULL) {
4c8e44
+        api_log(LOG_ERR, "API initialization failed, could not kick (%s) node %u/%s",
4c8e44
+                action, nodeid, uname);
4c8e44
+        return -EPROTO;
4c8e44
     }
4c8e44
 
4c8e44
-    if (uname != NULL) {
4c8e44
-        name = strdup(uname);
4c8e44
-
4c8e44
-    } else if (nodeid > 0) {
4c8e44
-        opts |= st_opt_cs_nodeid;
4c8e44
-        name = crm_itoa(nodeid);
4c8e44
-    }
4c8e44
-
4c8e44
-    if (off) {
4c8e44
-        action = "off";
4c8e44
-    }
4c8e44
-
4c8e44
-    if (rc == pcmk_ok) {
4c8e44
+    rc = st->cmds->connect(st, "stonith-api", NULL);
4c8e44
+    if (rc != pcmk_ok) {
4c8e44
+        api_log(LOG_ERR, "Connection failed, could not kick (%s) node %u/%s : %s (%d)",
4c8e44
+                action, nodeid, uname, pcmk_strerror(rc), rc);
4c8e44
+    } else {
4c8e44
+        char *name = NULL;
4c8e44
+        enum stonith_call_options opts = st_opt_sync_call | st_opt_allow_suicide;
4c8e44
+
4c8e44
+        if (uname != NULL) {
4c8e44
+            name = strdup(uname);
4c8e44
+        } else if (nodeid > 0) {
4c8e44
+            opts |= st_opt_cs_nodeid;
4c8e44
+            name = crm_itoa(nodeid);
4c8e44
+        }
4c8e44
         rc = st->cmds->fence(st, opts, name, action, timeout, 0);
4c8e44
-        if(rc != pcmk_ok) {
4c8e44
-            api_log(LOG_ERR, "Could not kick (%s) node %u/%s : %s (%d)", action, nodeid, uname, pcmk_strerror(rc), rc);
4c8e44
+        free(name);
4c8e44
+
4c8e44
+        if (rc != pcmk_ok) {
4c8e44
+            api_log(LOG_ERR, "Could not kick (%s) node %u/%s : %s (%d)",
4c8e44
+                    action, nodeid, uname, pcmk_strerror(rc), rc);
4c8e44
         } else {
4c8e44
-            api_log(LOG_NOTICE, "Node %u/%s kicked: %s ", nodeid, uname, action);
4c8e44
+            api_log(LOG_NOTICE, "Node %u/%s kicked: %s", nodeid, uname, action);
4c8e44
         }
4c8e44
     }
4c8e44
 
4c8e44
-    if (st) {
4c8e44
-        st->cmds->disconnect(st);
4c8e44
-        stonith_api_delete(st);
4c8e44
-    }
4c8e44
-
4c8e44
-    free(name);
4c8e44
+    stonith_api_delete(st);
4c8e44
     return rc;
4c8e44
 }
4c8e44
 
4c8e44
 time_t
4c8e44
 stonith_api_time(uint32_t nodeid, const char *uname, bool in_progress)
4c8e44
 {
4c8e44
-    int rc = 0;
4c8e44
-    char *name = NULL;
4c8e44
-
4c8e44
+    int rc = pcmk_ok;
4c8e44
     time_t when = 0;
4c8e44
-    stonith_t *st = NULL;
4c8e44
+    stonith_t *st = stonith_api_new();
4c8e44
     stonith_history_t *history = NULL, *hp = NULL;
4c8e44
-    enum stonith_call_options opts = st_opt_sync_call;
4c8e44
-
4c8e44
-    st = stonith_api_new();
4c8e44
-    if (st) {
4c8e44
-        rc = st->cmds->connect(st, "stonith-api", NULL);
4c8e44
-        if(rc != pcmk_ok) {
4c8e44
-            api_log(LOG_NOTICE, "Connection failed: %s (%d)", pcmk_strerror(rc), rc);
4c8e44
-        }
4c8e44
-    }
4c8e44
-
4c8e44
-    if (uname != NULL) {
4c8e44
-        name = strdup(uname);
4c8e44
 
4c8e44
-    } else if (nodeid > 0) {
4c8e44
-        opts |= st_opt_cs_nodeid;
4c8e44
-        name = crm_itoa(nodeid);
4c8e44
+    if (st == NULL) {
4c8e44
+        api_log(LOG_ERR, "Could not retrieve fence history for %u/%s: "
4c8e44
+                "API initialization failed", nodeid, uname);
4c8e44
+        return when;
4c8e44
     }
4c8e44
 
4c8e44
-    if (st && rc == pcmk_ok) {
4c8e44
+    rc = st->cmds->connect(st, "stonith-api", NULL);
4c8e44
+    if (rc != pcmk_ok) {
4c8e44
+        api_log(LOG_NOTICE, "Connection failed: %s (%d)", pcmk_strerror(rc), rc);
4c8e44
+    } else {
4c8e44
         int entries = 0;
4c8e44
         int progress = 0;
4c8e44
         int completed = 0;
4c8e44
-
4c8e44
+        char *name = NULL;
4c8e44
+        enum stonith_call_options opts = st_opt_sync_call;
4c8e44
+
4c8e44
+        if (uname != NULL) {
4c8e44
+            name = strdup(uname);
4c8e44
+        } else if (nodeid > 0) {
4c8e44
+            opts |= st_opt_cs_nodeid;
4c8e44
+            name = crm_itoa(nodeid);
4c8e44
+        }
4c8e44
         rc = st->cmds->history(st, opts, name, &history, 120);
4c8e44
+        free(name);
4c8e44
 
4c8e44
         for (hp = history; hp; hp = hp->next) {
4c8e44
             entries++;
4c8e44
@@ -2227,15 +2256,11 @@ stonith_api_time(uint32_t nodeid, const char *uname, bool in_progress)
4c8e44
         }
4c8e44
     }
4c8e44
 
4c8e44
-    if (st) {
4c8e44
-        st->cmds->disconnect(st);
4c8e44
-        stonith_api_delete(st);
4c8e44
-    }
4c8e44
+    stonith_api_delete(st);
4c8e44
 
4c8e44
     if(when) {
4c8e44
         api_log(LOG_INFO, "Node %u/%s last kicked at: %ld", nodeid, uname, (long int)when);
4c8e44
     }
4c8e44
-    free(name);
4c8e44
     return when;
4c8e44
 }
4c8e44
 
4c8e44
diff --git a/tools/crm_mon.c b/tools/crm_mon.c
4c8e44
index e101b62..bed0796 100644
4c8e44
--- a/tools/crm_mon.c
4c8e44
+++ b/tools/crm_mon.c
4c8e44
@@ -298,7 +298,6 @@ cib_connect(gboolean full)
4c8e44
     }
4c8e44
 
4c8e44
     if ((fence_connect) && (st->state == stonith_disconnected)) {
4c8e44
-        crm_trace("Connecting to stonith");
4c8e44
         rc = st->cmds->connect(st, crm_system_name, NULL);
4c8e44
         if (rc == pcmk_ok) {
4c8e44
             crm_trace("Setting up stonith callbacks");
4c8e44
diff --git a/tools/stonith_admin.c b/tools/stonith_admin.c
4c8e44
index d960fb1..6be66c6 100644
4c8e44
--- a/tools/stonith_admin.c
4c8e44
+++ b/tools/stonith_admin.c
4c8e44
@@ -198,31 +198,6 @@ struct {
4c8e44
     int rc;
4c8e44
 } async_fence_data;
4c8e44
 
4c8e44
-static int
4c8e44
-try_mainloop_connect(void)
4c8e44
-{
4c8e44
-    stonith_t *st = async_fence_data.st;
4c8e44
-    int tries = 10;
4c8e44
-    int i = 0;
4c8e44
-    int rc = 0;
4c8e44
-
4c8e44
-    for (i = 0; i < tries; i++) {
4c8e44
-        crm_debug("Connecting as %s", async_fence_data.name);
4c8e44
-        rc = st->cmds->connect(st, async_fence_data.name, NULL);
4c8e44
-
4c8e44
-        if (!rc) {
4c8e44
-            crm_debug("stonith client connection established");
4c8e44
-            return 0;
4c8e44
-        } else {
4c8e44
-            crm_debug("stonith client connection failed");
4c8e44
-        }
4c8e44
-        sleep(1);
4c8e44
-    }
4c8e44
-
4c8e44
-    crm_err("Could not connect to the fencer");
4c8e44
-    return -1;
4c8e44
-}
4c8e44
-
4c8e44
 static void
4c8e44
 notify_callback(stonith_t * st, stonith_event_t * e)
4c8e44
 {
4c8e44
@@ -251,8 +226,10 @@ async_fence_helper(gpointer user_data)
4c8e44
 {
4c8e44
     stonith_t *st = async_fence_data.st;
4c8e44
     int call_id = 0;
4c8e44
+    int rc = stonith_api_connect_retry(st, async_fence_data.name, 10);
4c8e44
 
4c8e44
-    if (try_mainloop_connect()) {
4c8e44
+    if (rc != pcmk_ok) {
4c8e44
+        fprintf(stderr, "Could not connect to fencer: %s\n", pcmk_strerror(rc));
4c8e44
         g_main_loop_quit(mainloop);
4c8e44
         return TRUE;
4c8e44
     }
4c8e44
-- 
4c8e44
1.8.3.1
4c8e44