Blame SOURCES/003-fencing-reasons.patch

97a979
From 8e6362cb2129bd56f817d449a195f3da87a545fa Mon Sep 17 00:00:00 2001
97a979
From: Ken Gaillot <kgaillot@redhat.com>
97a979
Date: Fri, 12 Nov 2021 14:28:56 -0600
97a979
Subject: [PATCH 01/13] Refactor: libcrmcommon,fencer: convenience macro for
97a979
 initializing results
97a979
97a979
for future reuse
97a979
---
97a979
 daemons/fenced/fenced_commands.c      | 14 ++------------
97a979
 include/crm/common/results_internal.h | 15 +++++++++++++++
97a979
 2 files changed, 17 insertions(+), 12 deletions(-)
97a979
97a979
diff --git a/daemons/fenced/fenced_commands.c b/daemons/fenced/fenced_commands.c
97a979
index 87600573e..9f2f1cc40 100644
97a979
--- a/daemons/fenced/fenced_commands.c
97a979
+++ b/daemons/fenced/fenced_commands.c
97a979
@@ -388,12 +388,7 @@ static void
97a979
 report_internal_result(async_command_t *cmd, int exit_status,
97a979
                        int execution_status, const char *exit_reason)
97a979
 {
97a979
-    pcmk__action_result_t result = {
97a979
-        // Ensure we don't pass garbage to free()
97a979
-        .exit_reason = NULL,
97a979
-        .action_stdout = NULL,
97a979
-        .action_stderr = NULL
97a979
-    };
97a979
+    pcmk__action_result_t result = PCMK__UNKNOWN_RESULT;
97a979
 
97a979
     pcmk__set_result(&result, exit_status, execution_status, exit_reason);
97a979
     cmd->done_cb(0, &result, cmd);
97a979
@@ -2616,12 +2611,7 @@ stonith_fence_get_devices_cb(GList * devices, void *user_data)
97a979
     }
97a979
 
97a979
     if (device == NULL) { // No device found
97a979
-        pcmk__action_result_t result = {
97a979
-            // Ensure we don't pass garbage to free()
97a979
-            .exit_reason = NULL,
97a979
-            .action_stdout = NULL,
97a979
-            .action_stderr = NULL
97a979
-        };
97a979
+        pcmk__action_result_t result = PCMK__UNKNOWN_RESULT;
97a979
 
97a979
         pcmk__set_result(&result, CRM_EX_ERROR, PCMK_EXEC_NO_FENCE_DEVICE,
97a979
                          "No fence device configured for target");
97a979
diff --git a/include/crm/common/results_internal.h b/include/crm/common/results_internal.h
97a979
index 804bf2a7a..6befaa0ed 100644
97a979
--- a/include/crm/common/results_internal.h
97a979
+++ b/include/crm/common/results_internal.h
97a979
@@ -30,6 +30,21 @@ typedef struct {
97a979
     char *action_stderr;    // Action error output
97a979
 } pcmk__action_result_t;
97a979
 
97a979
+/*!
97a979
+ * \internal
97a979
+ * \brief Static initialization for an action result
97a979
+ *
97a979
+ * \note Importantly, this ensures pcmk__reset_result() won't try to free
97a979
+ *       garbage.
97a979
+ */
97a979
+#define PCMK__UNKNOWN_RESULT {                  \
97a979
+        .exit_status = CRM_EX_OK,               \
97a979
+        .execution_status = PCMK_EXEC_UNKNOWN,  \
97a979
+        .exit_reason = NULL,                    \
97a979
+        .action_stdout = NULL,                  \
97a979
+        .action_stderr = NULL,                  \
97a979
+    }
97a979
+
97a979
 void pcmk__set_result(pcmk__action_result_t *result, int exit_status,
97a979
                       enum pcmk_exec_status exec_status,
97a979
                       const char *exit_reason);
97a979
-- 
97a979
2.27.0
97a979
97a979
97a979
From 0937c92476ac737a5f5146932824bde8bdd7db98 Mon Sep 17 00:00:00 2001
97a979
From: Ken Gaillot <kgaillot@redhat.com>
97a979
Date: Fri, 12 Nov 2021 16:02:27 -0600
97a979
Subject: [PATCH 02/13] Refactor: various: add convenience function for
97a979
 checking result success
97a979
97a979
A successful pcmk__action_result_t has both exit status CRM_EX_OK (a.k.a
97a979
PCMK_OCF_OK) and execution status PCMK_EXEC_DONE. Since checking that is
97a979
clunky, we sometimes just check exit status, which is less than ideal.
97a979
97a979
The convenience function makes it easy to check both, and improves readability.
97a979
---
97a979
 daemons/controld/controld_remote_ra.c |  4 ++--
97a979
 daemons/execd/execd_commands.c        | 12 ++++++------
97a979
 daemons/fenced/fenced_commands.c      | 14 ++++++--------
97a979
 include/crm/common/results_internal.h | 16 ++++++++++++++++
97a979
 lib/fencing/st_client.c               |  4 ++--
97a979
 lib/fencing/st_rhcs.c                 |  2 +-
97a979
 6 files changed, 33 insertions(+), 19 deletions(-)
97a979
97a979
diff --git a/daemons/controld/controld_remote_ra.c b/daemons/controld/controld_remote_ra.c
97a979
index 74cbfd673..55ac162c7 100644
97a979
--- a/daemons/controld/controld_remote_ra.c
97a979
+++ b/daemons/controld/controld_remote_ra.c
97a979
@@ -297,7 +297,7 @@ static void
97a979
 check_remote_node_state(remote_ra_cmd_t *cmd)
97a979
 {
97a979
     /* Only successful actions can change node state */
97a979
-    if (cmd->result.exit_status != PCMK_OCF_OK) {
97a979
+    if (!pcmk__result_ok(&(cmd->result))) {
97a979
         return;
97a979
     }
97a979
 
97a979
@@ -365,7 +365,7 @@ report_remote_ra_result(remote_ra_cmd_t * cmd)
97a979
     lrmd__set_result(&op, cmd->result.exit_status, cmd->result.execution_status,
97a979
                      cmd->result.exit_reason);
97a979
 
97a979
-    if (cmd->reported_success && (cmd->result.exit_status != PCMK_OCF_OK)) {
97a979
+    if (cmd->reported_success && !pcmk__result_ok(&(cmd->result))) {
97a979
         op.t_rcchange = (unsigned int) time(NULL);
97a979
         /* This edge case will likely never ever occur, but if it does the
97a979
          * result is that a failure will not be processed correctly. This is only
97a979
diff --git a/daemons/execd/execd_commands.c b/daemons/execd/execd_commands.c
97a979
index 667525039..02070bf11 100644
97a979
--- a/daemons/execd/execd_commands.c
97a979
+++ b/daemons/execd/execd_commands.c
97a979
@@ -878,7 +878,7 @@ action_complete(svc_action_t * action)
97a979
     }
97a979
 
97a979
     if (pcmk__str_eq(rclass, PCMK_RESOURCE_CLASS_SYSTEMD, pcmk__str_casei)) {
97a979
-        if ((cmd->result.exit_status == PCMK_OCF_OK)
97a979
+        if (pcmk__result_ok(&(cmd->result))
97a979
             && pcmk__strcase_any_of(cmd->action, "start", "stop", NULL)) {
97a979
             /* systemd returns from start and stop actions after the action
97a979
              * begins, not after it completes. We have to jump through a few
97a979
@@ -894,7 +894,7 @@ action_complete(svc_action_t * action)
97a979
             if (cmd->result.execution_status == PCMK_EXEC_PENDING) {
97a979
                 goagain = true;
97a979
 
97a979
-            } else if ((cmd->result.exit_status == PCMK_OCF_OK)
97a979
+            } else if (pcmk__result_ok(&(cmd->result))
97a979
                        && pcmk__str_eq(cmd->real_action, "stop", pcmk__str_casei)) {
97a979
                 goagain = true;
97a979
 
97a979
@@ -927,12 +927,12 @@ action_complete(svc_action_t * action)
97a979
 #if SUPPORT_NAGIOS
97a979
     if (rsc && pcmk__str_eq(rsc->class, PCMK_RESOURCE_CLASS_NAGIOS, pcmk__str_casei)) {
97a979
         if (action_matches(cmd, "monitor", 0)
97a979
-            && (cmd->result.exit_status == PCMK_OCF_OK)) {
97a979
+            && pcmk__result_ok(&(cmd->result))) {
97a979
             /* Successfully executed --version for the nagios plugin */
97a979
             cmd->result.exit_status = PCMK_OCF_NOT_RUNNING;
97a979
 
97a979
         } else if (pcmk__str_eq(cmd->action, "start", pcmk__str_casei)
97a979
-                   && (cmd->result.exit_status != PCMK_OCF_OK)) {
97a979
+                   && !pcmk__result_ok(&(cmd->result))) {
97a979
 #ifdef PCMK__TIME_USE_CGT
97a979
             goagain = true;
97a979
 #endif
97a979
@@ -955,7 +955,7 @@ action_complete(svc_action_t * action)
97a979
             cmd->start_delay = delay;
97a979
             cmd->timeout = timeout_left;
97a979
 
97a979
-            if (cmd->result.exit_status == PCMK_OCF_OK) {
97a979
+            if (pcmk__result_ok(&(cmd->result))) {
97a979
                 crm_debug("%s %s may still be in progress: re-scheduling (elapsed=%dms, remaining=%dms, start_delay=%dms)",
97a979
                           cmd->rsc_id, cmd->real_action, time_sum, timeout_left, delay);
97a979
 
97a979
@@ -1066,7 +1066,7 @@ stonith_action_complete(lrmd_cmd_t * cmd, int rc)
97a979
                                                          cmd->interval_ms, rc);
97a979
 
97a979
         // Certain successful actions change the known state of the resource
97a979
-        if ((rsc != NULL) && (cmd->result.exit_status == PCMK_OCF_OK)) {
97a979
+        if ((rsc != NULL) && pcmk__result_ok(&(cmd->result))) {
97a979
             if (pcmk__str_eq(cmd->action, "start", pcmk__str_casei)) {
97a979
                 rsc->st_probe_rc = pcmk_ok; // maps to PCMK_OCF_OK
97a979
             } else if (pcmk__str_eq(cmd->action, "stop", pcmk__str_casei)) {
97a979
diff --git a/daemons/fenced/fenced_commands.c b/daemons/fenced/fenced_commands.c
97a979
index 9f2f1cc40..26501a4b3 100644
97a979
--- a/daemons/fenced/fenced_commands.c
97a979
+++ b/daemons/fenced/fenced_commands.c
97a979
@@ -1188,8 +1188,7 @@ dynamic_list_search_cb(int pid, const pcmk__action_result_t *result,
97a979
 
97a979
     mainloop_set_trigger(dev->work);
97a979
 
97a979
-    if ((result->execution_status == PCMK_EXEC_DONE)
97a979
-        && (result->exit_status == CRM_EX_OK)) {
97a979
+    if (pcmk__result_ok(result)) {
97a979
         crm_info("Refreshing target list for %s", dev->id);
97a979
         g_list_free_full(dev->targets, free);
97a979
         dev->targets = stonith__parse_targets(result->action_stdout);
97a979
@@ -2310,15 +2309,14 @@ log_async_result(async_command_t *cmd, const pcmk__action_result_t *result,
97a979
     GString *msg = g_string_sized_new(80); // Reasonable starting size
97a979
 
97a979
     // Choose log levels appropriately if we have a result
97a979
-    if ((result->execution_status == PCMK_EXEC_DONE)
97a979
-        && (result->exit_status == CRM_EX_OK))  { // Success
97a979
+    if (pcmk__result_ok(result)) {
97a979
         log_level = (cmd->victim == NULL)? LOG_DEBUG : LOG_NOTICE;
97a979
         if ((result->action_stdout != NULL)
97a979
             && !pcmk__str_eq(cmd->action, "metadata", pcmk__str_casei)) {
97a979
             output_log_level = LOG_DEBUG;
97a979
         }
97a979
         next = NULL;
97a979
-    } else { // Failure
97a979
+    } else {
97a979
         log_level = (cmd->victim == NULL)? LOG_NOTICE : LOG_ERR;
97a979
         if ((result->action_stdout != NULL)
97a979
             && !pcmk__str_eq(cmd->action, "metadata", pcmk__str_casei)) {
97a979
@@ -2482,7 +2480,7 @@ st_child_done(int pid, const pcmk__action_result_t *result, void *user_data)
97a979
     /* The device is ready to do something else now */
97a979
     device = g_hash_table_lookup(device_list, cmd->device);
97a979
     if (device) {
97a979
-        if (!device->verified && (result->exit_status == CRM_EX_OK) &&
97a979
+        if (!device->verified && pcmk__result_ok(result) &&
97a979
             (pcmk__strcase_any_of(cmd->action, "list", "monitor", "status", NULL))) {
97a979
 
97a979
             device->verified = TRUE;
97a979
@@ -2491,7 +2489,7 @@ st_child_done(int pid, const pcmk__action_result_t *result, void *user_data)
97a979
         mainloop_set_trigger(device->work);
97a979
     }
97a979
 
97a979
-    if (result->exit_status == CRM_EX_OK) {
97a979
+    if (pcmk__result_ok(result)) {
97a979
         GList *iter;
97a979
         /* see if there are any required devices left to execute for this op */
97a979
         for (iter = cmd->device_next; iter != NULL; iter = iter->next) {
97a979
@@ -2523,7 +2521,7 @@ st_child_done(int pid, const pcmk__action_result_t *result, void *user_data)
97a979
 
97a979
     send_async_reply(cmd, result, pid, false);
97a979
 
97a979
-    if (result->exit_status != CRM_EX_OK) {
97a979
+    if (!pcmk__result_ok(result)) {
97a979
         goto done;
97a979
     }
97a979
 
97a979
diff --git a/include/crm/common/results_internal.h b/include/crm/common/results_internal.h
97a979
index 6befaa0ed..0c5833937 100644
97a979
--- a/include/crm/common/results_internal.h
97a979
+++ b/include/crm/common/results_internal.h
97a979
@@ -54,4 +54,20 @@ void pcmk__set_result_output(pcmk__action_result_t *result,
97a979
 
97a979
 void pcmk__reset_result(pcmk__action_result_t *result);
97a979
 
97a979
+/*!
97a979
+ * \internal
97a979
+ * \brief Check whether a result is OK
97a979
+ *
97a979
+ * \param[in] result
97a979
+ *
97a979
+ * \return true if the result's exit status is CRM_EX_OK and its
97a979
+ *         execution status is PCMK_EXEC_DONE, otherwise false
97a979
+ */
97a979
+static inline bool
97a979
+pcmk__result_ok(const pcmk__action_result_t *result)
97a979
+{
97a979
+    return (result != NULL) && (result->exit_status == CRM_EX_OK)
97a979
+            && (result->execution_status == PCMK_EXEC_DONE);
97a979
+}
97a979
+
97a979
 #endif // PCMK__COMMON_RESULTS_INTERNAL__H
97a979
diff --git a/lib/fencing/st_client.c b/lib/fencing/st_client.c
97a979
index 2fbff7f24..af461d0d4 100644
97a979
--- a/lib/fencing/st_client.c
97a979
+++ b/lib/fencing/st_client.c
97a979
@@ -760,7 +760,7 @@ stonith__result2rc(const pcmk__action_result_t *result)
97a979
         default:                        break;
97a979
     }
97a979
 
97a979
-    if (result->exit_status == CRM_EX_OK) {
97a979
+    if (pcmk__result_ok(result)) {
97a979
         return pcmk_rc_ok;
97a979
     }
97a979
 
97a979
@@ -797,7 +797,7 @@ stonith_action_async_done(svc_action_t *svc_action)
97a979
 
97a979
     log_action(action, action->pid);
97a979
 
97a979
-    if ((action->result.exit_status != CRM_EX_OK)
97a979
+    if (!pcmk__result_ok(&(action->result))
97a979
         && update_remaining_timeout(action)) {
97a979
 
97a979
         int rc = internal_stonith_action_execute(action);
97a979
diff --git a/lib/fencing/st_rhcs.c b/lib/fencing/st_rhcs.c
97a979
index 6c8cbedc7..865e04bc2 100644
97a979
--- a/lib/fencing/st_rhcs.c
97a979
+++ b/lib/fencing/st_rhcs.c
97a979
@@ -148,7 +148,7 @@ stonith__rhcs_get_metadata(const char *agent, int timeout, xmlNode **metadata)
97a979
         return rc;
97a979
     }
97a979
 
97a979
-    if (result->exit_status != CRM_EX_OK) {
97a979
+    if (!pcmk__result_ok(result)) {
97a979
         crm_warn("Metadata action for %s returned error code %d",
97a979
                  agent, result->exit_status);
97a979
         rc = pcmk_rc2legacy(stonith__result2rc(result));
97a979
-- 
97a979
2.27.0
97a979
97a979
97a979
From 4c39ff00a0c028354a9da7f80986f7e34b05ba08 Mon Sep 17 00:00:00 2001
97a979
From: Ken Gaillot <kgaillot@redhat.com>
97a979
Date: Fri, 12 Nov 2021 16:07:01 -0600
97a979
Subject: [PATCH 03/13] Low: fencing: improve mapping of execution status to
97a979
 legacy return code
97a979
97a979
PCMK_EXEC_PENDING is likely not possible with the current code, but map it to
97a979
EINPROGRESS for completeness.
97a979
97a979
PCMK_EXEC_INVALID is not yet used by the fencer but will be.
97a979
---
97a979
 lib/fencing/st_client.c | 30 ++++++++++++++++++++++++++----
97a979
 1 file changed, 26 insertions(+), 4 deletions(-)
97a979
97a979
diff --git a/lib/fencing/st_client.c b/lib/fencing/st_client.c
97a979
index af461d0d4..93513e9f3 100644
97a979
--- a/lib/fencing/st_client.c
97a979
+++ b/lib/fencing/st_client.c
97a979
@@ -749,7 +749,12 @@ update_remaining_timeout(stonith_action_t * action)
97a979
 int
97a979
 stonith__result2rc(const pcmk__action_result_t *result)
97a979
 {
97a979
+    if (pcmk__result_ok(result)) {
97a979
+        return pcmk_rc_ok;
97a979
+    }
97a979
+
97a979
     switch (result->execution_status) {
97a979
+        case PCMK_EXEC_PENDING:         return EINPROGRESS;
97a979
         case PCMK_EXEC_CANCELLED:       return ECANCELED;
97a979
         case PCMK_EXEC_TIMEOUT:         return ETIME;
97a979
         case PCMK_EXEC_NOT_INSTALLED:   return ENOENT;
97a979
@@ -757,11 +762,28 @@ stonith__result2rc(const pcmk__action_result_t *result)
97a979
         case PCMK_EXEC_NOT_CONNECTED:   return ENOTCONN;
97a979
         case PCMK_EXEC_NO_FENCE_DEVICE: return ENODEV;
97a979
         case PCMK_EXEC_NO_SECRETS:      return EACCES;
97a979
-        default:                        break;
97a979
-    }
97a979
 
97a979
-    if (pcmk__result_ok(result)) {
97a979
-        return pcmk_rc_ok;
97a979
+        /* For the fencing API, PCMK_EXEC_INVALID is used with fencer API
97a979
+         * operations that don't involve executing an agent (for example,
97a979
+         * registering devices). This allows us to use the CRM_EX_* codes in the
97a979
+         * exit status for finer-grained responses.
97a979
+         */
97a979
+        case PCMK_EXEC_INVALID:
97a979
+            switch (result->exit_status) {
97a979
+                case CRM_EX_INSUFFICIENT_PRIV:  return EACCES;
97a979
+                case CRM_EX_PROTOCOL:           return EPROTO;
97a979
+
97a979
+               /* CRM_EX_EXPIRED is used for orphaned fencing operations left
97a979
+                * over from a previous instance of the fencer. For API backward
97a979
+                * compatibility, this is mapped to the previously used code for
97a979
+                * this case, EHOSTUNREACH.
97a979
+                */
97a979
+                case CRM_EX_EXPIRED:            return EHOSTUNREACH;
97a979
+                default:                        break;
97a979
+            }
97a979
+
97a979
+        default:
97a979
+            break;
97a979
     }
97a979
 
97a979
     // Try to provide useful error code based on result's error output
97a979
-- 
97a979
2.27.0
97a979
97a979
97a979
From 4e638783d1cd7c9398a603fc6df7e9d868262b16 Mon Sep 17 00:00:00 2001
97a979
From: Ken Gaillot <kgaillot@redhat.com>
97a979
Date: Thu, 18 Nov 2021 11:41:12 -0600
97a979
Subject: [PATCH 04/13] Refactor: libstonithd: separate action-related code
97a979
 into own source file
97a979
97a979
Everything related to stonith_action_t has been moved from st_client.c to a new
97a979
st_actions.c, since st_client.c was ridiculously large, and the action stuff
97a979
isn't all client-related. No code was changed.
97a979
97a979
Before:
97a979
   2804 st_client.c
97a979
97a979
After:
97a979
   545 lib/fencing/st_actions.c
97a979
  2278 lib/fencing/st_client.c
97a979
---
97a979
 lib/fencing/Makefile.am  |   2 +-
97a979
 lib/fencing/st_actions.c | 545 +++++++++++++++++++++++++++++++++++++++
97a979
 lib/fencing/st_client.c  | 528 +------------------------------------
97a979
 3 files changed, 547 insertions(+), 528 deletions(-)
97a979
 create mode 100644 lib/fencing/st_actions.c
97a979
97a979
diff --git a/lib/fencing/Makefile.am b/lib/fencing/Makefile.am
97a979
index 205c4873d..dac215c16 100644
97a979
--- a/lib/fencing/Makefile.am
97a979
+++ b/lib/fencing/Makefile.am
97a979
@@ -22,7 +22,7 @@ libstonithd_la_LDFLAGS	+= $(LDFLAGS_HARDENED_LIB)
97a979
 libstonithd_la_LIBADD	= $(top_builddir)/lib/common/libcrmcommon.la
97a979
 libstonithd_la_LIBADD   += $(top_builddir)/lib/services/libcrmservice.la
97a979
 
97a979
-libstonithd_la_SOURCES	= st_client.c st_output.c st_rhcs.c
97a979
+libstonithd_la_SOURCES	= st_actions.c st_client.c st_output.c st_rhcs.c
97a979
 if BUILD_LHA_SUPPORT
97a979
 libstonithd_la_SOURCES	+= st_lha.c
97a979
 endif
97a979
diff --git a/lib/fencing/st_actions.c b/lib/fencing/st_actions.c
97a979
new file mode 100644
97a979
index 000000000..64d3afd5d
97a979
--- /dev/null
97a979
+++ b/lib/fencing/st_actions.c
97a979
@@ -0,0 +1,545 @@
97a979
+/*
97a979
+ * Copyright 2004-2021 the Pacemaker project contributors
97a979
+ *
97a979
+ * The version control history for this file may have further details.
97a979
+ *
97a979
+ * This source code is licensed under the GNU Lesser General Public License
97a979
+ * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
97a979
+ */
97a979
+
97a979
+#include <crm_internal.h>
97a979
+
97a979
+#include <stdlib.h>
97a979
+#include <stdio.h>
97a979
+#include <string.h>
97a979
+#include <libgen.h>
97a979
+#include <inttypes.h>
97a979
+#include <sys/types.h>
97a979
+#include <glib.h>
97a979
+
97a979
+#include <crm/crm.h>
97a979
+#include <crm/stonith-ng.h>
97a979
+#include <crm/fencing/internal.h>
97a979
+#include <crm/msg_xml.h>
97a979
+#include <crm/services_internal.h>
97a979
+
97a979
+#include "fencing_private.h"
97a979
+
97a979
+struct stonith_action_s {
97a979
+    /*! user defined data */
97a979
+    char *agent;
97a979
+    char *action;
97a979
+    char *victim;
97a979
+    GHashTable *args;
97a979
+    int timeout;
97a979
+    int async;
97a979
+    void *userdata;
97a979
+    void (*done_cb) (int pid, const pcmk__action_result_t *result,
97a979
+                     void *user_data);
97a979
+    void (*fork_cb) (int pid, void *user_data);
97a979
+
97a979
+    svc_action_t *svc_action;
97a979
+
97a979
+    /*! internal timing information */
97a979
+    time_t initial_start_time;
97a979
+    int tries;
97a979
+    int remaining_timeout;
97a979
+    int max_retries;
97a979
+
97a979
+    int pid;
97a979
+    pcmk__action_result_t result;
97a979
+};
97a979
+
97a979
+static int internal_stonith_action_execute(stonith_action_t *action);
97a979
+static void log_action(stonith_action_t *action, pid_t pid);
97a979
+
97a979
+/*!
97a979
+ * \internal
97a979
+ * \brief Set an action's result based on services library result
97a979
+ *
97a979
+ * \param[in] action      Fence action to set result for
97a979
+ * \param[in] svc_action  Service action to get result from
97a979
+ */
97a979
+static void
97a979
+set_result_from_svc_action(stonith_action_t *action, svc_action_t *svc_action)
97a979
+{
97a979
+    pcmk__set_result(&(action->result), svc_action->rc, svc_action->status,
97a979
+                     services__exit_reason(svc_action));
97a979
+    pcmk__set_result_output(&(action->result),
97a979
+                            services__grab_stdout(svc_action),
97a979
+                            services__grab_stderr(svc_action));
97a979
+}
97a979
+
97a979
+static void
97a979
+log_action(stonith_action_t *action, pid_t pid)
97a979
+{
97a979
+    /* The services library has already logged the output at info or debug
97a979
+     * level, so just raise to warning for stderr.
97a979
+     */
97a979
+    if (action->result.action_stderr != NULL) {
97a979
+        /* Logging the whole string confuses syslog when the string is xml */
97a979
+        char *prefix = crm_strdup_printf("%s[%d] stderr:", action->agent, pid);
97a979
+
97a979
+        crm_log_output(LOG_WARNING, prefix, action->result.action_stderr);
97a979
+        free(prefix);
97a979
+    }
97a979
+}
97a979
+
97a979
+static void
97a979
+append_config_arg(gpointer key, gpointer value, gpointer user_data)
97a979
+{
97a979
+    /* The fencer will filter "action" out when it registers the device,
97a979
+     * but ignore it here in case any external API users don't.
97a979
+     *
97a979
+     * Also filter out parameters handled directly by Pacemaker.
97a979
+     */
97a979
+    if (!pcmk__str_eq(key, STONITH_ATTR_ACTION_OP, pcmk__str_casei)
97a979
+        && !pcmk_stonith_param(key)
97a979
+        && (strstr(key, CRM_META) == NULL)
97a979
+        && !pcmk__str_eq(key, "crm_feature_set", pcmk__str_casei)) {
97a979
+
97a979
+        crm_trace("Passing %s=%s with fence action",
97a979
+                  (const char *) key, (const char *) (value? value : ""));
97a979
+        g_hash_table_insert((GHashTable *) user_data,
97a979
+                            strdup(key), strdup(value? value : ""));
97a979
+    }
97a979
+}
97a979
+
97a979
+static GHashTable *
97a979
+make_args(const char *agent, const char *action, const char *victim,
97a979
+          uint32_t victim_nodeid, GHashTable * device_args,
97a979
+          GHashTable * port_map, const char *host_arg)
97a979
+{
97a979
+    GHashTable *arg_list = NULL;
97a979
+    const char *value = NULL;
97a979
+
97a979
+    CRM_CHECK(action != NULL, return NULL);
97a979
+
97a979
+    arg_list = pcmk__strkey_table(free, free);
97a979
+
97a979
+    // Add action to arguments (using an alias if requested)
97a979
+    if (device_args) {
97a979
+        char buffer[512];
97a979
+
97a979
+        snprintf(buffer, sizeof(buffer), "pcmk_%s_action", action);
97a979
+        value = g_hash_table_lookup(device_args, buffer);
97a979
+        if (value) {
97a979
+            crm_debug("Substituting '%s' for fence action %s targeting %s",
97a979
+                      value, action, victim);
97a979
+            action = value;
97a979
+        }
97a979
+    }
97a979
+    g_hash_table_insert(arg_list, strdup(STONITH_ATTR_ACTION_OP),
97a979
+                        strdup(action));
97a979
+
97a979
+    /* If this is a fencing operation against another node, add more standard
97a979
+     * arguments.
97a979
+     */
97a979
+    if (victim && device_args) {
97a979
+        const char *param = NULL;
97a979
+
97a979
+        /* Always pass the target's name, per
97a979
+         * https://github.com/ClusterLabs/fence-agents/blob/master/doc/FenceAgentAPI.md
97a979
+         */
97a979
+        g_hash_table_insert(arg_list, strdup("nodename"), strdup(victim));
97a979
+
97a979
+        // If the target's node ID was specified, pass it, too
97a979
+        if (victim_nodeid) {
97a979
+            char *nodeid = crm_strdup_printf("%" PRIu32, victim_nodeid);
97a979
+
97a979
+            // cts-fencing looks for this log message
97a979
+            crm_info("Passing '%s' as nodeid with fence action '%s' targeting %s",
97a979
+                     nodeid, action, victim);
97a979
+            g_hash_table_insert(arg_list, strdup("nodeid"), nodeid);
97a979
+        }
97a979
+
97a979
+        // Check whether target must be specified in some other way
97a979
+        param = g_hash_table_lookup(device_args, PCMK_STONITH_HOST_ARGUMENT);
97a979
+        if (!pcmk__str_eq(agent, "fence_legacy", pcmk__str_none)
97a979
+            && !pcmk__str_eq(param, "none", pcmk__str_casei)) {
97a979
+
97a979
+            if (param == NULL) {
97a979
+                /* Use the caller's default for pcmk_host_argument, or "port" if
97a979
+                 * none was given
97a979
+                 */
97a979
+                param = (host_arg == NULL)? "port" : host_arg;
97a979
+            }
97a979
+            value = g_hash_table_lookup(device_args, param);
97a979
+
97a979
+            if (pcmk__str_eq(value, "dynamic",
97a979
+                             pcmk__str_casei|pcmk__str_null_matches)) {
97a979
+                /* If the host argument was "dynamic" or not explicitly specified,
97a979
+                 * add it with the target
97a979
+                 */
97a979
+                const char *alias = NULL;
97a979
+
97a979
+                if (port_map) {
97a979
+                    alias = g_hash_table_lookup(port_map, victim);
97a979
+                }
97a979
+                if (alias == NULL) {
97a979
+                    alias = victim;
97a979
+                }
97a979
+                crm_debug("Passing %s='%s' with fence action %s targeting %s",
97a979
+                          param, alias, action, victim);
97a979
+                g_hash_table_insert(arg_list, strdup(param), strdup(alias));
97a979
+            }
97a979
+        }
97a979
+    }
97a979
+
97a979
+    if (device_args) {
97a979
+        g_hash_table_foreach(device_args, append_config_arg, arg_list);
97a979
+    }
97a979
+
97a979
+    return arg_list;
97a979
+}
97a979
+
97a979
+/*!
97a979
+ * \internal
97a979
+ * \brief Free all memory used by a stonith action
97a979
+ *
97a979
+ * \param[in,out] action  Action to free
97a979
+ */
97a979
+void
97a979
+stonith__destroy_action(stonith_action_t *action)
97a979
+{
97a979
+    if (action) {
97a979
+        free(action->agent);
97a979
+        if (action->args) {
97a979
+            g_hash_table_destroy(action->args);
97a979
+        }
97a979
+        free(action->action);
97a979
+        free(action->victim);
97a979
+        if (action->svc_action) {
97a979
+            services_action_free(action->svc_action);
97a979
+        }
97a979
+        pcmk__reset_result(&(action->result));
97a979
+        free(action);
97a979
+    }
97a979
+}
97a979
+
97a979
+/*!
97a979
+ * \internal
97a979
+ * \brief Get the result of an executed stonith action
97a979
+ *
97a979
+ * \param[in] action  Executed action
97a979
+ *
97a979
+ * \return Pointer to action's result (or NULL if \p action is NULL)
97a979
+ */
97a979
+pcmk__action_result_t *
97a979
+stonith__action_result(stonith_action_t *action)
97a979
+{
97a979
+    return (action == NULL)? NULL : &(action->result);
97a979
+}
97a979
+
97a979
+#define FAILURE_MAX_RETRIES 2
97a979
+stonith_action_t *
97a979
+stonith_action_create(const char *agent,
97a979
+                      const char *_action,
97a979
+                      const char *victim,
97a979
+                      uint32_t victim_nodeid,
97a979
+                      int timeout, GHashTable * device_args,
97a979
+                      GHashTable * port_map, const char *host_arg)
97a979
+{
97a979
+    stonith_action_t *action;
97a979
+
97a979
+    action = calloc(1, sizeof(stonith_action_t));
97a979
+    action->args = make_args(agent, _action, victim, victim_nodeid,
97a979
+                             device_args, port_map, host_arg);
97a979
+    crm_debug("Preparing '%s' action for %s using agent %s",
97a979
+              _action, (victim? victim : "no target"), agent);
97a979
+    action->agent = strdup(agent);
97a979
+    action->action = strdup(_action);
97a979
+    if (victim) {
97a979
+        action->victim = strdup(victim);
97a979
+    }
97a979
+    action->timeout = action->remaining_timeout = timeout;
97a979
+    action->max_retries = FAILURE_MAX_RETRIES;
97a979
+
97a979
+    pcmk__set_result(&(action->result), PCMK_OCF_UNKNOWN, PCMK_EXEC_UNKNOWN,
97a979
+                     "Initialization bug in fencing library");
97a979
+
97a979
+    if (device_args) {
97a979
+        char buffer[512];
97a979
+        const char *value = NULL;
97a979
+
97a979
+        snprintf(buffer, sizeof(buffer), "pcmk_%s_retries", _action);
97a979
+        value = g_hash_table_lookup(device_args, buffer);
97a979
+
97a979
+        if (value) {
97a979
+            action->max_retries = atoi(value);
97a979
+        }
97a979
+    }
97a979
+
97a979
+    return action;
97a979
+}
97a979
+
97a979
+static gboolean
97a979
+update_remaining_timeout(stonith_action_t * action)
97a979
+{
97a979
+    int diff = time(NULL) - action->initial_start_time;
97a979
+
97a979
+    if (action->tries >= action->max_retries) {
97a979
+        crm_info("Attempted to execute agent %s (%s) the maximum number of times (%d) allowed",
97a979
+                 action->agent, action->action, action->max_retries);
97a979
+        action->remaining_timeout = 0;
97a979
+    } else if ((action->result.execution_status != PCMK_EXEC_TIMEOUT)
97a979
+               && (diff < (action->timeout * 0.7))) {
97a979
+        /* only set remaining timeout period if there is 30%
97a979
+         * or greater of the original timeout period left */
97a979
+        action->remaining_timeout = action->timeout - diff;
97a979
+    } else {
97a979
+        action->remaining_timeout = 0;
97a979
+    }
97a979
+    return action->remaining_timeout ? TRUE : FALSE;
97a979
+}
97a979
+
97a979
+/*!
97a979
+ * \internal
97a979
+ * \brief Map a fencing action result to a standard return code
97a979
+ *
97a979
+ * \param[in] result  Fencing action result to map
97a979
+ *
97a979
+ * \return Standard Pacemaker return code that best corresponds to \p result
97a979
+ */
97a979
+int
97a979
+stonith__result2rc(const pcmk__action_result_t *result)
97a979
+{
97a979
+    if (pcmk__result_ok(result)) {
97a979
+        return pcmk_rc_ok;
97a979
+    }
97a979
+
97a979
+    switch (result->execution_status) {
97a979
+        case PCMK_EXEC_PENDING:         return EINPROGRESS;
97a979
+        case PCMK_EXEC_CANCELLED:       return ECANCELED;
97a979
+        case PCMK_EXEC_TIMEOUT:         return ETIME;
97a979
+        case PCMK_EXEC_NOT_INSTALLED:   return ENOENT;
97a979
+        case PCMK_EXEC_NOT_SUPPORTED:   return EOPNOTSUPP;
97a979
+        case PCMK_EXEC_NOT_CONNECTED:   return ENOTCONN;
97a979
+        case PCMK_EXEC_NO_FENCE_DEVICE: return ENODEV;
97a979
+        case PCMK_EXEC_NO_SECRETS:      return EACCES;
97a979
+
97a979
+        /* For the fencing API, PCMK_EXEC_INVALID is used with fencer API
97a979
+         * operations that don't involve executing an agent (for example,
97a979
+         * registering devices). This allows us to use the CRM_EX_* codes in the
97a979
+         * exit status for finer-grained responses.
97a979
+         */
97a979
+        case PCMK_EXEC_INVALID:
97a979
+            switch (result->exit_status) {
97a979
+                case CRM_EX_INSUFFICIENT_PRIV:  return EACCES;
97a979
+                case CRM_EX_PROTOCOL:           return EPROTO;
97a979
+
97a979
+               /* CRM_EX_EXPIRED is used for orphaned fencing operations left
97a979
+                * over from a previous instance of the fencer. For API backward
97a979
+                * compatibility, this is mapped to the previously used code for
97a979
+                * this case, EHOSTUNREACH.
97a979
+                */
97a979
+                case CRM_EX_EXPIRED:            return EHOSTUNREACH;
97a979
+                default:                        break;
97a979
+            }
97a979
+
97a979
+        default:
97a979
+            break;
97a979
+    }
97a979
+
97a979
+    // Try to provide useful error code based on result's error output
97a979
+
97a979
+    if (result->action_stderr == NULL) {
97a979
+        return ENODATA;
97a979
+
97a979
+    } else if (strcasestr(result->action_stderr, "timed out")
97a979
+               || strcasestr(result->action_stderr, "timeout")) {
97a979
+        return ETIME;
97a979
+
97a979
+    } else if (strcasestr(result->action_stderr, "unrecognised action")
97a979
+               || strcasestr(result->action_stderr, "unrecognized action")
97a979
+               || strcasestr(result->action_stderr, "unsupported action")) {
97a979
+        return EOPNOTSUPP;
97a979
+    }
97a979
+
97a979
+    // Oh well, we tried
97a979
+    return pcmk_rc_error;
97a979
+}
97a979
+
97a979
+static void
97a979
+stonith_action_async_done(svc_action_t *svc_action)
97a979
+{
97a979
+    stonith_action_t *action = (stonith_action_t *) svc_action->cb_data;
97a979
+
97a979
+    set_result_from_svc_action(action, svc_action);
97a979
+
97a979
+    svc_action->params = NULL;
97a979
+
97a979
+    crm_debug("Child process %d performing action '%s' exited with rc %d",
97a979
+                action->pid, action->action, svc_action->rc);
97a979
+
97a979
+    log_action(action, action->pid);
97a979
+
97a979
+    if (!pcmk__result_ok(&(action->result))
97a979
+        && update_remaining_timeout(action)) {
97a979
+
97a979
+        int rc = internal_stonith_action_execute(action);
97a979
+        if (rc == pcmk_ok) {
97a979
+            return;
97a979
+        }
97a979
+    }
97a979
+
97a979
+    if (action->done_cb) {
97a979
+        action->done_cb(action->pid, &(action->result), action->userdata);
97a979
+    }
97a979
+
97a979
+    action->svc_action = NULL; // don't remove our caller
97a979
+    stonith__destroy_action(action);
97a979
+}
97a979
+
97a979
+static void
97a979
+stonith_action_async_forked(svc_action_t *svc_action)
97a979
+{
97a979
+    stonith_action_t *action = (stonith_action_t *) svc_action->cb_data;
97a979
+
97a979
+    action->pid = svc_action->pid;
97a979
+    action->svc_action = svc_action;
97a979
+
97a979
+    if (action->fork_cb) {
97a979
+        (action->fork_cb) (svc_action->pid, action->userdata);
97a979
+    }
97a979
+
97a979
+    crm_trace("Child process %d performing action '%s' successfully forked",
97a979
+              action->pid, action->action);
97a979
+}
97a979
+
97a979
+static int
97a979
+internal_stonith_action_execute(stonith_action_t * action)
97a979
+{
97a979
+    int rc = -EPROTO;
97a979
+    int is_retry = 0;
97a979
+    svc_action_t *svc_action = NULL;
97a979
+    static int stonith_sequence = 0;
97a979
+    char *buffer = NULL;
97a979
+
97a979
+    CRM_CHECK(action != NULL, return -EINVAL);
97a979
+
97a979
+    if ((action->action == NULL) || (action->args == NULL)
97a979
+        || (action->agent == NULL)) {
97a979
+        pcmk__set_result(&(action->result), PCMK_OCF_UNKNOWN_ERROR,
97a979
+                         PCMK_EXEC_ERROR_FATAL, "Bug in fencing library");
97a979
+        return -EINVAL;
97a979
+    }
97a979
+
97a979
+    if (!action->tries) {
97a979
+        action->initial_start_time = time(NULL);
97a979
+    }
97a979
+    action->tries++;
97a979
+
97a979
+    if (action->tries > 1) {
97a979
+        crm_info("Attempt %d to execute %s (%s). remaining timeout is %d",
97a979
+                 action->tries, action->agent, action->action, action->remaining_timeout);
97a979
+        is_retry = 1;
97a979
+    }
97a979
+
97a979
+    buffer = crm_strdup_printf(PCMK__FENCE_BINDIR "/%s",
97a979
+                               basename(action->agent));
97a979
+    svc_action = services_action_create_generic(buffer, NULL);
97a979
+    free(buffer);
97a979
+
97a979
+    if (svc_action->rc != PCMK_OCF_UNKNOWN) {
97a979
+        set_result_from_svc_action(action, svc_action);
97a979
+        services_action_free(svc_action);
97a979
+        return -E2BIG;
97a979
+    }
97a979
+
97a979
+    svc_action->timeout = 1000 * action->remaining_timeout;
97a979
+    svc_action->standard = strdup(PCMK_RESOURCE_CLASS_STONITH);
97a979
+    svc_action->id = crm_strdup_printf("%s_%s_%d", basename(action->agent),
97a979
+                                       action->action, action->tries);
97a979
+    svc_action->agent = strdup(action->agent);
97a979
+    svc_action->sequence = stonith_sequence++;
97a979
+    svc_action->params = action->args;
97a979
+    svc_action->cb_data = (void *) action;
97a979
+    svc_action->flags = pcmk__set_flags_as(__func__, __LINE__,
97a979
+                                           LOG_TRACE, "Action",
97a979
+                                           svc_action->id, svc_action->flags,
97a979
+                                           SVC_ACTION_NON_BLOCKED,
97a979
+                                           "SVC_ACTION_NON_BLOCKED");
97a979
+
97a979
+    /* keep retries from executing out of control and free previous results */
97a979
+    if (is_retry) {
97a979
+        pcmk__reset_result(&(action->result));
97a979
+        sleep(1);
97a979
+    }
97a979
+
97a979
+    if (action->async) {
97a979
+        /* async */
97a979
+        if (services_action_async_fork_notify(svc_action,
97a979
+                                              &stonith_action_async_done,
97a979
+                                              &stonith_action_async_forked)) {
97a979
+            pcmk__set_result(&(action->result), PCMK_OCF_UNKNOWN,
97a979
+                             PCMK_EXEC_PENDING, NULL);
97a979
+            return pcmk_ok;
97a979
+        }
97a979
+
97a979
+    } else if (services_action_sync(svc_action)) { // sync success
97a979
+        rc = pcmk_ok;
97a979
+
97a979
+    } else { // sync failure
97a979
+        rc = -ECONNABORTED;
97a979
+    }
97a979
+
97a979
+    set_result_from_svc_action(action, svc_action);
97a979
+    svc_action->params = NULL;
97a979
+    services_action_free(svc_action);
97a979
+    return rc;
97a979
+}
97a979
+
97a979
+/*!
97a979
+ * \internal
97a979
+ * \brief Kick off execution of an async stonith action
97a979
+ *
97a979
+ * \param[in,out] action        Action to be executed
97a979
+ * \param[in,out] userdata      Datapointer to be passed to callbacks
97a979
+ * \param[in]     done          Callback to notify action has failed/succeeded
97a979
+ * \param[in]     fork_callback Callback to notify successful fork of child
97a979
+ *
97a979
+ * \return pcmk_ok if ownership of action has been taken, -errno otherwise
97a979
+ */
97a979
+int
97a979
+stonith_action_execute_async(stonith_action_t * action,
97a979
+                             void *userdata,
97a979
+                             void (*done) (int pid,
97a979
+                                           const pcmk__action_result_t *result,
97a979
+                                           void *user_data),
97a979
+                             void (*fork_cb) (int pid, void *user_data))
97a979
+{
97a979
+    if (!action) {
97a979
+        return -EINVAL;
97a979
+    }
97a979
+
97a979
+    action->userdata = userdata;
97a979
+    action->done_cb = done;
97a979
+    action->fork_cb = fork_cb;
97a979
+    action->async = 1;
97a979
+
97a979
+    return internal_stonith_action_execute(action);
97a979
+}
97a979
+
97a979
+/*!
97a979
+ * \internal
97a979
+ * \brief Execute a stonith action
97a979
+ *
97a979
+ * \param[in,out] action  Action to execute
97a979
+ *
97a979
+ * \return pcmk_ok on success, -errno otherwise
97a979
+ */
97a979
+int
97a979
+stonith__execute(stonith_action_t *action)
97a979
+{
97a979
+    int rc = pcmk_ok;
97a979
+
97a979
+    CRM_CHECK(action != NULL, return -EINVAL);
97a979
+
97a979
+    // Keep trying until success, max retries, or timeout
97a979
+    do {
97a979
+        rc = internal_stonith_action_execute(action);
97a979
+    } while ((rc != pcmk_ok) && update_remaining_timeout(action));
97a979
+
97a979
+    return rc;
97a979
+}
97a979
diff --git a/lib/fencing/st_client.c b/lib/fencing/st_client.c
97a979
index 93513e9f3..944cd1863 100644
97a979
--- a/lib/fencing/st_client.c
97a979
+++ b/lib/fencing/st_client.c
97a979
@@ -8,28 +8,20 @@
97a979
  */
97a979
 
97a979
 #include <crm_internal.h>
97a979
-#include <unistd.h>
97a979
+
97a979
 #include <stdlib.h>
97a979
 #include <stdio.h>
97a979
 #include <stdbool.h>
97a979
 #include <string.h>
97a979
 #include <ctype.h>
97a979
-#include <libgen.h>
97a979
 #include <inttypes.h>
97a979
-
97a979
-#include <sys/stat.h>
97a979
 #include <sys/types.h>
97a979
-#include <sys/wait.h>
97a979
-
97a979
 #include <glib.h>
97a979
 
97a979
 #include <crm/crm.h>
97a979
 #include <crm/stonith-ng.h>
97a979
 #include <crm/fencing/internal.h>
97a979
 #include <crm/msg_xml.h>
97a979
-#include <crm/common/xml.h>
97a979
-#include <crm/common/xml_internal.h>
97a979
-#include <crm/services_internal.h>
97a979
 
97a979
 #include <crm/common/mainloop.h>
97a979
 
97a979
@@ -37,31 +29,6 @@
97a979
 
97a979
 CRM_TRACE_INIT_DATA(stonith);
97a979
 
97a979
-struct stonith_action_s {
97a979
-    /*! user defined data */
97a979
-    char *agent;
97a979
-    char *action;
97a979
-    char *victim;
97a979
-    GHashTable *args;
97a979
-    int timeout;
97a979
-    int async;
97a979
-    void *userdata;
97a979
-    void (*done_cb) (int pid, const pcmk__action_result_t *result,
97a979
-                     void *user_data);
97a979
-    void (*fork_cb) (int pid, void *user_data);
97a979
-
97a979
-    svc_action_t *svc_action;
97a979
-
97a979
-    /*! internal timing information */
97a979
-    time_t initial_start_time;
97a979
-    int tries;
97a979
-    int remaining_timeout;
97a979
-    int max_retries;
97a979
-
97a979
-    int pid;
97a979
-    pcmk__action_result_t result;
97a979
-};
97a979
-
97a979
 typedef struct stonith_private_s {
97a979
     char *token;
97a979
     crm_ipc_t *ipc;
97a979
@@ -118,8 +85,6 @@ static int stonith_send_command(stonith_t *stonith, const char *op,
97a979
 
97a979
 static void stonith_connection_destroy(gpointer user_data);
97a979
 static void stonith_send_notification(gpointer data, gpointer user_data);
97a979
-static int internal_stonith_action_execute(stonith_action_t * action);
97a979
-static void log_action(stonith_action_t *action, pid_t pid);
97a979
 
97a979
 /*!
97a979
  * \brief Get agent namespace by name
97a979
@@ -196,23 +161,6 @@ stonith_get_namespace(const char *agent, const char *namespace_s)
97a979
     return st_namespace_invalid;
97a979
 }
97a979
 
97a979
-/*!
97a979
- * \internal
97a979
- * \brief Set an action's result based on services library result
97a979
- *
97a979
- * \param[in] action      Fence action to set result for
97a979
- * \param[in] svc_action  Service action to get result from
97a979
- */
97a979
-static void
97a979
-set_result_from_svc_action(stonith_action_t *action, svc_action_t *svc_action)
97a979
-{
97a979
-    pcmk__set_result(&(action->result), svc_action->rc, svc_action->status,
97a979
-                     services__exit_reason(svc_action));
97a979
-    pcmk__set_result_output(&(action->result),
97a979
-                            services__grab_stdout(svc_action),
97a979
-                            services__grab_stderr(svc_action));
97a979
-}
97a979
-
97a979
 gboolean
97a979
 stonith__watchdog_fencing_enabled_for_node_api(stonith_t *st, const char *node)
97a979
 {
97a979
@@ -273,21 +221,6 @@ stonith__watchdog_fencing_enabled_for_node(const char *node)
97a979
     return stonith__watchdog_fencing_enabled_for_node_api(NULL, node);
97a979
 }
97a979
 
97a979
-static void
97a979
-log_action(stonith_action_t *action, pid_t pid)
97a979
-{
97a979
-    /* The services library has already logged the output at info or debug
97a979
-     * level, so just raise to warning for stderr.
97a979
-     */
97a979
-    if (action->result.action_stderr != NULL) {
97a979
-        /* Logging the whole string confuses syslog when the string is xml */
97a979
-        char *prefix = crm_strdup_printf("%s[%d] stderr:", action->agent, pid);
97a979
-
97a979
-        crm_log_output(LOG_WARNING, prefix, action->result.action_stderr);
97a979
-        free(prefix);
97a979
-    }
97a979
-}
97a979
-
97a979
 /* when cycling through the list we don't want to delete items
97a979
    so just mark them and when we know nobody is using the list
97a979
    loop over it to remove the marked items
97a979
@@ -530,465 +463,6 @@ stonith_api_register_level(stonith_t * st, int options, const char *node, int le
97a979
                                            level, device_list);
97a979
 }
97a979
 
97a979
-static void
97a979
-append_config_arg(gpointer key, gpointer value, gpointer user_data)
97a979
-{
97a979
-    /* The fencer will filter "action" out when it registers the device,
97a979
-     * but ignore it here in case any external API users don't.
97a979
-     *
97a979
-     * Also filter out parameters handled directly by Pacemaker.
97a979
-     */
97a979
-    if (!pcmk__str_eq(key, STONITH_ATTR_ACTION_OP, pcmk__str_casei)
97a979
-        && !pcmk_stonith_param(key)
97a979
-        && (strstr(key, CRM_META) == NULL)
97a979
-        && !pcmk__str_eq(key, "crm_feature_set", pcmk__str_casei)) {
97a979
-
97a979
-        crm_trace("Passing %s=%s with fence action",
97a979
-                  (const char *) key, (const char *) (value? value : ""));
97a979
-        g_hash_table_insert((GHashTable *) user_data,
97a979
-                            strdup(key), strdup(value? value : ""));
97a979
-    }
97a979
-}
97a979
-
97a979
-static GHashTable *
97a979
-make_args(const char *agent, const char *action, const char *victim,
97a979
-          uint32_t victim_nodeid, GHashTable * device_args,
97a979
-          GHashTable * port_map, const char *host_arg)
97a979
-{
97a979
-    GHashTable *arg_list = NULL;
97a979
-    const char *value = NULL;
97a979
-
97a979
-    CRM_CHECK(action != NULL, return NULL);
97a979
-
97a979
-    arg_list = pcmk__strkey_table(free, free);
97a979
-
97a979
-    // Add action to arguments (using an alias if requested)
97a979
-    if (device_args) {
97a979
-        char buffer[512];
97a979
-
97a979
-        snprintf(buffer, sizeof(buffer), "pcmk_%s_action", action);
97a979
-        value = g_hash_table_lookup(device_args, buffer);
97a979
-        if (value) {
97a979
-            crm_debug("Substituting '%s' for fence action %s targeting %s",
97a979
-                      value, action, victim);
97a979
-            action = value;
97a979
-        }
97a979
-    }
97a979
-    g_hash_table_insert(arg_list, strdup(STONITH_ATTR_ACTION_OP),
97a979
-                        strdup(action));
97a979
-
97a979
-    /* If this is a fencing operation against another node, add more standard
97a979
-     * arguments.
97a979
-     */
97a979
-    if (victim && device_args) {
97a979
-        const char *param = NULL;
97a979
-
97a979
-        /* Always pass the target's name, per
97a979
-         * https://github.com/ClusterLabs/fence-agents/blob/master/doc/FenceAgentAPI.md
97a979
-         */
97a979
-        g_hash_table_insert(arg_list, strdup("nodename"), strdup(victim));
97a979
-
97a979
-        // If the target's node ID was specified, pass it, too
97a979
-        if (victim_nodeid) {
97a979
-            char *nodeid = crm_strdup_printf("%" PRIu32, victim_nodeid);
97a979
-
97a979
-            // cts-fencing looks for this log message
97a979
-            crm_info("Passing '%s' as nodeid with fence action '%s' targeting %s",
97a979
-                     nodeid, action, victim);
97a979
-            g_hash_table_insert(arg_list, strdup("nodeid"), nodeid);
97a979
-        }
97a979
-
97a979
-        // Check whether target must be specified in some other way
97a979
-        param = g_hash_table_lookup(device_args, PCMK_STONITH_HOST_ARGUMENT);
97a979
-        if (!pcmk__str_eq(agent, "fence_legacy", pcmk__str_none)
97a979
-            && !pcmk__str_eq(param, "none", pcmk__str_casei)) {
97a979
-
97a979
-            if (param == NULL) {
97a979
-                /* Use the caller's default for pcmk_host_argument, or "port" if
97a979
-                 * none was given
97a979
-                 */
97a979
-                param = (host_arg == NULL)? "port" : host_arg;
97a979
-            }
97a979
-            value = g_hash_table_lookup(device_args, param);
97a979
-
97a979
-            if (pcmk__str_eq(value, "dynamic",
97a979
-                             pcmk__str_casei|pcmk__str_null_matches)) {
97a979
-                /* If the host argument was "dynamic" or not explicitly specified,
97a979
-                 * add it with the target
97a979
-                 */
97a979
-                const char *alias = NULL;
97a979
-
97a979
-                if (port_map) {
97a979
-                    alias = g_hash_table_lookup(port_map, victim);
97a979
-                }
97a979
-                if (alias == NULL) {
97a979
-                    alias = victim;
97a979
-                }
97a979
-                crm_debug("Passing %s='%s' with fence action %s targeting %s",
97a979
-                          param, alias, action, victim);
97a979
-                g_hash_table_insert(arg_list, strdup(param), strdup(alias));
97a979
-            }
97a979
-        }
97a979
-    }
97a979
-
97a979
-    if (device_args) {
97a979
-        g_hash_table_foreach(device_args, append_config_arg, arg_list);
97a979
-    }
97a979
-
97a979
-    return arg_list;
97a979
-}
97a979
-
97a979
-/*!
97a979
- * \internal
97a979
- * \brief Free all memory used by a stonith action
97a979
- *
97a979
- * \param[in,out] action  Action to free
97a979
- */
97a979
-void
97a979
-stonith__destroy_action(stonith_action_t *action)
97a979
-{
97a979
-    if (action) {
97a979
-        free(action->agent);
97a979
-        if (action->args) {
97a979
-            g_hash_table_destroy(action->args);
97a979
-        }
97a979
-        free(action->action);
97a979
-        free(action->victim);
97a979
-        if (action->svc_action) {
97a979
-            services_action_free(action->svc_action);
97a979
-        }
97a979
-        pcmk__reset_result(&(action->result));
97a979
-        free(action);
97a979
-    }
97a979
-}
97a979
-
97a979
-/*!
97a979
- * \internal
97a979
- * \brief Get the result of an executed stonith action
97a979
- *
97a979
- * \param[in] action  Executed action
97a979
- *
97a979
- * \return Pointer to action's result (or NULL if \p action is NULL)
97a979
- */
97a979
-pcmk__action_result_t *
97a979
-stonith__action_result(stonith_action_t *action)
97a979
-{
97a979
-    return (action == NULL)? NULL : &(action->result);
97a979
-}
97a979
-
97a979
-#define FAILURE_MAX_RETRIES 2
97a979
-stonith_action_t *
97a979
-stonith_action_create(const char *agent,
97a979
-                      const char *_action,
97a979
-                      const char *victim,
97a979
-                      uint32_t victim_nodeid,
97a979
-                      int timeout, GHashTable * device_args,
97a979
-                      GHashTable * port_map, const char *host_arg)
97a979
-{
97a979
-    stonith_action_t *action;
97a979
-
97a979
-    action = calloc(1, sizeof(stonith_action_t));
97a979
-    action->args = make_args(agent, _action, victim, victim_nodeid,
97a979
-                             device_args, port_map, host_arg);
97a979
-    crm_debug("Preparing '%s' action for %s using agent %s",
97a979
-              _action, (victim? victim : "no target"), agent);
97a979
-    action->agent = strdup(agent);
97a979
-    action->action = strdup(_action);
97a979
-    if (victim) {
97a979
-        action->victim = strdup(victim);
97a979
-    }
97a979
-    action->timeout = action->remaining_timeout = timeout;
97a979
-    action->max_retries = FAILURE_MAX_RETRIES;
97a979
-
97a979
-    pcmk__set_result(&(action->result), PCMK_OCF_UNKNOWN, PCMK_EXEC_UNKNOWN,
97a979
-                     "Initialization bug in fencing library");
97a979
-
97a979
-    if (device_args) {
97a979
-        char buffer[512];
97a979
-        const char *value = NULL;
97a979
-
97a979
-        snprintf(buffer, sizeof(buffer), "pcmk_%s_retries", _action);
97a979
-        value = g_hash_table_lookup(device_args, buffer);
97a979
-
97a979
-        if (value) {
97a979
-            action->max_retries = atoi(value);
97a979
-        }
97a979
-    }
97a979
-
97a979
-    return action;
97a979
-}
97a979
-
97a979
-static gboolean
97a979
-update_remaining_timeout(stonith_action_t * action)
97a979
-{
97a979
-    int diff = time(NULL) - action->initial_start_time;
97a979
-
97a979
-    if (action->tries >= action->max_retries) {
97a979
-        crm_info("Attempted to execute agent %s (%s) the maximum number of times (%d) allowed",
97a979
-                 action->agent, action->action, action->max_retries);
97a979
-        action->remaining_timeout = 0;
97a979
-    } else if ((action->result.execution_status != PCMK_EXEC_TIMEOUT)
97a979
-               && (diff < (action->timeout * 0.7))) {
97a979
-        /* only set remaining timeout period if there is 30%
97a979
-         * or greater of the original timeout period left */
97a979
-        action->remaining_timeout = action->timeout - diff;
97a979
-    } else {
97a979
-        action->remaining_timeout = 0;
97a979
-    }
97a979
-    return action->remaining_timeout ? TRUE : FALSE;
97a979
-}
97a979
-
97a979
-/*!
97a979
- * \internal
97a979
- * \brief Map a fencing action result to a standard return code
97a979
- *
97a979
- * \param[in] result  Fencing action result to map
97a979
- *
97a979
- * \return Standard Pacemaker return code that best corresponds to \p result
97a979
- */
97a979
-int
97a979
-stonith__result2rc(const pcmk__action_result_t *result)
97a979
-{
97a979
-    if (pcmk__result_ok(result)) {
97a979
-        return pcmk_rc_ok;
97a979
-    }
97a979
-
97a979
-    switch (result->execution_status) {
97a979
-        case PCMK_EXEC_PENDING:         return EINPROGRESS;
97a979
-        case PCMK_EXEC_CANCELLED:       return ECANCELED;
97a979
-        case PCMK_EXEC_TIMEOUT:         return ETIME;
97a979
-        case PCMK_EXEC_NOT_INSTALLED:   return ENOENT;
97a979
-        case PCMK_EXEC_NOT_SUPPORTED:   return EOPNOTSUPP;
97a979
-        case PCMK_EXEC_NOT_CONNECTED:   return ENOTCONN;
97a979
-        case PCMK_EXEC_NO_FENCE_DEVICE: return ENODEV;
97a979
-        case PCMK_EXEC_NO_SECRETS:      return EACCES;
97a979
-
97a979
-        /* For the fencing API, PCMK_EXEC_INVALID is used with fencer API
97a979
-         * operations that don't involve executing an agent (for example,
97a979
-         * registering devices). This allows us to use the CRM_EX_* codes in the
97a979
-         * exit status for finer-grained responses.
97a979
-         */
97a979
-        case PCMK_EXEC_INVALID:
97a979
-            switch (result->exit_status) {
97a979
-                case CRM_EX_INSUFFICIENT_PRIV:  return EACCES;
97a979
-                case CRM_EX_PROTOCOL:           return EPROTO;
97a979
-
97a979
-               /* CRM_EX_EXPIRED is used for orphaned fencing operations left
97a979
-                * over from a previous instance of the fencer. For API backward
97a979
-                * compatibility, this is mapped to the previously used code for
97a979
-                * this case, EHOSTUNREACH.
97a979
-                */
97a979
-                case CRM_EX_EXPIRED:            return EHOSTUNREACH;
97a979
-                default:                        break;
97a979
-            }
97a979
-
97a979
-        default:
97a979
-            break;
97a979
-    }
97a979
-
97a979
-    // Try to provide useful error code based on result's error output
97a979
-
97a979
-    if (result->action_stderr == NULL) {
97a979
-        return ENODATA;
97a979
-
97a979
-    } else if (strcasestr(result->action_stderr, "timed out")
97a979
-               || strcasestr(result->action_stderr, "timeout")) {
97a979
-        return ETIME;
97a979
-
97a979
-    } else if (strcasestr(result->action_stderr, "unrecognised action")
97a979
-               || strcasestr(result->action_stderr, "unrecognized action")
97a979
-               || strcasestr(result->action_stderr, "unsupported action")) {
97a979
-        return EOPNOTSUPP;
97a979
-    }
97a979
-
97a979
-    // Oh well, we tried
97a979
-    return pcmk_rc_error;
97a979
-}
97a979
-
97a979
-static void
97a979
-stonith_action_async_done(svc_action_t *svc_action)
97a979
-{
97a979
-    stonith_action_t *action = (stonith_action_t *) svc_action->cb_data;
97a979
-
97a979
-    set_result_from_svc_action(action, svc_action);
97a979
-
97a979
-    svc_action->params = NULL;
97a979
-
97a979
-    crm_debug("Child process %d performing action '%s' exited with rc %d",
97a979
-                action->pid, action->action, svc_action->rc);
97a979
-
97a979
-    log_action(action, action->pid);
97a979
-
97a979
-    if (!pcmk__result_ok(&(action->result))
97a979
-        && update_remaining_timeout(action)) {
97a979
-
97a979
-        int rc = internal_stonith_action_execute(action);
97a979
-        if (rc == pcmk_ok) {
97a979
-            return;
97a979
-        }
97a979
-    }
97a979
-
97a979
-    if (action->done_cb) {
97a979
-        action->done_cb(action->pid, &(action->result), action->userdata);
97a979
-    }
97a979
-
97a979
-    action->svc_action = NULL; // don't remove our caller
97a979
-    stonith__destroy_action(action);
97a979
-}
97a979
-
97a979
-static void
97a979
-stonith_action_async_forked(svc_action_t *svc_action)
97a979
-{
97a979
-    stonith_action_t *action = (stonith_action_t *) svc_action->cb_data;
97a979
-
97a979
-    action->pid = svc_action->pid;
97a979
-    action->svc_action = svc_action;
97a979
-
97a979
-    if (action->fork_cb) {
97a979
-        (action->fork_cb) (svc_action->pid, action->userdata);
97a979
-    }
97a979
-
97a979
-    crm_trace("Child process %d performing action '%s' successfully forked",
97a979
-              action->pid, action->action);
97a979
-}
97a979
-
97a979
-static int
97a979
-internal_stonith_action_execute(stonith_action_t * action)
97a979
-{
97a979
-    int rc = -EPROTO;
97a979
-    int is_retry = 0;
97a979
-    svc_action_t *svc_action = NULL;
97a979
-    static int stonith_sequence = 0;
97a979
-    char *buffer = NULL;
97a979
-
97a979
-    CRM_CHECK(action != NULL, return -EINVAL);
97a979
-
97a979
-    if ((action->action == NULL) || (action->args == NULL)
97a979
-        || (action->agent == NULL)) {
97a979
-        pcmk__set_result(&(action->result), PCMK_OCF_UNKNOWN_ERROR,
97a979
-                         PCMK_EXEC_ERROR_FATAL, "Bug in fencing library");
97a979
-        return -EINVAL;
97a979
-    }
97a979
-
97a979
-    if (!action->tries) {
97a979
-        action->initial_start_time = time(NULL);
97a979
-    }
97a979
-    action->tries++;
97a979
-
97a979
-    if (action->tries > 1) {
97a979
-        crm_info("Attempt %d to execute %s (%s). remaining timeout is %d",
97a979
-                 action->tries, action->agent, action->action, action->remaining_timeout);
97a979
-        is_retry = 1;
97a979
-    }
97a979
-
97a979
-    buffer = crm_strdup_printf(PCMK__FENCE_BINDIR "/%s",
97a979
-                               basename(action->agent));
97a979
-    svc_action = services_action_create_generic(buffer, NULL);
97a979
-    free(buffer);
97a979
-
97a979
-    if (svc_action->rc != PCMK_OCF_UNKNOWN) {
97a979
-        set_result_from_svc_action(action, svc_action);
97a979
-        services_action_free(svc_action);
97a979
-        return -E2BIG;
97a979
-    }
97a979
-
97a979
-    svc_action->timeout = 1000 * action->remaining_timeout;
97a979
-    svc_action->standard = strdup(PCMK_RESOURCE_CLASS_STONITH);
97a979
-    svc_action->id = crm_strdup_printf("%s_%s_%d", basename(action->agent),
97a979
-                                       action->action, action->tries);
97a979
-    svc_action->agent = strdup(action->agent);
97a979
-    svc_action->sequence = stonith_sequence++;
97a979
-    svc_action->params = action->args;
97a979
-    svc_action->cb_data = (void *) action;
97a979
-    svc_action->flags = pcmk__set_flags_as(__func__, __LINE__,
97a979
-                                           LOG_TRACE, "Action",
97a979
-                                           svc_action->id, svc_action->flags,
97a979
-                                           SVC_ACTION_NON_BLOCKED,
97a979
-                                           "SVC_ACTION_NON_BLOCKED");
97a979
-
97a979
-    /* keep retries from executing out of control and free previous results */
97a979
-    if (is_retry) {
97a979
-        pcmk__reset_result(&(action->result));
97a979
-        sleep(1);
97a979
-    }
97a979
-
97a979
-    if (action->async) {
97a979
-        /* async */
97a979
-        if (services_action_async_fork_notify(svc_action,
97a979
-                                              &stonith_action_async_done,
97a979
-                                              &stonith_action_async_forked)) {
97a979
-            pcmk__set_result(&(action->result), PCMK_OCF_UNKNOWN,
97a979
-                             PCMK_EXEC_PENDING, NULL);
97a979
-            return pcmk_ok;
97a979
-        }
97a979
-
97a979
-    } else if (services_action_sync(svc_action)) { // sync success
97a979
-        rc = pcmk_ok;
97a979
-
97a979
-    } else { // sync failure
97a979
-        rc = -ECONNABORTED;
97a979
-    }
97a979
-
97a979
-    set_result_from_svc_action(action, svc_action);
97a979
-    svc_action->params = NULL;
97a979
-    services_action_free(svc_action);
97a979
-    return rc;
97a979
-}
97a979
-
97a979
-/*!
97a979
- * \internal
97a979
- * \brief Kick off execution of an async stonith action
97a979
- *
97a979
- * \param[in,out] action        Action to be executed
97a979
- * \param[in,out] userdata      Datapointer to be passed to callbacks
97a979
- * \param[in]     done          Callback to notify action has failed/succeeded
97a979
- * \param[in]     fork_callback Callback to notify successful fork of child
97a979
- *
97a979
- * \return pcmk_ok if ownership of action has been taken, -errno otherwise
97a979
- */
97a979
-int
97a979
-stonith_action_execute_async(stonith_action_t * action,
97a979
-                             void *userdata,
97a979
-                             void (*done) (int pid,
97a979
-                                           const pcmk__action_result_t *result,
97a979
-                                           void *user_data),
97a979
-                             void (*fork_cb) (int pid, void *user_data))
97a979
-{
97a979
-    if (!action) {
97a979
-        return -EINVAL;
97a979
-    }
97a979
-
97a979
-    action->userdata = userdata;
97a979
-    action->done_cb = done;
97a979
-    action->fork_cb = fork_cb;
97a979
-    action->async = 1;
97a979
-
97a979
-    return internal_stonith_action_execute(action);
97a979
-}
97a979
-
97a979
-/*!
97a979
- * \internal
97a979
- * \brief Execute a stonith action
97a979
- *
97a979
- * \param[in,out] action  Action to execute
97a979
- *
97a979
- * \return pcmk_ok on success, -errno otherwise
97a979
- */
97a979
-int
97a979
-stonith__execute(stonith_action_t *action)
97a979
-{
97a979
-    int rc = pcmk_ok;
97a979
-
97a979
-    CRM_CHECK(action != NULL, return -EINVAL);
97a979
-
97a979
-    // Keep trying until success, max retries, or timeout
97a979
-    do {
97a979
-        rc = internal_stonith_action_execute(action);
97a979
-    } while ((rc != pcmk_ok) && update_remaining_timeout(action));
97a979
-
97a979
-    return rc;
97a979
-}
97a979
-
97a979
 static int
97a979
 stonith_api_device_list(stonith_t * stonith, int call_options, const char *namespace,
97a979
                         stonith_key_value_t ** devices, int timeout)
97a979
-- 
97a979
2.27.0
97a979
97a979
97a979
From 883a3cf7d3f73d02417d3997a7885dd5a7bebac7 Mon Sep 17 00:00:00 2001
97a979
From: Ken Gaillot <kgaillot@redhat.com>
97a979
Date: Wed, 10 Nov 2021 15:39:17 -0600
97a979
Subject: [PATCH 05/13] Low: fencing,executor: improve mapping of legacy return
97a979
 code to execution status
97a979
97a979
Move stonith_rc2status() from the executor to the fencing library for future
97a979
reuse, exposing it internally as stonith__legacy2status(). Update it to use
97a979
recently added execution status codes.
97a979
---
97a979
 daemons/execd/execd_commands.c | 66 ++++++++--------------------------
97a979
 include/crm/fencing/internal.h |  2 ++
97a979
 lib/fencing/st_actions.c       | 36 +++++++++++++++++++
97a979
 3 files changed, 52 insertions(+), 52 deletions(-)
97a979
97a979
diff --git a/daemons/execd/execd_commands.c b/daemons/execd/execd_commands.c
97a979
index 02070bf11..0ccaa1ced 100644
97a979
--- a/daemons/execd/execd_commands.c
97a979
+++ b/daemons/execd/execd_commands.c
97a979
@@ -21,6 +21,7 @@
97a979
 #include <unistd.h>
97a979
 
97a979
 #include <crm/crm.h>
97a979
+#include <crm/fencing/internal.h>
97a979
 #include <crm/services.h>
97a979
 #include <crm/services_internal.h>
97a979
 #include <crm/common/mainloop.h>
97a979
@@ -999,56 +1000,6 @@ action_complete(svc_action_t * action)
97a979
     cmd_finalize(cmd, rsc);
97a979
 }
97a979
 
97a979
-/*!
97a979
- * \internal
97a979
- * \brief Determine operation status of a stonith operation
97a979
- *
97a979
- * Non-stonith resource operations get their operation status directly from the
97a979
- * service library, but the fencer does not have an equivalent, so we must infer
97a979
- * an operation status from the fencer API's return code.
97a979
- *
97a979
- * \param[in] action       Name of action performed on stonith resource
97a979
- * \param[in] interval_ms  Action interval
97a979
- * \param[in] rc           Action result from fencer
97a979
- *
97a979
- * \return Operation status corresponding to fencer API return code
97a979
- */
97a979
-static int
97a979
-stonith_rc2status(const char *action, guint interval_ms, int rc)
97a979
-{
97a979
-    int status = PCMK_EXEC_DONE;
97a979
-
97a979
-    switch (rc) {
97a979
-        case pcmk_ok:
97a979
-            break;
97a979
-
97a979
-        case -EOPNOTSUPP:
97a979
-        case -EPROTONOSUPPORT:
97a979
-            status = PCMK_EXEC_NOT_SUPPORTED;
97a979
-            break;
97a979
-
97a979
-        case -ETIME:
97a979
-        case -ETIMEDOUT:
97a979
-            status = PCMK_EXEC_TIMEOUT;
97a979
-            break;
97a979
-
97a979
-        case -ENOTCONN:
97a979
-        case -ECOMM:
97a979
-            // Couldn't talk to fencer
97a979
-            status = PCMK_EXEC_ERROR;
97a979
-            break;
97a979
-
97a979
-        case -ENODEV:
97a979
-            // The device is not registered with the fencer
97a979
-            status = PCMK_EXEC_ERROR;
97a979
-            break;
97a979
-
97a979
-        default:
97a979
-            break;
97a979
-    }
97a979
-    return status;
97a979
-}
97a979
-
97a979
 static void
97a979
 stonith_action_complete(lrmd_cmd_t * cmd, int rc)
97a979
 {
97a979
@@ -1062,8 +1013,19 @@ stonith_action_complete(lrmd_cmd_t * cmd, int rc)
97a979
      * the fencer return code.
97a979
      */
97a979
     if (cmd->result.execution_status != PCMK_EXEC_CANCELLED) {
97a979
-        cmd->result.execution_status = stonith_rc2status(cmd->action,
97a979
-                                                         cmd->interval_ms, rc);
97a979
+        cmd->result.execution_status = stonith__legacy2status(rc);
97a979
+
97a979
+        // Simplify status codes from fencer
97a979
+        switch (cmd->result.execution_status) {
97a979
+            case PCMK_EXEC_NOT_CONNECTED:
97a979
+            case PCMK_EXEC_INVALID:
97a979
+            case PCMK_EXEC_NO_FENCE_DEVICE:
97a979
+            case PCMK_EXEC_NO_SECRETS:
97a979
+                cmd->result.execution_status = PCMK_EXEC_ERROR;
97a979
+                break;
97a979
+            default:
97a979
+                break;
97a979
+        }
97a979
 
97a979
         // Certain successful actions change the known state of the resource
97a979
         if ((rsc != NULL) && pcmk__result_ok(&(cmd->result))) {
97a979
diff --git a/include/crm/fencing/internal.h b/include/crm/fencing/internal.h
97a979
index 6a7e4232c..80f6443be 100644
97a979
--- a/include/crm/fencing/internal.h
97a979
+++ b/include/crm/fencing/internal.h
97a979
@@ -182,6 +182,8 @@ bool stonith__event_state_pending(stonith_history_t *history, void *user_data);
97a979
 bool stonith__event_state_eq(stonith_history_t *history, void *user_data);
97a979
 bool stonith__event_state_neq(stonith_history_t *history, void *user_data);
97a979
 
97a979
+int stonith__legacy2status(int rc);
97a979
+
97a979
 /*!
97a979
  * \internal
97a979
  * \brief Is a fencing operation in pending state?
97a979
diff --git a/lib/fencing/st_actions.c b/lib/fencing/st_actions.c
97a979
index 64d3afd5d..9e785595a 100644
97a979
--- a/lib/fencing/st_actions.c
97a979
+++ b/lib/fencing/st_actions.c
97a979
@@ -360,6 +360,42 @@ stonith__result2rc(const pcmk__action_result_t *result)
97a979
     return pcmk_rc_error;
97a979
 }
97a979
 
97a979
+/*!
97a979
+ * \internal
97a979
+ * \brief Determine execution status equivalent of legacy fencer return code
97a979
+ *
97a979
+ * Fence action notifications, and fence action callbacks from older fencers
97a979
+ * (<=2.1.2) in a rolling upgrade, will have only a legacy return code. Map this
97a979
+ * to an execution status as best as possible (essentially, the inverse of
97a979
+ * stonith__result2rc()).
97a979
+ *
97a979
+ * \param[in] rc           Legacy return code from fencer
97a979
+ *
97a979
+ * \return Execution status best corresponding to \p rc
97a979
+ */
97a979
+int
97a979
+stonith__legacy2status(int rc)
97a979
+{
97a979
+    if (rc >= 0) {
97a979
+        return PCMK_EXEC_DONE;
97a979
+    }
97a979
+    switch (-rc) {
97a979
+        case EACCES:            return PCMK_EXEC_NO_SECRETS;
97a979
+        case ECANCELED:         return PCMK_EXEC_CANCELLED;
97a979
+        case EHOSTUNREACH:      return PCMK_EXEC_INVALID;
97a979
+        case EINPROGRESS:       return PCMK_EXEC_PENDING;
97a979
+        case ENODEV:            return PCMK_EXEC_NO_FENCE_DEVICE;
97a979
+        case ENOENT:            return PCMK_EXEC_NOT_INSTALLED;
97a979
+        case ENOTCONN:          return PCMK_EXEC_NOT_CONNECTED;
97a979
+        case EOPNOTSUPP:        return PCMK_EXEC_NOT_SUPPORTED;
97a979
+        case EPROTO:            return PCMK_EXEC_INVALID;
97a979
+        case EPROTONOSUPPORT:   return PCMK_EXEC_NOT_SUPPORTED;
97a979
+        case ETIME:             return PCMK_EXEC_TIMEOUT;
97a979
+        case ETIMEDOUT:         return PCMK_EXEC_TIMEOUT;
97a979
+        default:                return PCMK_EXEC_ERROR;
97a979
+    }
97a979
+}
97a979
+
97a979
 static void
97a979
 stonith_action_async_done(svc_action_t *svc_action)
97a979
 {
97a979
-- 
97a979
2.27.0
97a979
97a979
97a979
From 639a9f4a2cbeb6cc41b754a1dcb1f360a9500e03 Mon Sep 17 00:00:00 2001
97a979
From: Ken Gaillot <kgaillot@redhat.com>
97a979
Date: Thu, 11 Nov 2021 16:54:32 -0600
97a979
Subject: [PATCH 06/13] Refactor: fencing: add functions for getting/setting
97a979
 result via XML
97a979
97a979
These will come in handy as we update the various fencer messages to include a
97a979
full result rather than just a legacy return code. The functions are in a new
97a979
source file fenced_messages.c which can have other stuff moved to it later.
97a979
---
97a979
 include/crm/fencing/internal.h |   3 +
97a979
 lib/fencing/st_actions.c       | 107 +++++++++++++++++++++++++++++++++
97a979
 2 files changed, 110 insertions(+)
97a979
97a979
diff --git a/include/crm/fencing/internal.h b/include/crm/fencing/internal.h
97a979
index 80f6443be..4b5fd3959 100644
97a979
--- a/include/crm/fencing/internal.h
97a979
+++ b/include/crm/fencing/internal.h
97a979
@@ -60,6 +60,9 @@ stonith_action_t *stonith_action_create(const char *agent,
97a979
 void stonith__destroy_action(stonith_action_t *action);
97a979
 pcmk__action_result_t *stonith__action_result(stonith_action_t *action);
97a979
 int stonith__result2rc(const pcmk__action_result_t *result);
97a979
+void stonith__xe_set_result(xmlNode *xml, const pcmk__action_result_t *result);
97a979
+void stonith__xe_get_result(xmlNode *xml, pcmk__action_result_t *result);
97a979
+xmlNode *stonith__find_xe_with_result(xmlNode *xml);
97a979
 
97a979
 int
97a979
 stonith_action_execute_async(stonith_action_t * action,
97a979
diff --git a/lib/fencing/st_actions.c b/lib/fencing/st_actions.c
97a979
index 9e785595a..d4fc3f5ed 100644
97a979
--- a/lib/fencing/st_actions.c
97a979
+++ b/lib/fencing/st_actions.c
97a979
@@ -396,6 +396,113 @@ stonith__legacy2status(int rc)
97a979
     }
97a979
 }
97a979
 
97a979
+/*!
97a979
+ * \internal
97a979
+ * \brief Add a fencing result to an XML element as attributes
97a979
+ *
97a979
+ * \param[in] xml     XML element to add result to
97a979
+ * \param[in] result  Fencing result to add (assume success if NULL)
97a979
+ */
97a979
+void
97a979
+stonith__xe_set_result(xmlNode *xml, const pcmk__action_result_t *result)
97a979
+{
97a979
+    int exit_status = CRM_EX_OK;
97a979
+    enum pcmk_exec_status execution_status = PCMK_EXEC_DONE;
97a979
+    const char *exit_reason = NULL;
97a979
+    const char *action_stdout = NULL;
97a979
+    int rc = pcmk_ok;
97a979
+
97a979
+    CRM_CHECK(xml != NULL, return);
97a979
+
97a979
+    if (result != NULL) {
97a979
+        exit_status = result->exit_status;
97a979
+        execution_status = result->execution_status;
97a979
+        exit_reason = result->exit_reason;
97a979
+        action_stdout = result->action_stdout;
97a979
+        rc = pcmk_rc2legacy(stonith__result2rc(result));
97a979
+    }
97a979
+
97a979
+    crm_xml_add_int(xml, XML_LRM_ATTR_OPSTATUS, (int) execution_status);
97a979
+    crm_xml_add_int(xml, XML_LRM_ATTR_RC, exit_status);
97a979
+    crm_xml_add(xml, XML_LRM_ATTR_EXIT_REASON, exit_reason);
97a979
+    crm_xml_add(xml, "st_output", action_stdout);
97a979
+
97a979
+    /* @COMPAT Peers in rolling upgrades, Pacemaker Remote nodes, and external
97a979
+     * code that use libstonithd <=2.1.2 don't check for the full result, and
97a979
+     * need a legacy return code instead.
97a979
+     */
97a979
+    crm_xml_add_int(xml, F_STONITH_RC, rc);
97a979
+}
97a979
+
97a979
+/*!
97a979
+ * \internal
97a979
+ * \brief Find a fencing result beneath an XML element
97a979
+ *
97a979
+ * \param[in]  xml     XML element to search
97a979
+ *
97a979
+ * \return \p xml or descendent of it that contains a fencing result, else NULL
97a979
+ */
97a979
+xmlNode *
97a979
+stonith__find_xe_with_result(xmlNode *xml)
97a979
+{
97a979
+    xmlNode *match = get_xpath_object("//@" XML_LRM_ATTR_RC, xml, LOG_NEVER);
97a979
+
97a979
+    if (match == NULL) {
97a979
+        /* @COMPAT Peers <=2.1.2 in a rolling upgrade provide only a legacy
97a979
+         * return code, not a full result, so check for that.
97a979
+         */
97a979
+        match = get_xpath_object("//@" F_STONITH_RC, xml, LOG_ERR);
97a979
+    }
97a979
+    return match;
97a979
+}
97a979
+
97a979
+/*!
97a979
+ * \internal
97a979
+ * \brief Get a fencing result from an XML element's attributes
97a979
+ *
97a979
+ * \param[in]  xml     XML element with fencing result
97a979
+ * \param[out] result  Where to store fencing result
97a979
+ */
97a979
+void
97a979
+stonith__xe_get_result(xmlNode *xml, pcmk__action_result_t *result)
97a979
+{
97a979
+    int exit_status = CRM_EX_OK;
97a979
+    int execution_status = PCMK_EXEC_DONE;
97a979
+    const char *exit_reason = NULL;
97a979
+    char *action_stdout = NULL;
97a979
+
97a979
+    CRM_CHECK((xml != NULL) && (result != NULL), return);
97a979
+
97a979
+    exit_reason = crm_element_value(xml, XML_LRM_ATTR_EXIT_REASON);
97a979
+    action_stdout = crm_element_value_copy(xml, "st_output");
97a979
+
97a979
+    // A result must include an exit status and execution status
97a979
+    if ((crm_element_value_int(xml, XML_LRM_ATTR_RC, &exit_status) < 0)
97a979
+        || (crm_element_value_int(xml, XML_LRM_ATTR_OPSTATUS,
97a979
+                                  &execution_status) < 0)) {
97a979
+        int rc = pcmk_ok;
97a979
+        exit_status = CRM_EX_ERROR;
97a979
+
97a979
+        /* @COMPAT Peers <=2.1.2 in rolling upgrades provide only a legacy
97a979
+         * return code, not a full result, so check for that.
97a979
+         */
97a979
+        if (crm_element_value_int(xml, F_STONITH_RC, &rc) == 0) {
97a979
+            if ((rc == pcmk_ok) || (rc == -EINPROGRESS)) {
97a979
+                exit_status = CRM_EX_OK;
97a979
+            }
97a979
+            execution_status = stonith__legacy2status(rc);
97a979
+            exit_reason = pcmk_strerror(rc);
97a979
+
97a979
+        } else {
97a979
+            execution_status = PCMK_EXEC_ERROR;
97a979
+            exit_reason = "Fencer reply contained neither a full result "
97a979
+                          "nor a legacy return code (bug?)";
97a979
+        }
97a979
+    }
97a979
+    pcmk__set_result(result, exit_status, execution_status, exit_reason);
97a979
+    pcmk__set_result_output(result, action_stdout, NULL);
97a979
+}
97a979
+
97a979
 static void
97a979
 stonith_action_async_done(svc_action_t *svc_action)
97a979
 {
97a979
-- 
97a979
2.27.0
97a979
97a979
97a979
From 1f0121c6ad0d0235bcf01c8b60f9153592b3db83 Mon Sep 17 00:00:00 2001
97a979
From: Ken Gaillot <kgaillot@redhat.com>
97a979
Date: Thu, 11 Nov 2021 10:10:53 -0600
97a979
Subject: [PATCH 07/13] Refactor: fencing: rename functions for invoking fence
97a979
 callbacks
97a979
97a979
... to make it clearer what the difference between them is
97a979
---
97a979
 lib/fencing/st_client.c | 44 +++++++++++++++++++++++++++++++++--------
97a979
 1 file changed, 36 insertions(+), 8 deletions(-)
97a979
97a979
diff --git a/lib/fencing/st_client.c b/lib/fencing/st_client.c
97a979
index 944cd1863..dfc5860fc 100644
97a979
--- a/lib/fencing/st_client.c
97a979
+++ b/lib/fencing/st_client.c
97a979
@@ -847,9 +847,21 @@ stonith_api_del_callback(stonith_t * stonith, int call_id, bool all_callbacks)
97a979
     return pcmk_ok;
97a979
 }
97a979
 
97a979
+/*!
97a979
+ * \internal
97a979
+ * \brief Invoke a (single) specified fence action callback
97a979
+ *
97a979
+ * \param[in] st        Fencer API connection
97a979
+ * \param[in] call_id   If positive, call ID of completed fence action, otherwise
97a979
+ *                      legacy return code for early action failure
97a979
+ * \param[in] rc        Legacy return code for action result
97a979
+ * \param[in] userdata  User data to pass to callback
97a979
+ * \param[in] callback  Fence action callback to invoke
97a979
+ */
97a979
 static void
97a979
-invoke_callback(stonith_t * st, int call_id, int rc, void *userdata,
97a979
-                void (*callback) (stonith_t * st, stonith_callback_data_t * data))
97a979
+invoke_fence_action_callback(stonith_t *st, int call_id, int rc, void *userdata,
97a979
+                             void (*callback) (stonith_t *st,
97a979
+                                               stonith_callback_data_t *data))
97a979
 {
97a979
     stonith_callback_data_t data = { 0, };
97a979
 
97a979
@@ -860,8 +872,21 @@ invoke_callback(stonith_t * st, int call_id, int rc, void *userdata,
97a979
     callback(st, &data);
97a979
 }
97a979
 
97a979
+/*!
97a979
+ * \internal
97a979
+ * \brief Invoke any callbacks registered for a specified fence action result
97a979
+ *
97a979
+ * Given a fence action result from the fencer, invoke any callback registered
97a979
+ * for that action, as well as any global callback registered.
97a979
+ *
97a979
+ * \param[in] st        Fencer API connection
97a979
+ * \param[in] msg       If non-NULL, fencer reply
97a979
+ * \param[in] call_id   If \p msg is NULL, call ID of action that timed out
97a979
+ * \param[in] rc        Legacy return code for result of action
97a979
+ */
97a979
 static void
97a979
-stonith_perform_callback(stonith_t * stonith, xmlNode * msg, int call_id, int rc)
97a979
+invoke_registered_callbacks(stonith_t *stonith, xmlNode *msg, int call_id,
97a979
+                            int rc)
97a979
 {
97a979
     stonith_private_t *private = NULL;
97a979
     stonith_callback_client_t *blob = NULL;
97a979
@@ -899,7 +924,8 @@ stonith_perform_callback(stonith_t * stonith, xmlNode * msg, int call_id, int rc
97a979
 
97a979
     if (local_blob.callback != NULL && (rc == pcmk_ok || local_blob.only_success == FALSE)) {
97a979
         crm_trace("Invoking callback %s for call %d", crm_str(local_blob.id), call_id);
97a979
-        invoke_callback(stonith, call_id, rc, local_blob.user_data, local_blob.callback);
97a979
+        invoke_fence_action_callback(stonith, call_id, rc, local_blob.user_data,
97a979
+                                     local_blob.callback);
97a979
 
97a979
     } else if (private->op_callback == NULL && rc != pcmk_ok) {
97a979
         crm_warn("Fencing command failed: %s", pcmk_strerror(rc));
97a979
@@ -908,7 +934,8 @@ stonith_perform_callback(stonith_t * stonith, xmlNode * msg, int call_id, int rc
97a979
 
97a979
     if (private->op_callback != NULL) {
97a979
         crm_trace("Invoking global callback for call %d", call_id);
97a979
-        invoke_callback(stonith, call_id, rc, NULL, private->op_callback);
97a979
+        invoke_fence_action_callback(stonith, call_id, rc, NULL,
97a979
+                                     private->op_callback);
97a979
     }
97a979
     crm_trace("OP callback activated.");
97a979
 }
97a979
@@ -919,7 +946,7 @@ stonith_async_timeout_handler(gpointer data)
97a979
     struct timer_rec_s *timer = data;
97a979
 
97a979
     crm_err("Async call %d timed out after %dms", timer->call_id, timer->timeout);
97a979
-    stonith_perform_callback(timer->stonith, NULL, timer->call_id, -ETIME);
97a979
+    invoke_registered_callbacks(timer->stonith, NULL, timer->call_id, -ETIME);
97a979
 
97a979
     /* Always return TRUE, never remove the handler
97a979
      * We do that in stonith_del_callback()
97a979
@@ -994,7 +1021,7 @@ stonith_dispatch_internal(const char *buffer, ssize_t length, gpointer userdata)
97a979
     crm_trace("Activating %s callbacks...", type);
97a979
 
97a979
     if (pcmk__str_eq(type, T_STONITH_NG, pcmk__str_casei)) {
97a979
-        stonith_perform_callback(st, blob.xml, 0, 0);
97a979
+        invoke_registered_callbacks(st, blob.xml, 0, 0);
97a979
 
97a979
     } else if (pcmk__str_eq(type, T_STONITH_NOTIFY, pcmk__str_casei)) {
97a979
         foreach_notify_entry(private, stonith_send_notification, &blob;;
97a979
@@ -1229,7 +1256,8 @@ stonith_api_add_callback(stonith_t * stonith, int call_id, int timeout, int opti
97a979
     } else if (call_id < 0) {
97a979
         if (!(options & st_opt_report_only_success)) {
97a979
             crm_trace("Call failed, calling %s: %s", callback_name, pcmk_strerror(call_id));
97a979
-            invoke_callback(stonith, call_id, call_id, user_data, callback);
97a979
+            invoke_fence_action_callback(stonith, call_id, call_id, user_data,
97a979
+                                         callback);
97a979
         } else {
97a979
             crm_warn("Fencer call failed: %s", pcmk_strerror(call_id));
97a979
         }
97a979
-- 
97a979
2.27.0
97a979
97a979
97a979
From c32f11e70a88244f5a3217608055a4eaf8d28231 Mon Sep 17 00:00:00 2001
97a979
From: Ken Gaillot <kgaillot@redhat.com>
97a979
Date: Thu, 11 Nov 2021 10:21:00 -0600
97a979
Subject: [PATCH 08/13] Refactor: fencing: drop unnecessary argument when
97a979
 invoking callbacks
97a979
97a979
Refactor invoke_registered_callbacks() to treat a NULL message as a timeout, so
97a979
we can drop the rc argument.
97a979
---
97a979
 lib/fencing/st_client.c | 17 +++++++++++------
97a979
 1 file changed, 11 insertions(+), 6 deletions(-)
97a979
97a979
diff --git a/lib/fencing/st_client.c b/lib/fencing/st_client.c
97a979
index dfc5860fc..9f2b0c1c1 100644
97a979
--- a/lib/fencing/st_client.c
97a979
+++ b/lib/fencing/st_client.c
97a979
@@ -882,15 +882,14 @@ invoke_fence_action_callback(stonith_t *st, int call_id, int rc, void *userdata,
97a979
  * \param[in] st        Fencer API connection
97a979
  * \param[in] msg       If non-NULL, fencer reply
97a979
  * \param[in] call_id   If \p msg is NULL, call ID of action that timed out
97a979
- * \param[in] rc        Legacy return code for result of action
97a979
  */
97a979
 static void
97a979
-invoke_registered_callbacks(stonith_t *stonith, xmlNode *msg, int call_id,
97a979
-                            int rc)
97a979
+invoke_registered_callbacks(stonith_t *stonith, xmlNode *msg, int call_id)
97a979
 {
97a979
     stonith_private_t *private = NULL;
97a979
     stonith_callback_client_t *blob = NULL;
97a979
     stonith_callback_client_t local_blob;
97a979
+    int rc = pcmk_ok;
97a979
 
97a979
     CRM_CHECK(stonith != NULL, return);
97a979
     CRM_CHECK(stonith->st_private != NULL, return);
97a979
@@ -902,7 +901,13 @@ invoke_registered_callbacks(stonith_t *stonith, xmlNode *msg, int call_id,
97a979
     local_blob.user_data = NULL;
97a979
     local_blob.only_success = FALSE;
97a979
 
97a979
-    if (msg != NULL) {
97a979
+    if (msg == NULL) {
97a979
+        // Fencer didn't reply in time
97a979
+        rc = -ETIME;
97a979
+
97a979
+    } else {
97a979
+        // We have the fencer reply
97a979
+
97a979
         crm_element_value_int(msg, F_STONITH_RC, &rc);
97a979
         crm_element_value_int(msg, F_STONITH_CALLID, &call_id);
97a979
     }
97a979
@@ -946,7 +951,7 @@ stonith_async_timeout_handler(gpointer data)
97a979
     struct timer_rec_s *timer = data;
97a979
 
97a979
     crm_err("Async call %d timed out after %dms", timer->call_id, timer->timeout);
97a979
-    invoke_registered_callbacks(timer->stonith, NULL, timer->call_id, -ETIME);
97a979
+    invoke_registered_callbacks(timer->stonith, NULL, timer->call_id);
97a979
 
97a979
     /* Always return TRUE, never remove the handler
97a979
      * We do that in stonith_del_callback()
97a979
@@ -1021,7 +1026,7 @@ stonith_dispatch_internal(const char *buffer, ssize_t length, gpointer userdata)
97a979
     crm_trace("Activating %s callbacks...", type);
97a979
 
97a979
     if (pcmk__str_eq(type, T_STONITH_NG, pcmk__str_casei)) {
97a979
-        invoke_registered_callbacks(st, blob.xml, 0, 0);
97a979
+        invoke_registered_callbacks(st, blob.xml, 0);
97a979
 
97a979
     } else if (pcmk__str_eq(type, T_STONITH_NOTIFY, pcmk__str_casei)) {
97a979
         foreach_notify_entry(private, stonith_send_notification, &blob;;
97a979
-- 
97a979
2.27.0
97a979
97a979
97a979
From 5d8279b51ea9df738354649e4065663f2c16f1e6 Mon Sep 17 00:00:00 2001
97a979
From: Ken Gaillot <kgaillot@redhat.com>
97a979
Date: Thu, 11 Nov 2021 10:21:57 -0600
97a979
Subject: [PATCH 09/13] Log: fencing: improve message for callback errors
97a979
97a979
Improve checking of fencer replies, which also allows us to distinguish an
97a979
internal bug from a bad fencer reply in logs. Lower the bad reply message to
97a979
warning.
97a979
---
97a979
 lib/fencing/st_client.c | 13 +++++++++----
97a979
 1 file changed, 9 insertions(+), 4 deletions(-)
97a979
97a979
diff --git a/lib/fencing/st_client.c b/lib/fencing/st_client.c
97a979
index 9f2b0c1c1..170b9d450 100644
97a979
--- a/lib/fencing/st_client.c
97a979
+++ b/lib/fencing/st_client.c
97a979
@@ -904,15 +904,20 @@ invoke_registered_callbacks(stonith_t *stonith, xmlNode *msg, int call_id)
97a979
     if (msg == NULL) {
97a979
         // Fencer didn't reply in time
97a979
         rc = -ETIME;
97a979
+        CRM_LOG_ASSERT(call_id > 0);
97a979
 
97a979
     } else {
97a979
         // We have the fencer reply
97a979
 
97a979
-        crm_element_value_int(msg, F_STONITH_RC, &rc);
97a979
-        crm_element_value_int(msg, F_STONITH_CALLID, &call_id);
97a979
-    }
97a979
+        if (crm_element_value_int(msg, F_STONITH_RC, &rc) != 0) {
97a979
+            rc = -pcmk_err_generic;
97a979
+        }
97a979
 
97a979
-    CRM_CHECK(call_id > 0, crm_log_xml_err(msg, "Bad result"));
97a979
+        if ((crm_element_value_int(msg, F_STONITH_CALLID, &call_id) != 0)
97a979
+            || (call_id <= 0)) {
97a979
+            crm_log_xml_warn(msg, "Bad fencer reply");
97a979
+        }
97a979
+    }
97a979
 
97a979
     blob = pcmk__intkey_table_lookup(private->stonith_op_callback_table,
97a979
                                      call_id);
97a979
-- 
97a979
2.27.0
97a979
97a979
97a979
From e03c14d24e8cb011e870b9460930d139705bf0a2 Mon Sep 17 00:00:00 2001
97a979
From: Ken Gaillot <kgaillot@redhat.com>
97a979
Date: Tue, 9 Nov 2021 14:59:12 -0600
97a979
Subject: [PATCH 10/13] Doc: fencing: correct stonith_api_operations_t method
97a979
 descriptions
97a979
97a979
Many of the methods return a positive call ID on success
97a979
---
97a979
 include/crm/stonith-ng.h | 60 ++++++++++++++++++++++------------------
97a979
 1 file changed, 33 insertions(+), 27 deletions(-)
97a979
97a979
diff --git a/include/crm/stonith-ng.h b/include/crm/stonith-ng.h
97a979
index 8d6ad477d..9643820e9 100644
97a979
--- a/include/crm/stonith-ng.h
97a979
+++ b/include/crm/stonith-ng.h
97a979
@@ -164,39 +164,38 @@ typedef struct stonith_api_operations_s
97a979
     int (*disconnect)(stonith_t *st);
97a979
 
97a979
     /*!
97a979
-     * \brief Remove a registered stonith device with the local stonith daemon.
97a979
+     * \brief Unregister a fence device with the local fencer
97a979
      *
97a979
-     * \note Synchronous, guaranteed to occur in daemon before function returns.
97a979
-     *
97a979
-     * \return Legacy Pacemaker return code
97a979
+     * \return pcmk_ok (if synchronous) or positive call ID (if asynchronous)
97a979
+     *         on success, otherwise a negative legacy Pacemaker return code
97a979
      */
97a979
     int (*remove_device)(
97a979
         stonith_t *st, int options, const char *name);
97a979
 
97a979
     /*!
97a979
-     * \brief Register a stonith device with the local stonith daemon.
97a979
+     * \brief Register a fence device with the local fencer
97a979
      *
97a979
-     * \note Synchronous, guaranteed to occur in daemon before function returns.
97a979
-     *
97a979
-     * \return Legacy Pacemaker return code
97a979
+     * \return pcmk_ok (if synchronous) or positive call ID (if asynchronous)
97a979
+     *         on success, otherwise a negative legacy Pacemaker return code
97a979
      */
97a979
     int (*register_device)(
97a979
         stonith_t *st, int options, const char *id,
97a979
         const char *provider, const char *agent, stonith_key_value_t *params);
97a979
 
97a979
     /*!
97a979
-     * \brief Remove a fencing level for a specific node.
97a979
+     * \brief Unregister a fencing level for specified node with local fencer
97a979
      *
97a979
-     * \return Legacy Pacemaker return code
97a979
+     * \return pcmk_ok (if synchronous) or positive call ID (if asynchronous)
97a979
+     *         on success, otherwise a negative legacy Pacemaker return code
97a979
      */
97a979
     int (*remove_level)(
97a979
         stonith_t *st, int options, const char *node, int level);
97a979
 
97a979
     /*!
97a979
-     * \brief Register a fencing level containing the fencing devices to be used
97a979
-     *        at that level for a specific node.
97a979
+     * \brief Register a fencing level for specified node with local fencer
97a979
      *
97a979
-     * \return Legacy Pacemaker return code
97a979
+     * \return pcmk_ok (if synchronous) or positive call ID (if asynchronous)
97a979
+     *         on success, otherwise a negative legacy Pacemaker return code
97a979
      */
97a979
     int (*register_level)(
97a979
         stonith_t *st, int options, const char *node, int level, stonith_key_value_t *device_list);
97a979
@@ -226,21 +225,24 @@ typedef struct stonith_api_operations_s
97a979
     /*!
97a979
      * \brief Retrieve string listing hosts and port assignments from a local stonith device.
97a979
      *
97a979
-     * \return Legacy Pacemaker return code
97a979
+     * \return pcmk_ok (if synchronous) or positive call ID (if asynchronous)
97a979
+     *         on success, otherwise a negative legacy Pacemaker return code
97a979
      */
97a979
     int (*list)(stonith_t *st, int options, const char *id, char **list_output, int timeout);
97a979
 
97a979
     /*!
97a979
      * \brief Check to see if a local stonith device is reachable
97a979
      *
97a979
-     * \return Legacy Pacemaker return code
97a979
+     * \return pcmk_ok (if synchronous) or positive call ID (if asynchronous)
97a979
+     *         on success, otherwise a negative legacy Pacemaker return code
97a979
      */
97a979
     int (*monitor)(stonith_t *st, int options, const char *id, int timeout);
97a979
 
97a979
     /*!
97a979
      * \brief Check to see if a local stonith device's port is reachable
97a979
      *
97a979
-     * \return Legacy Pacemaker return code
97a979
+     * \return pcmk_ok (if synchronous) or positive call ID (if asynchronous)
97a979
+     *         on success, otherwise a negative legacy Pacemaker return code
97a979
      */
97a979
     int (*status)(stonith_t *st, int options, const char *id, const char *port, int timeout);
97a979
 
97a979
@@ -267,7 +269,8 @@ typedef struct stonith_api_operations_s
97a979
      * \param timeout, The default per device timeout to use with each device
97a979
      *                 capable of fencing the target.
97a979
      *
97a979
-     * \return Legacy Pacemaker return code
97a979
+     * \return pcmk_ok (if synchronous) or positive call ID (if asynchronous)
97a979
+     *         on success, otherwise a negative legacy Pacemaker return code
97a979
      */
97a979
     int (*fence)(stonith_t *st, int options, const char *node, const char *action,
97a979
                  int timeout, int tolerance);
97a979
@@ -275,7 +278,8 @@ typedef struct stonith_api_operations_s
97a979
     /*!
97a979
      * \brief Manually confirm that a node is down.
97a979
      *
97a979
-     * \return Legacy Pacemaker return code
97a979
+     * \return pcmk_ok (if synchronous) or positive call ID (if asynchronous)
97a979
+     *         on success, otherwise a negative legacy Pacemaker return code
97a979
      */
97a979
     int (*confirm)(stonith_t *st, int options, const char *node);
97a979
 
97a979
@@ -304,9 +308,6 @@ typedef struct stonith_api_operations_s
97a979
      * \param[in] callback       The callback function to register
97a979
      *
97a979
      * \return \c TRUE on success, \c FALSE if call_id is negative, -errno otherwise
97a979
-     *
97a979
-     * \todo This function should return \c pcmk_ok on success, and \c call_id
97a979
-     *       when negative, but that would break backward compatibility.
97a979
      */
97a979
     int (*register_callback)(stonith_t *st,
97a979
         int call_id,
97a979
@@ -317,12 +318,14 @@ typedef struct stonith_api_operations_s
97a979
         void (*callback)(stonith_t *st, stonith_callback_data_t *data));
97a979
 
97a979
     /*!
97a979
-     * \brief Remove a registered callback for a given call id.
97a979
+     * \brief Remove a registered callback for a given call id
97a979
+     *
97a979
+     * \return pcmk_ok
97a979
      */
97a979
     int (*remove_callback)(stonith_t *st, int call_id, bool all_callbacks);
97a979
 
97a979
     /*!
97a979
-     * \brief Remove fencing level for specific node, node regex or attribute
97a979
+     * \brief Unregister fencing level for specified node, pattern or attribute
97a979
      *
97a979
      * \param[in] st      Fencer connection to use
97a979
      * \param[in] options Bitmask of stonith_call_options to pass to the fencer
97a979
@@ -332,7 +335,8 @@ typedef struct stonith_api_operations_s
97a979
      * \param[in] value   If not NULL, target by this node attribute value
97a979
      * \param[in] level   Index number of level to remove
97a979
      *
97a979
-     * \return 0 on success, negative error code otherwise
97a979
+     * \return pcmk_ok (if synchronous) or positive call ID (if asynchronous)
97a979
+     *         on success, otherwise a negative legacy Pacemaker return code
97a979
      *
97a979
      * \note The caller should set only one of node, pattern or attr/value.
97a979
      */
97a979
@@ -341,7 +345,7 @@ typedef struct stonith_api_operations_s
97a979
                              const char *attr, const char *value, int level);
97a979
 
97a979
     /*!
97a979
-     * \brief Register fencing level for specific node, node regex or attribute
97a979
+     * \brief Register fencing level for specified node, pattern or attribute
97a979
      *
97a979
      * \param[in] st          Fencer connection to use
97a979
      * \param[in] options     Bitmask of stonith_call_options to pass to fencer
97a979
@@ -352,7 +356,8 @@ typedef struct stonith_api_operations_s
97a979
      * \param[in] level       Index number of level to add
97a979
      * \param[in] device_list Devices to use in level
97a979
      *
97a979
-     * \return 0 on success, negative error code otherwise
97a979
+     * \return pcmk_ok (if synchronous) or positive call ID (if asynchronous)
97a979
+     *         on success, otherwise a negative legacy Pacemaker return code
97a979
      *
97a979
      * \note The caller should set only one of node, pattern or attr/value.
97a979
      */
97a979
@@ -398,7 +403,8 @@ typedef struct stonith_api_operations_s
97a979
      * \param delay, Apply a fencing delay. Value -1 means disable also any
97a979
      *               static/random fencing delays from pcmk_delay_base/max
97a979
      *
97a979
-     * \return Legacy Pacemaker return code
97a979
+     * \return pcmk_ok (if synchronous) or positive call ID (if asynchronous)
97a979
+     *         on success, otherwise a negative legacy Pacemaker return code
97a979
      */
97a979
     int (*fence_with_delay)(stonith_t *st, int options, const char *node, const char *action,
97a979
                             int timeout, int tolerance, int delay);
97a979
-- 
97a979
2.27.0
97a979
97a979
97a979
From 18c382731889b626b21ba6a14f9213ef1e45a524 Mon Sep 17 00:00:00 2001
97a979
From: Ken Gaillot <kgaillot@redhat.com>
97a979
Date: Tue, 23 Nov 2021 11:14:24 -0600
97a979
Subject: [PATCH 11/13] Refactor: fencing: define constant for XML attribute
97a979
 for action output
97a979
97a979
---
97a979
 daemons/fenced/fenced_commands.c | 4 ++--
97a979
 include/crm/fencing/internal.h   | 1 +
97a979
 lib/fencing/st_actions.c         | 4 ++--
97a979
 lib/fencing/st_client.c          | 2 +-
97a979
 4 files changed, 6 insertions(+), 5 deletions(-)
97a979
97a979
diff --git a/daemons/fenced/fenced_commands.c b/daemons/fenced/fenced_commands.c
97a979
index 26501a4b3..aa14c52af 100644
97a979
--- a/daemons/fenced/fenced_commands.c
97a979
+++ b/daemons/fenced/fenced_commands.c
97a979
@@ -2677,7 +2677,7 @@ stonith_construct_reply(xmlNode * request, const char *output, xmlNode * data, i
97a979
 
97a979
     crm_xml_add(reply, "st_origin", __func__);
97a979
     crm_xml_add(reply, F_TYPE, T_STONITH_NG);
97a979
-    crm_xml_add(reply, "st_output", output);
97a979
+    crm_xml_add(reply, F_STONITH_OUTPUT, output);
97a979
     crm_xml_add_int(reply, F_STONITH_RC, rc);
97a979
 
97a979
     if (request == NULL) {
97a979
@@ -2743,7 +2743,7 @@ construct_async_reply(async_command_t *cmd, const pcmk__action_result_t *result)
97a979
     crm_xml_add_int(reply, F_STONITH_CALLOPTS, cmd->options);
97a979
     crm_xml_add_int(reply, F_STONITH_RC,
97a979
                     pcmk_rc2legacy(stonith__result2rc(result)));
97a979
-    crm_xml_add(reply, "st_output", result->action_stdout);
97a979
+    crm_xml_add(reply, F_STONITH_OUTPUT, result->action_stdout);
97a979
     return reply;
97a979
 }
97a979
 
97a979
diff --git a/include/crm/fencing/internal.h b/include/crm/fencing/internal.h
97a979
index 4b5fd3959..f0d294a0b 100644
97a979
--- a/include/crm/fencing/internal.h
97a979
+++ b/include/crm/fencing/internal.h
97a979
@@ -105,6 +105,7 @@ void stonith__device_parameter_flags(uint32_t *device_flags,
97a979
 #  define F_STONITH_REMOTE_OP_ID  "st_remote_op"
97a979
 #  define F_STONITH_REMOTE_OP_ID_RELAY  "st_remote_op_relay"
97a979
 #  define F_STONITH_RC            "st_rc"
97a979
+#  define F_STONITH_OUTPUT        "st_output"
97a979
 /*! Timeout period per a device execution */
97a979
 #  define F_STONITH_TIMEOUT       "st_timeout"
97a979
 #  define F_STONITH_TOLERANCE     "st_tolerance"
97a979
diff --git a/lib/fencing/st_actions.c b/lib/fencing/st_actions.c
97a979
index d4fc3f5ed..5636810a5 100644
97a979
--- a/lib/fencing/st_actions.c
97a979
+++ b/lib/fencing/st_actions.c
97a979
@@ -425,7 +425,7 @@ stonith__xe_set_result(xmlNode *xml, const pcmk__action_result_t *result)
97a979
     crm_xml_add_int(xml, XML_LRM_ATTR_OPSTATUS, (int) execution_status);
97a979
     crm_xml_add_int(xml, XML_LRM_ATTR_RC, exit_status);
97a979
     crm_xml_add(xml, XML_LRM_ATTR_EXIT_REASON, exit_reason);
97a979
-    crm_xml_add(xml, "st_output", action_stdout);
97a979
+    crm_xml_add(xml, F_STONITH_OUTPUT, action_stdout);
97a979
 
97a979
     /* @COMPAT Peers in rolling upgrades, Pacemaker Remote nodes, and external
97a979
      * code that use libstonithd <=2.1.2 don't check for the full result, and
97a979
@@ -474,7 +474,7 @@ stonith__xe_get_result(xmlNode *xml, pcmk__action_result_t *result)
97a979
     CRM_CHECK((xml != NULL) && (result != NULL), return);
97a979
 
97a979
     exit_reason = crm_element_value(xml, XML_LRM_ATTR_EXIT_REASON);
97a979
-    action_stdout = crm_element_value_copy(xml, "st_output");
97a979
+    action_stdout = crm_element_value_copy(xml, F_STONITH_OUTPUT);
97a979
 
97a979
     // A result must include an exit status and execution status
97a979
     if ((crm_element_value_int(xml, XML_LRM_ATTR_RC, &exit_status) < 0)
97a979
diff --git a/lib/fencing/st_client.c b/lib/fencing/st_client.c
97a979
index 170b9d450..2dfadf922 100644
97a979
--- a/lib/fencing/st_client.c
97a979
+++ b/lib/fencing/st_client.c
97a979
@@ -600,7 +600,7 @@ stonith_api_list(stonith_t * stonith, int call_options, const char *id, char **l
97a979
     if (output && list_info) {
97a979
         const char *list_str;
97a979
 
97a979
-        list_str = crm_element_value(output, "st_output");
97a979
+        list_str = crm_element_value(output, F_STONITH_OUTPUT);
97a979
 
97a979
         if (list_str) {
97a979
             *list_info = strdup(list_str);
97a979
-- 
97a979
2.27.0
97a979
97a979
97a979
From 9fe9ed5d46c810cb9c12eb07271373ab92d271cd Mon Sep 17 00:00:00 2001
97a979
From: Ken Gaillot <kgaillot@redhat.com>
97a979
Date: Tue, 23 Nov 2021 11:39:32 -0600
97a979
Subject: [PATCH 12/13] Refactor: fencing: simplify invoking callbacks
97a979
97a979
---
97a979
 lib/fencing/st_client.c | 42 +++++++++++++++++------------------------
97a979
 1 file changed, 17 insertions(+), 25 deletions(-)
97a979
97a979
diff --git a/lib/fencing/st_client.c b/lib/fencing/st_client.c
97a979
index 2dfadf922..2ca094566 100644
97a979
--- a/lib/fencing/st_client.c
97a979
+++ b/lib/fencing/st_client.c
97a979
@@ -887,8 +887,7 @@ static void
97a979
 invoke_registered_callbacks(stonith_t *stonith, xmlNode *msg, int call_id)
97a979
 {
97a979
     stonith_private_t *private = NULL;
97a979
-    stonith_callback_client_t *blob = NULL;
97a979
-    stonith_callback_client_t local_blob;
97a979
+    stonith_callback_client_t *cb_info = NULL;
97a979
     int rc = pcmk_ok;
97a979
 
97a979
     CRM_CHECK(stonith != NULL, return);
97a979
@@ -896,11 +895,6 @@ invoke_registered_callbacks(stonith_t *stonith, xmlNode *msg, int call_id)
97a979
 
97a979
     private = stonith->st_private;
97a979
 
97a979
-    local_blob.id = NULL;
97a979
-    local_blob.callback = NULL;
97a979
-    local_blob.user_data = NULL;
97a979
-    local_blob.only_success = FALSE;
97a979
-
97a979
     if (msg == NULL) {
97a979
         // Fencer didn't reply in time
97a979
         rc = -ETIME;
97a979
@@ -919,26 +913,21 @@ invoke_registered_callbacks(stonith_t *stonith, xmlNode *msg, int call_id)
97a979
         }
97a979
     }
97a979
 
97a979
-    blob = pcmk__intkey_table_lookup(private->stonith_op_callback_table,
97a979
-                                     call_id);
97a979
-    if (blob != NULL) {
97a979
-        local_blob = *blob;
97a979
-        blob = NULL;
97a979
-
97a979
-        stonith_api_del_callback(stonith, call_id, FALSE);
97a979
-
97a979
-    } else {
97a979
-        crm_trace("No callback found for call %d", call_id);
97a979
-        local_blob.callback = NULL;
97a979
+    if (call_id > 0) {
97a979
+        cb_info = pcmk__intkey_table_lookup(private->stonith_op_callback_table,
97a979
+                                            call_id);
97a979
     }
97a979
 
97a979
-    if (local_blob.callback != NULL && (rc == pcmk_ok || local_blob.only_success == FALSE)) {
97a979
-        crm_trace("Invoking callback %s for call %d", crm_str(local_blob.id), call_id);
97a979
-        invoke_fence_action_callback(stonith, call_id, rc, local_blob.user_data,
97a979
-                                     local_blob.callback);
97a979
+    if ((cb_info != NULL) && (cb_info->callback != NULL)
97a979
+        && (rc == pcmk_ok || !(cb_info->only_success))) {
97a979
+        crm_trace("Invoking callback %s for call %d",
97a979
+                  crm_str(cb_info->id), call_id);
97a979
+        invoke_fence_action_callback(stonith, call_id, rc, cb_info->user_data,
97a979
+                                     cb_info->callback);
97a979
 
97a979
-    } else if (private->op_callback == NULL && rc != pcmk_ok) {
97a979
-        crm_warn("Fencing command failed: %s", pcmk_strerror(rc));
97a979
+    } else if ((private->op_callback == NULL) && (rc != pcmk_ok)) {
97a979
+        crm_warn("Fencing action without registered callback failed: %s",
97a979
+                 pcmk_strerror(rc));
97a979
         crm_log_xml_debug(msg, "Failed fence update");
97a979
     }
97a979
 
97a979
@@ -947,7 +936,10 @@ invoke_registered_callbacks(stonith_t *stonith, xmlNode *msg, int call_id)
97a979
         invoke_fence_action_callback(stonith, call_id, rc, NULL,
97a979
                                      private->op_callback);
97a979
     }
97a979
-    crm_trace("OP callback activated.");
97a979
+
97a979
+    if (cb_info != NULL) {
97a979
+        stonith_api_del_callback(stonith, call_id, FALSE);
97a979
+    }
97a979
 }
97a979
 
97a979
 static gboolean
97a979
-- 
97a979
2.27.0
97a979
97a979
97a979
From 8113b800ce677ba17a16ca176e8f6f9b4a042316 Mon Sep 17 00:00:00 2001
97a979
From: Ken Gaillot <kgaillot@redhat.com>
97a979
Date: Tue, 23 Nov 2021 18:14:48 -0600
97a979
Subject: [PATCH 13/13] Refactor: fencing: add a missing "break" statement
97a979
97a979
No effect, but more correct
97a979
---
97a979
 lib/fencing/st_actions.c | 1 +
97a979
 1 file changed, 1 insertion(+)
97a979
97a979
diff --git a/lib/fencing/st_actions.c b/lib/fencing/st_actions.c
97a979
index 5636810a5..7eaa8b0f2 100644
97a979
--- a/lib/fencing/st_actions.c
97a979
+++ b/lib/fencing/st_actions.c
97a979
@@ -336,6 +336,7 @@ stonith__result2rc(const pcmk__action_result_t *result)
97a979
                 case CRM_EX_EXPIRED:            return EHOSTUNREACH;
97a979
                 default:                        break;
97a979
             }
97a979
+            break;
97a979
 
97a979
         default:
97a979
             break;
97a979
-- 
97a979
2.27.0
97a979