Blame SOURCES/003-fencing-reasons.patch

533c21
From 8e6362cb2129bd56f817d449a195f3da87a545fa Mon Sep 17 00:00:00 2001
533c21
From: Ken Gaillot <kgaillot@redhat.com>
533c21
Date: Fri, 12 Nov 2021 14:28:56 -0600
533c21
Subject: [PATCH 01/13] Refactor: libcrmcommon,fencer: convenience macro for
533c21
 initializing results
533c21
533c21
for future reuse
533c21
---
533c21
 daemons/fenced/fenced_commands.c      | 14 ++------------
533c21
 include/crm/common/results_internal.h | 15 +++++++++++++++
533c21
 2 files changed, 17 insertions(+), 12 deletions(-)
533c21
533c21
diff --git a/daemons/fenced/fenced_commands.c b/daemons/fenced/fenced_commands.c
533c21
index 87600573e..9f2f1cc40 100644
533c21
--- a/daemons/fenced/fenced_commands.c
533c21
+++ b/daemons/fenced/fenced_commands.c
533c21
@@ -388,12 +388,7 @@ static void
533c21
 report_internal_result(async_command_t *cmd, int exit_status,
533c21
                        int execution_status, const char *exit_reason)
533c21
 {
533c21
-    pcmk__action_result_t result = {
533c21
-        // Ensure we don't pass garbage to free()
533c21
-        .exit_reason = NULL,
533c21
-        .action_stdout = NULL,
533c21
-        .action_stderr = NULL
533c21
-    };
533c21
+    pcmk__action_result_t result = PCMK__UNKNOWN_RESULT;
533c21
 
533c21
     pcmk__set_result(&result, exit_status, execution_status, exit_reason);
533c21
     cmd->done_cb(0, &result, cmd);
533c21
@@ -2616,12 +2611,7 @@ stonith_fence_get_devices_cb(GList * devices, void *user_data)
533c21
     }
533c21
 
533c21
     if (device == NULL) { // No device found
533c21
-        pcmk__action_result_t result = {
533c21
-            // Ensure we don't pass garbage to free()
533c21
-            .exit_reason = NULL,
533c21
-            .action_stdout = NULL,
533c21
-            .action_stderr = NULL
533c21
-        };
533c21
+        pcmk__action_result_t result = PCMK__UNKNOWN_RESULT;
533c21
 
533c21
         pcmk__set_result(&result, CRM_EX_ERROR, PCMK_EXEC_NO_FENCE_DEVICE,
533c21
                          "No fence device configured for target");
533c21
diff --git a/include/crm/common/results_internal.h b/include/crm/common/results_internal.h
533c21
index 804bf2a7a..6befaa0ed 100644
533c21
--- a/include/crm/common/results_internal.h
533c21
+++ b/include/crm/common/results_internal.h
533c21
@@ -30,6 +30,21 @@ typedef struct {
533c21
     char *action_stderr;    // Action error output
533c21
 } pcmk__action_result_t;
533c21
 
533c21
+/*!
533c21
+ * \internal
533c21
+ * \brief Static initialization for an action result
533c21
+ *
533c21
+ * \note Importantly, this ensures pcmk__reset_result() won't try to free
533c21
+ *       garbage.
533c21
+ */
533c21
+#define PCMK__UNKNOWN_RESULT {                  \
533c21
+        .exit_status = CRM_EX_OK,               \
533c21
+        .execution_status = PCMK_EXEC_UNKNOWN,  \
533c21
+        .exit_reason = NULL,                    \
533c21
+        .action_stdout = NULL,                  \
533c21
+        .action_stderr = NULL,                  \
533c21
+    }
533c21
+
533c21
 void pcmk__set_result(pcmk__action_result_t *result, int exit_status,
533c21
                       enum pcmk_exec_status exec_status,
533c21
                       const char *exit_reason);
533c21
-- 
533c21
2.27.0
533c21
533c21
533c21
From 0937c92476ac737a5f5146932824bde8bdd7db98 Mon Sep 17 00:00:00 2001
533c21
From: Ken Gaillot <kgaillot@redhat.com>
533c21
Date: Fri, 12 Nov 2021 16:02:27 -0600
533c21
Subject: [PATCH 02/13] Refactor: various: add convenience function for
533c21
 checking result success
533c21
533c21
A successful pcmk__action_result_t has both exit status CRM_EX_OK (a.k.a
533c21
PCMK_OCF_OK) and execution status PCMK_EXEC_DONE. Since checking that is
533c21
clunky, we sometimes just check exit status, which is less than ideal.
533c21
533c21
The convenience function makes it easy to check both, and improves readability.
533c21
---
533c21
 daemons/controld/controld_remote_ra.c |  4 ++--
533c21
 daemons/execd/execd_commands.c        | 12 ++++++------
533c21
 daemons/fenced/fenced_commands.c      | 14 ++++++--------
533c21
 include/crm/common/results_internal.h | 16 ++++++++++++++++
533c21
 lib/fencing/st_client.c               |  4 ++--
533c21
 lib/fencing/st_rhcs.c                 |  2 +-
533c21
 6 files changed, 33 insertions(+), 19 deletions(-)
533c21
533c21
diff --git a/daemons/controld/controld_remote_ra.c b/daemons/controld/controld_remote_ra.c
533c21
index 74cbfd673..55ac162c7 100644
533c21
--- a/daemons/controld/controld_remote_ra.c
533c21
+++ b/daemons/controld/controld_remote_ra.c
533c21
@@ -297,7 +297,7 @@ static void
533c21
 check_remote_node_state(remote_ra_cmd_t *cmd)
533c21
 {
533c21
     /* Only successful actions can change node state */
533c21
-    if (cmd->result.exit_status != PCMK_OCF_OK) {
533c21
+    if (!pcmk__result_ok(&(cmd->result))) {
533c21
         return;
533c21
     }
533c21
 
533c21
@@ -365,7 +365,7 @@ report_remote_ra_result(remote_ra_cmd_t * cmd)
533c21
     lrmd__set_result(&op, cmd->result.exit_status, cmd->result.execution_status,
533c21
                      cmd->result.exit_reason);
533c21
 
533c21
-    if (cmd->reported_success && (cmd->result.exit_status != PCMK_OCF_OK)) {
533c21
+    if (cmd->reported_success && !pcmk__result_ok(&(cmd->result))) {
533c21
         op.t_rcchange = (unsigned int) time(NULL);
533c21
         /* This edge case will likely never ever occur, but if it does the
533c21
          * result is that a failure will not be processed correctly. This is only
533c21
diff --git a/daemons/execd/execd_commands.c b/daemons/execd/execd_commands.c
533c21
index 667525039..02070bf11 100644
533c21
--- a/daemons/execd/execd_commands.c
533c21
+++ b/daemons/execd/execd_commands.c
533c21
@@ -878,7 +878,7 @@ action_complete(svc_action_t * action)
533c21
     }
533c21
 
533c21
     if (pcmk__str_eq(rclass, PCMK_RESOURCE_CLASS_SYSTEMD, pcmk__str_casei)) {
533c21
-        if ((cmd->result.exit_status == PCMK_OCF_OK)
533c21
+        if (pcmk__result_ok(&(cmd->result))
533c21
             && pcmk__strcase_any_of(cmd->action, "start", "stop", NULL)) {
533c21
             /* systemd returns from start and stop actions after the action
533c21
              * begins, not after it completes. We have to jump through a few
533c21
@@ -894,7 +894,7 @@ action_complete(svc_action_t * action)
533c21
             if (cmd->result.execution_status == PCMK_EXEC_PENDING) {
533c21
                 goagain = true;
533c21
 
533c21
-            } else if ((cmd->result.exit_status == PCMK_OCF_OK)
533c21
+            } else if (pcmk__result_ok(&(cmd->result))
533c21
                        && pcmk__str_eq(cmd->real_action, "stop", pcmk__str_casei)) {
533c21
                 goagain = true;
533c21
 
533c21
@@ -927,12 +927,12 @@ action_complete(svc_action_t * action)
533c21
 #if SUPPORT_NAGIOS
533c21
     if (rsc && pcmk__str_eq(rsc->class, PCMK_RESOURCE_CLASS_NAGIOS, pcmk__str_casei)) {
533c21
         if (action_matches(cmd, "monitor", 0)
533c21
-            && (cmd->result.exit_status == PCMK_OCF_OK)) {
533c21
+            && pcmk__result_ok(&(cmd->result))) {
533c21
             /* Successfully executed --version for the nagios plugin */
533c21
             cmd->result.exit_status = PCMK_OCF_NOT_RUNNING;
533c21
 
533c21
         } else if (pcmk__str_eq(cmd->action, "start", pcmk__str_casei)
533c21
-                   && (cmd->result.exit_status != PCMK_OCF_OK)) {
533c21
+                   && !pcmk__result_ok(&(cmd->result))) {
533c21
 #ifdef PCMK__TIME_USE_CGT
533c21
             goagain = true;
533c21
 #endif
533c21
@@ -955,7 +955,7 @@ action_complete(svc_action_t * action)
533c21
             cmd->start_delay = delay;
533c21
             cmd->timeout = timeout_left;
533c21
 
533c21
-            if (cmd->result.exit_status == PCMK_OCF_OK) {
533c21
+            if (pcmk__result_ok(&(cmd->result))) {
533c21
                 crm_debug("%s %s may still be in progress: re-scheduling (elapsed=%dms, remaining=%dms, start_delay=%dms)",
533c21
                           cmd->rsc_id, cmd->real_action, time_sum, timeout_left, delay);
533c21
 
533c21
@@ -1066,7 +1066,7 @@ stonith_action_complete(lrmd_cmd_t * cmd, int rc)
533c21
                                                          cmd->interval_ms, rc);
533c21
 
533c21
         // Certain successful actions change the known state of the resource
533c21
-        if ((rsc != NULL) && (cmd->result.exit_status == PCMK_OCF_OK)) {
533c21
+        if ((rsc != NULL) && pcmk__result_ok(&(cmd->result))) {
533c21
             if (pcmk__str_eq(cmd->action, "start", pcmk__str_casei)) {
533c21
                 rsc->st_probe_rc = pcmk_ok; // maps to PCMK_OCF_OK
533c21
             } else if (pcmk__str_eq(cmd->action, "stop", pcmk__str_casei)) {
533c21
diff --git a/daemons/fenced/fenced_commands.c b/daemons/fenced/fenced_commands.c
533c21
index 9f2f1cc40..26501a4b3 100644
533c21
--- a/daemons/fenced/fenced_commands.c
533c21
+++ b/daemons/fenced/fenced_commands.c
533c21
@@ -1188,8 +1188,7 @@ dynamic_list_search_cb(int pid, const pcmk__action_result_t *result,
533c21
 
533c21
     mainloop_set_trigger(dev->work);
533c21
 
533c21
-    if ((result->execution_status == PCMK_EXEC_DONE)
533c21
-        && (result->exit_status == CRM_EX_OK)) {
533c21
+    if (pcmk__result_ok(result)) {
533c21
         crm_info("Refreshing target list for %s", dev->id);
533c21
         g_list_free_full(dev->targets, free);
533c21
         dev->targets = stonith__parse_targets(result->action_stdout);
533c21
@@ -2310,15 +2309,14 @@ log_async_result(async_command_t *cmd, const pcmk__action_result_t *result,
533c21
     GString *msg = g_string_sized_new(80); // Reasonable starting size
533c21
 
533c21
     // Choose log levels appropriately if we have a result
533c21
-    if ((result->execution_status == PCMK_EXEC_DONE)
533c21
-        && (result->exit_status == CRM_EX_OK))  { // Success
533c21
+    if (pcmk__result_ok(result)) {
533c21
         log_level = (cmd->victim == NULL)? LOG_DEBUG : LOG_NOTICE;
533c21
         if ((result->action_stdout != NULL)
533c21
             && !pcmk__str_eq(cmd->action, "metadata", pcmk__str_casei)) {
533c21
             output_log_level = LOG_DEBUG;
533c21
         }
533c21
         next = NULL;
533c21
-    } else { // Failure
533c21
+    } else {
533c21
         log_level = (cmd->victim == NULL)? LOG_NOTICE : LOG_ERR;
533c21
         if ((result->action_stdout != NULL)
533c21
             && !pcmk__str_eq(cmd->action, "metadata", pcmk__str_casei)) {
533c21
@@ -2482,7 +2480,7 @@ st_child_done(int pid, const pcmk__action_result_t *result, void *user_data)
533c21
     /* The device is ready to do something else now */
533c21
     device = g_hash_table_lookup(device_list, cmd->device);
533c21
     if (device) {
533c21
-        if (!device->verified && (result->exit_status == CRM_EX_OK) &&
533c21
+        if (!device->verified && pcmk__result_ok(result) &&
533c21
             (pcmk__strcase_any_of(cmd->action, "list", "monitor", "status", NULL))) {
533c21
 
533c21
             device->verified = TRUE;
533c21
@@ -2491,7 +2489,7 @@ st_child_done(int pid, const pcmk__action_result_t *result, void *user_data)
533c21
         mainloop_set_trigger(device->work);
533c21
     }
533c21
 
533c21
-    if (result->exit_status == CRM_EX_OK) {
533c21
+    if (pcmk__result_ok(result)) {
533c21
         GList *iter;
533c21
         /* see if there are any required devices left to execute for this op */
533c21
         for (iter = cmd->device_next; iter != NULL; iter = iter->next) {
533c21
@@ -2523,7 +2521,7 @@ st_child_done(int pid, const pcmk__action_result_t *result, void *user_data)
533c21
 
533c21
     send_async_reply(cmd, result, pid, false);
533c21
 
533c21
-    if (result->exit_status != CRM_EX_OK) {
533c21
+    if (!pcmk__result_ok(result)) {
533c21
         goto done;
533c21
     }
533c21
 
533c21
diff --git a/include/crm/common/results_internal.h b/include/crm/common/results_internal.h
533c21
index 6befaa0ed..0c5833937 100644
533c21
--- a/include/crm/common/results_internal.h
533c21
+++ b/include/crm/common/results_internal.h
533c21
@@ -54,4 +54,20 @@ void pcmk__set_result_output(pcmk__action_result_t *result,
533c21
 
533c21
 void pcmk__reset_result(pcmk__action_result_t *result);
533c21
 
533c21
+/*!
533c21
+ * \internal
533c21
+ * \brief Check whether a result is OK
533c21
+ *
533c21
+ * \param[in] result
533c21
+ *
533c21
+ * \return true if the result's exit status is CRM_EX_OK and its
533c21
+ *         execution status is PCMK_EXEC_DONE, otherwise false
533c21
+ */
533c21
+static inline bool
533c21
+pcmk__result_ok(const pcmk__action_result_t *result)
533c21
+{
533c21
+    return (result != NULL) && (result->exit_status == CRM_EX_OK)
533c21
+            && (result->execution_status == PCMK_EXEC_DONE);
533c21
+}
533c21
+
533c21
 #endif // PCMK__COMMON_RESULTS_INTERNAL__H
533c21
diff --git a/lib/fencing/st_client.c b/lib/fencing/st_client.c
533c21
index 2fbff7f24..af461d0d4 100644
533c21
--- a/lib/fencing/st_client.c
533c21
+++ b/lib/fencing/st_client.c
533c21
@@ -760,7 +760,7 @@ stonith__result2rc(const pcmk__action_result_t *result)
533c21
         default:                        break;
533c21
     }
533c21
 
533c21
-    if (result->exit_status == CRM_EX_OK) {
533c21
+    if (pcmk__result_ok(result)) {
533c21
         return pcmk_rc_ok;
533c21
     }
533c21
 
533c21
@@ -797,7 +797,7 @@ stonith_action_async_done(svc_action_t *svc_action)
533c21
 
533c21
     log_action(action, action->pid);
533c21
 
533c21
-    if ((action->result.exit_status != CRM_EX_OK)
533c21
+    if (!pcmk__result_ok(&(action->result))
533c21
         && update_remaining_timeout(action)) {
533c21
 
533c21
         int rc = internal_stonith_action_execute(action);
533c21
diff --git a/lib/fencing/st_rhcs.c b/lib/fencing/st_rhcs.c
533c21
index 6c8cbedc7..865e04bc2 100644
533c21
--- a/lib/fencing/st_rhcs.c
533c21
+++ b/lib/fencing/st_rhcs.c
533c21
@@ -148,7 +148,7 @@ stonith__rhcs_get_metadata(const char *agent, int timeout, xmlNode **metadata)
533c21
         return rc;
533c21
     }
533c21
 
533c21
-    if (result->exit_status != CRM_EX_OK) {
533c21
+    if (!pcmk__result_ok(result)) {
533c21
         crm_warn("Metadata action for %s returned error code %d",
533c21
                  agent, result->exit_status);
533c21
         rc = pcmk_rc2legacy(stonith__result2rc(result));
533c21
-- 
533c21
2.27.0
533c21
533c21
533c21
From 4c39ff00a0c028354a9da7f80986f7e34b05ba08 Mon Sep 17 00:00:00 2001
533c21
From: Ken Gaillot <kgaillot@redhat.com>
533c21
Date: Fri, 12 Nov 2021 16:07:01 -0600
533c21
Subject: [PATCH 03/13] Low: fencing: improve mapping of execution status to
533c21
 legacy return code
533c21
533c21
PCMK_EXEC_PENDING is likely not possible with the current code, but map it to
533c21
EINPROGRESS for completeness.
533c21
533c21
PCMK_EXEC_INVALID is not yet used by the fencer but will be.
533c21
---
533c21
 lib/fencing/st_client.c | 30 ++++++++++++++++++++++++++----
533c21
 1 file changed, 26 insertions(+), 4 deletions(-)
533c21
533c21
diff --git a/lib/fencing/st_client.c b/lib/fencing/st_client.c
533c21
index af461d0d4..93513e9f3 100644
533c21
--- a/lib/fencing/st_client.c
533c21
+++ b/lib/fencing/st_client.c
533c21
@@ -749,7 +749,12 @@ update_remaining_timeout(stonith_action_t * action)
533c21
 int
533c21
 stonith__result2rc(const pcmk__action_result_t *result)
533c21
 {
533c21
+    if (pcmk__result_ok(result)) {
533c21
+        return pcmk_rc_ok;
533c21
+    }
533c21
+
533c21
     switch (result->execution_status) {
533c21
+        case PCMK_EXEC_PENDING:         return EINPROGRESS;
533c21
         case PCMK_EXEC_CANCELLED:       return ECANCELED;
533c21
         case PCMK_EXEC_TIMEOUT:         return ETIME;
533c21
         case PCMK_EXEC_NOT_INSTALLED:   return ENOENT;
533c21
@@ -757,11 +762,28 @@ stonith__result2rc(const pcmk__action_result_t *result)
533c21
         case PCMK_EXEC_NOT_CONNECTED:   return ENOTCONN;
533c21
         case PCMK_EXEC_NO_FENCE_DEVICE: return ENODEV;
533c21
         case PCMK_EXEC_NO_SECRETS:      return EACCES;
533c21
-        default:                        break;
533c21
-    }
533c21
 
533c21
-    if (pcmk__result_ok(result)) {
533c21
-        return pcmk_rc_ok;
533c21
+        /* For the fencing API, PCMK_EXEC_INVALID is used with fencer API
533c21
+         * operations that don't involve executing an agent (for example,
533c21
+         * registering devices). This allows us to use the CRM_EX_* codes in the
533c21
+         * exit status for finer-grained responses.
533c21
+         */
533c21
+        case PCMK_EXEC_INVALID:
533c21
+            switch (result->exit_status) {
533c21
+                case CRM_EX_INSUFFICIENT_PRIV:  return EACCES;
533c21
+                case CRM_EX_PROTOCOL:           return EPROTO;
533c21
+
533c21
+               /* CRM_EX_EXPIRED is used for orphaned fencing operations left
533c21
+                * over from a previous instance of the fencer. For API backward
533c21
+                * compatibility, this is mapped to the previously used code for
533c21
+                * this case, EHOSTUNREACH.
533c21
+                */
533c21
+                case CRM_EX_EXPIRED:            return EHOSTUNREACH;
533c21
+                default:                        break;
533c21
+            }
533c21
+
533c21
+        default:
533c21
+            break;
533c21
     }
533c21
 
533c21
     // Try to provide useful error code based on result's error output
533c21
-- 
533c21
2.27.0
533c21
533c21
533c21
From 4e638783d1cd7c9398a603fc6df7e9d868262b16 Mon Sep 17 00:00:00 2001
533c21
From: Ken Gaillot <kgaillot@redhat.com>
533c21
Date: Thu, 18 Nov 2021 11:41:12 -0600
533c21
Subject: [PATCH 04/13] Refactor: libstonithd: separate action-related code
533c21
 into own source file
533c21
533c21
Everything related to stonith_action_t has been moved from st_client.c to a new
533c21
st_actions.c, since st_client.c was ridiculously large, and the action stuff
533c21
isn't all client-related. No code was changed.
533c21
533c21
Before:
533c21
   2804 st_client.c
533c21
533c21
After:
533c21
   545 lib/fencing/st_actions.c
533c21
  2278 lib/fencing/st_client.c
533c21
---
533c21
 lib/fencing/Makefile.am  |   2 +-
533c21
 lib/fencing/st_actions.c | 545 +++++++++++++++++++++++++++++++++++++++
533c21
 lib/fencing/st_client.c  | 528 +------------------------------------
533c21
 3 files changed, 547 insertions(+), 528 deletions(-)
533c21
 create mode 100644 lib/fencing/st_actions.c
533c21
533c21
diff --git a/lib/fencing/Makefile.am b/lib/fencing/Makefile.am
533c21
index 205c4873d..dac215c16 100644
533c21
--- a/lib/fencing/Makefile.am
533c21
+++ b/lib/fencing/Makefile.am
533c21
@@ -22,7 +22,7 @@ libstonithd_la_LDFLAGS	+= $(LDFLAGS_HARDENED_LIB)
533c21
 libstonithd_la_LIBADD	= $(top_builddir)/lib/common/libcrmcommon.la
533c21
 libstonithd_la_LIBADD   += $(top_builddir)/lib/services/libcrmservice.la
533c21
 
533c21
-libstonithd_la_SOURCES	= st_client.c st_output.c st_rhcs.c
533c21
+libstonithd_la_SOURCES	= st_actions.c st_client.c st_output.c st_rhcs.c
533c21
 if BUILD_LHA_SUPPORT
533c21
 libstonithd_la_SOURCES	+= st_lha.c
533c21
 endif
533c21
diff --git a/lib/fencing/st_actions.c b/lib/fencing/st_actions.c
533c21
new file mode 100644
533c21
index 000000000..64d3afd5d
533c21
--- /dev/null
533c21
+++ b/lib/fencing/st_actions.c
533c21
@@ -0,0 +1,545 @@
533c21
+/*
533c21
+ * Copyright 2004-2021 the Pacemaker project contributors
533c21
+ *
533c21
+ * The version control history for this file may have further details.
533c21
+ *
533c21
+ * This source code is licensed under the GNU Lesser General Public License
533c21
+ * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
533c21
+ */
533c21
+
533c21
+#include <crm_internal.h>
533c21
+
533c21
+#include <stdlib.h>
533c21
+#include <stdio.h>
533c21
+#include <string.h>
533c21
+#include <libgen.h>
533c21
+#include <inttypes.h>
533c21
+#include <sys/types.h>
533c21
+#include <glib.h>
533c21
+
533c21
+#include <crm/crm.h>
533c21
+#include <crm/stonith-ng.h>
533c21
+#include <crm/fencing/internal.h>
533c21
+#include <crm/msg_xml.h>
533c21
+#include <crm/services_internal.h>
533c21
+
533c21
+#include "fencing_private.h"
533c21
+
533c21
+struct stonith_action_s {
533c21
+    /*! user defined data */
533c21
+    char *agent;
533c21
+    char *action;
533c21
+    char *victim;
533c21
+    GHashTable *args;
533c21
+    int timeout;
533c21
+    int async;
533c21
+    void *userdata;
533c21
+    void (*done_cb) (int pid, const pcmk__action_result_t *result,
533c21
+                     void *user_data);
533c21
+    void (*fork_cb) (int pid, void *user_data);
533c21
+
533c21
+    svc_action_t *svc_action;
533c21
+
533c21
+    /*! internal timing information */
533c21
+    time_t initial_start_time;
533c21
+    int tries;
533c21
+    int remaining_timeout;
533c21
+    int max_retries;
533c21
+
533c21
+    int pid;
533c21
+    pcmk__action_result_t result;
533c21
+};
533c21
+
533c21
+static int internal_stonith_action_execute(stonith_action_t *action);
533c21
+static void log_action(stonith_action_t *action, pid_t pid);
533c21
+
533c21
+/*!
533c21
+ * \internal
533c21
+ * \brief Set an action's result based on services library result
533c21
+ *
533c21
+ * \param[in] action      Fence action to set result for
533c21
+ * \param[in] svc_action  Service action to get result from
533c21
+ */
533c21
+static void
533c21
+set_result_from_svc_action(stonith_action_t *action, svc_action_t *svc_action)
533c21
+{
533c21
+    pcmk__set_result(&(action->result), svc_action->rc, svc_action->status,
533c21
+                     services__exit_reason(svc_action));
533c21
+    pcmk__set_result_output(&(action->result),
533c21
+                            services__grab_stdout(svc_action),
533c21
+                            services__grab_stderr(svc_action));
533c21
+}
533c21
+
533c21
+static void
533c21
+log_action(stonith_action_t *action, pid_t pid)
533c21
+{
533c21
+    /* The services library has already logged the output at info or debug
533c21
+     * level, so just raise to warning for stderr.
533c21
+     */
533c21
+    if (action->result.action_stderr != NULL) {
533c21
+        /* Logging the whole string confuses syslog when the string is xml */
533c21
+        char *prefix = crm_strdup_printf("%s[%d] stderr:", action->agent, pid);
533c21
+
533c21
+        crm_log_output(LOG_WARNING, prefix, action->result.action_stderr);
533c21
+        free(prefix);
533c21
+    }
533c21
+}
533c21
+
533c21
+static void
533c21
+append_config_arg(gpointer key, gpointer value, gpointer user_data)
533c21
+{
533c21
+    /* The fencer will filter "action" out when it registers the device,
533c21
+     * but ignore it here in case any external API users don't.
533c21
+     *
533c21
+     * Also filter out parameters handled directly by Pacemaker.
533c21
+     */
533c21
+    if (!pcmk__str_eq(key, STONITH_ATTR_ACTION_OP, pcmk__str_casei)
533c21
+        && !pcmk_stonith_param(key)
533c21
+        && (strstr(key, CRM_META) == NULL)
533c21
+        && !pcmk__str_eq(key, "crm_feature_set", pcmk__str_casei)) {
533c21
+
533c21
+        crm_trace("Passing %s=%s with fence action",
533c21
+                  (const char *) key, (const char *) (value? value : ""));
533c21
+        g_hash_table_insert((GHashTable *) user_data,
533c21
+                            strdup(key), strdup(value? value : ""));
533c21
+    }
533c21
+}
533c21
+
533c21
+static GHashTable *
533c21
+make_args(const char *agent, const char *action, const char *victim,
533c21
+          uint32_t victim_nodeid, GHashTable * device_args,
533c21
+          GHashTable * port_map, const char *host_arg)
533c21
+{
533c21
+    GHashTable *arg_list = NULL;
533c21
+    const char *value = NULL;
533c21
+
533c21
+    CRM_CHECK(action != NULL, return NULL);
533c21
+
533c21
+    arg_list = pcmk__strkey_table(free, free);
533c21
+
533c21
+    // Add action to arguments (using an alias if requested)
533c21
+    if (device_args) {
533c21
+        char buffer[512];
533c21
+
533c21
+        snprintf(buffer, sizeof(buffer), "pcmk_%s_action", action);
533c21
+        value = g_hash_table_lookup(device_args, buffer);
533c21
+        if (value) {
533c21
+            crm_debug("Substituting '%s' for fence action %s targeting %s",
533c21
+                      value, action, victim);
533c21
+            action = value;
533c21
+        }
533c21
+    }
533c21
+    g_hash_table_insert(arg_list, strdup(STONITH_ATTR_ACTION_OP),
533c21
+                        strdup(action));
533c21
+
533c21
+    /* If this is a fencing operation against another node, add more standard
533c21
+     * arguments.
533c21
+     */
533c21
+    if (victim && device_args) {
533c21
+        const char *param = NULL;
533c21
+
533c21
+        /* Always pass the target's name, per
533c21
+         * https://github.com/ClusterLabs/fence-agents/blob/master/doc/FenceAgentAPI.md
533c21
+         */
533c21
+        g_hash_table_insert(arg_list, strdup("nodename"), strdup(victim));
533c21
+
533c21
+        // If the target's node ID was specified, pass it, too
533c21
+        if (victim_nodeid) {
533c21
+            char *nodeid = crm_strdup_printf("%" PRIu32, victim_nodeid);
533c21
+
533c21
+            // cts-fencing looks for this log message
533c21
+            crm_info("Passing '%s' as nodeid with fence action '%s' targeting %s",
533c21
+                     nodeid, action, victim);
533c21
+            g_hash_table_insert(arg_list, strdup("nodeid"), nodeid);
533c21
+        }
533c21
+
533c21
+        // Check whether target must be specified in some other way
533c21
+        param = g_hash_table_lookup(device_args, PCMK_STONITH_HOST_ARGUMENT);
533c21
+        if (!pcmk__str_eq(agent, "fence_legacy", pcmk__str_none)
533c21
+            && !pcmk__str_eq(param, "none", pcmk__str_casei)) {
533c21
+
533c21
+            if (param == NULL) {
533c21
+                /* Use the caller's default for pcmk_host_argument, or "port" if
533c21
+                 * none was given
533c21
+                 */
533c21
+                param = (host_arg == NULL)? "port" : host_arg;
533c21
+            }
533c21
+            value = g_hash_table_lookup(device_args, param);
533c21
+
533c21
+            if (pcmk__str_eq(value, "dynamic",
533c21
+                             pcmk__str_casei|pcmk__str_null_matches)) {
533c21
+                /* If the host argument was "dynamic" or not explicitly specified,
533c21
+                 * add it with the target
533c21
+                 */
533c21
+                const char *alias = NULL;
533c21
+
533c21
+                if (port_map) {
533c21
+                    alias = g_hash_table_lookup(port_map, victim);
533c21
+                }
533c21
+                if (alias == NULL) {
533c21
+                    alias = victim;
533c21
+                }
533c21
+                crm_debug("Passing %s='%s' with fence action %s targeting %s",
533c21
+                          param, alias, action, victim);
533c21
+                g_hash_table_insert(arg_list, strdup(param), strdup(alias));
533c21
+            }
533c21
+        }
533c21
+    }
533c21
+
533c21
+    if (device_args) {
533c21
+        g_hash_table_foreach(device_args, append_config_arg, arg_list);
533c21
+    }
533c21
+
533c21
+    return arg_list;
533c21
+}
533c21
+
533c21
+/*!
533c21
+ * \internal
533c21
+ * \brief Free all memory used by a stonith action
533c21
+ *
533c21
+ * \param[in,out] action  Action to free
533c21
+ */
533c21
+void
533c21
+stonith__destroy_action(stonith_action_t *action)
533c21
+{
533c21
+    if (action) {
533c21
+        free(action->agent);
533c21
+        if (action->args) {
533c21
+            g_hash_table_destroy(action->args);
533c21
+        }
533c21
+        free(action->action);
533c21
+        free(action->victim);
533c21
+        if (action->svc_action) {
533c21
+            services_action_free(action->svc_action);
533c21
+        }
533c21
+        pcmk__reset_result(&(action->result));
533c21
+        free(action);
533c21
+    }
533c21
+}
533c21
+
533c21
+/*!
533c21
+ * \internal
533c21
+ * \brief Get the result of an executed stonith action
533c21
+ *
533c21
+ * \param[in] action  Executed action
533c21
+ *
533c21
+ * \return Pointer to action's result (or NULL if \p action is NULL)
533c21
+ */
533c21
+pcmk__action_result_t *
533c21
+stonith__action_result(stonith_action_t *action)
533c21
+{
533c21
+    return (action == NULL)? NULL : &(action->result);
533c21
+}
533c21
+
533c21
+#define FAILURE_MAX_RETRIES 2
533c21
+stonith_action_t *
533c21
+stonith_action_create(const char *agent,
533c21
+                      const char *_action,
533c21
+                      const char *victim,
533c21
+                      uint32_t victim_nodeid,
533c21
+                      int timeout, GHashTable * device_args,
533c21
+                      GHashTable * port_map, const char *host_arg)
533c21
+{
533c21
+    stonith_action_t *action;
533c21
+
533c21
+    action = calloc(1, sizeof(stonith_action_t));
533c21
+    action->args = make_args(agent, _action, victim, victim_nodeid,
533c21
+                             device_args, port_map, host_arg);
533c21
+    crm_debug("Preparing '%s' action for %s using agent %s",
533c21
+              _action, (victim? victim : "no target"), agent);
533c21
+    action->agent = strdup(agent);
533c21
+    action->action = strdup(_action);
533c21
+    if (victim) {
533c21
+        action->victim = strdup(victim);
533c21
+    }
533c21
+    action->timeout = action->remaining_timeout = timeout;
533c21
+    action->max_retries = FAILURE_MAX_RETRIES;
533c21
+
533c21
+    pcmk__set_result(&(action->result), PCMK_OCF_UNKNOWN, PCMK_EXEC_UNKNOWN,
533c21
+                     "Initialization bug in fencing library");
533c21
+
533c21
+    if (device_args) {
533c21
+        char buffer[512];
533c21
+        const char *value = NULL;
533c21
+
533c21
+        snprintf(buffer, sizeof(buffer), "pcmk_%s_retries", _action);
533c21
+        value = g_hash_table_lookup(device_args, buffer);
533c21
+
533c21
+        if (value) {
533c21
+            action->max_retries = atoi(value);
533c21
+        }
533c21
+    }
533c21
+
533c21
+    return action;
533c21
+}
533c21
+
533c21
+static gboolean
533c21
+update_remaining_timeout(stonith_action_t * action)
533c21
+{
533c21
+    int diff = time(NULL) - action->initial_start_time;
533c21
+
533c21
+    if (action->tries >= action->max_retries) {
533c21
+        crm_info("Attempted to execute agent %s (%s) the maximum number of times (%d) allowed",
533c21
+                 action->agent, action->action, action->max_retries);
533c21
+        action->remaining_timeout = 0;
533c21
+    } else if ((action->result.execution_status != PCMK_EXEC_TIMEOUT)
533c21
+               && (diff < (action->timeout * 0.7))) {
533c21
+        /* only set remaining timeout period if there is 30%
533c21
+         * or greater of the original timeout period left */
533c21
+        action->remaining_timeout = action->timeout - diff;
533c21
+    } else {
533c21
+        action->remaining_timeout = 0;
533c21
+    }
533c21
+    return action->remaining_timeout ? TRUE : FALSE;
533c21
+}
533c21
+
533c21
+/*!
533c21
+ * \internal
533c21
+ * \brief Map a fencing action result to a standard return code
533c21
+ *
533c21
+ * \param[in] result  Fencing action result to map
533c21
+ *
533c21
+ * \return Standard Pacemaker return code that best corresponds to \p result
533c21
+ */
533c21
+int
533c21
+stonith__result2rc(const pcmk__action_result_t *result)
533c21
+{
533c21
+    if (pcmk__result_ok(result)) {
533c21
+        return pcmk_rc_ok;
533c21
+    }
533c21
+
533c21
+    switch (result->execution_status) {
533c21
+        case PCMK_EXEC_PENDING:         return EINPROGRESS;
533c21
+        case PCMK_EXEC_CANCELLED:       return ECANCELED;
533c21
+        case PCMK_EXEC_TIMEOUT:         return ETIME;
533c21
+        case PCMK_EXEC_NOT_INSTALLED:   return ENOENT;
533c21
+        case PCMK_EXEC_NOT_SUPPORTED:   return EOPNOTSUPP;
533c21
+        case PCMK_EXEC_NOT_CONNECTED:   return ENOTCONN;
533c21
+        case PCMK_EXEC_NO_FENCE_DEVICE: return ENODEV;
533c21
+        case PCMK_EXEC_NO_SECRETS:      return EACCES;
533c21
+
533c21
+        /* For the fencing API, PCMK_EXEC_INVALID is used with fencer API
533c21
+         * operations that don't involve executing an agent (for example,
533c21
+         * registering devices). This allows us to use the CRM_EX_* codes in the
533c21
+         * exit status for finer-grained responses.
533c21
+         */
533c21
+        case PCMK_EXEC_INVALID:
533c21
+            switch (result->exit_status) {
533c21
+                case CRM_EX_INSUFFICIENT_PRIV:  return EACCES;
533c21
+                case CRM_EX_PROTOCOL:           return EPROTO;
533c21
+
533c21
+               /* CRM_EX_EXPIRED is used for orphaned fencing operations left
533c21
+                * over from a previous instance of the fencer. For API backward
533c21
+                * compatibility, this is mapped to the previously used code for
533c21
+                * this case, EHOSTUNREACH.
533c21
+                */
533c21
+                case CRM_EX_EXPIRED:            return EHOSTUNREACH;
533c21
+                default:                        break;
533c21
+            }
533c21
+
533c21
+        default:
533c21
+            break;
533c21
+    }
533c21
+
533c21
+    // Try to provide useful error code based on result's error output
533c21
+
533c21
+    if (result->action_stderr == NULL) {
533c21
+        return ENODATA;
533c21
+
533c21
+    } else if (strcasestr(result->action_stderr, "timed out")
533c21
+               || strcasestr(result->action_stderr, "timeout")) {
533c21
+        return ETIME;
533c21
+
533c21
+    } else if (strcasestr(result->action_stderr, "unrecognised action")
533c21
+               || strcasestr(result->action_stderr, "unrecognized action")
533c21
+               || strcasestr(result->action_stderr, "unsupported action")) {
533c21
+        return EOPNOTSUPP;
533c21
+    }
533c21
+
533c21
+    // Oh well, we tried
533c21
+    return pcmk_rc_error;
533c21
+}
533c21
+
533c21
+static void
533c21
+stonith_action_async_done(svc_action_t *svc_action)
533c21
+{
533c21
+    stonith_action_t *action = (stonith_action_t *) svc_action->cb_data;
533c21
+
533c21
+    set_result_from_svc_action(action, svc_action);
533c21
+
533c21
+    svc_action->params = NULL;
533c21
+
533c21
+    crm_debug("Child process %d performing action '%s' exited with rc %d",
533c21
+                action->pid, action->action, svc_action->rc);
533c21
+
533c21
+    log_action(action, action->pid);
533c21
+
533c21
+    if (!pcmk__result_ok(&(action->result))
533c21
+        && update_remaining_timeout(action)) {
533c21
+
533c21
+        int rc = internal_stonith_action_execute(action);
533c21
+        if (rc == pcmk_ok) {
533c21
+            return;
533c21
+        }
533c21
+    }
533c21
+
533c21
+    if (action->done_cb) {
533c21
+        action->done_cb(action->pid, &(action->result), action->userdata);
533c21
+    }
533c21
+
533c21
+    action->svc_action = NULL; // don't remove our caller
533c21
+    stonith__destroy_action(action);
533c21
+}
533c21
+
533c21
+static void
533c21
+stonith_action_async_forked(svc_action_t *svc_action)
533c21
+{
533c21
+    stonith_action_t *action = (stonith_action_t *) svc_action->cb_data;
533c21
+
533c21
+    action->pid = svc_action->pid;
533c21
+    action->svc_action = svc_action;
533c21
+
533c21
+    if (action->fork_cb) {
533c21
+        (action->fork_cb) (svc_action->pid, action->userdata);
533c21
+    }
533c21
+
533c21
+    crm_trace("Child process %d performing action '%s' successfully forked",
533c21
+              action->pid, action->action);
533c21
+}
533c21
+
533c21
+static int
533c21
+internal_stonith_action_execute(stonith_action_t * action)
533c21
+{
533c21
+    int rc = -EPROTO;
533c21
+    int is_retry = 0;
533c21
+    svc_action_t *svc_action = NULL;
533c21
+    static int stonith_sequence = 0;
533c21
+    char *buffer = NULL;
533c21
+
533c21
+    CRM_CHECK(action != NULL, return -EINVAL);
533c21
+
533c21
+    if ((action->action == NULL) || (action->args == NULL)
533c21
+        || (action->agent == NULL)) {
533c21
+        pcmk__set_result(&(action->result), PCMK_OCF_UNKNOWN_ERROR,
533c21
+                         PCMK_EXEC_ERROR_FATAL, "Bug in fencing library");
533c21
+        return -EINVAL;
533c21
+    }
533c21
+
533c21
+    if (!action->tries) {
533c21
+        action->initial_start_time = time(NULL);
533c21
+    }
533c21
+    action->tries++;
533c21
+
533c21
+    if (action->tries > 1) {
533c21
+        crm_info("Attempt %d to execute %s (%s). remaining timeout is %d",
533c21
+                 action->tries, action->agent, action->action, action->remaining_timeout);
533c21
+        is_retry = 1;
533c21
+    }
533c21
+
533c21
+    buffer = crm_strdup_printf(PCMK__FENCE_BINDIR "/%s",
533c21
+                               basename(action->agent));
533c21
+    svc_action = services_action_create_generic(buffer, NULL);
533c21
+    free(buffer);
533c21
+
533c21
+    if (svc_action->rc != PCMK_OCF_UNKNOWN) {
533c21
+        set_result_from_svc_action(action, svc_action);
533c21
+        services_action_free(svc_action);
533c21
+        return -E2BIG;
533c21
+    }
533c21
+
533c21
+    svc_action->timeout = 1000 * action->remaining_timeout;
533c21
+    svc_action->standard = strdup(PCMK_RESOURCE_CLASS_STONITH);
533c21
+    svc_action->id = crm_strdup_printf("%s_%s_%d", basename(action->agent),
533c21
+                                       action->action, action->tries);
533c21
+    svc_action->agent = strdup(action->agent);
533c21
+    svc_action->sequence = stonith_sequence++;
533c21
+    svc_action->params = action->args;
533c21
+    svc_action->cb_data = (void *) action;
533c21
+    svc_action->flags = pcmk__set_flags_as(__func__, __LINE__,
533c21
+                                           LOG_TRACE, "Action",
533c21
+                                           svc_action->id, svc_action->flags,
533c21
+                                           SVC_ACTION_NON_BLOCKED,
533c21
+                                           "SVC_ACTION_NON_BLOCKED");
533c21
+
533c21
+    /* keep retries from executing out of control and free previous results */
533c21
+    if (is_retry) {
533c21
+        pcmk__reset_result(&(action->result));
533c21
+        sleep(1);
533c21
+    }
533c21
+
533c21
+    if (action->async) {
533c21
+        /* async */
533c21
+        if (services_action_async_fork_notify(svc_action,
533c21
+                                              &stonith_action_async_done,
533c21
+                                              &stonith_action_async_forked)) {
533c21
+            pcmk__set_result(&(action->result), PCMK_OCF_UNKNOWN,
533c21
+                             PCMK_EXEC_PENDING, NULL);
533c21
+            return pcmk_ok;
533c21
+        }
533c21
+
533c21
+    } else if (services_action_sync(svc_action)) { // sync success
533c21
+        rc = pcmk_ok;
533c21
+
533c21
+    } else { // sync failure
533c21
+        rc = -ECONNABORTED;
533c21
+    }
533c21
+
533c21
+    set_result_from_svc_action(action, svc_action);
533c21
+    svc_action->params = NULL;
533c21
+    services_action_free(svc_action);
533c21
+    return rc;
533c21
+}
533c21
+
533c21
+/*!
533c21
+ * \internal
533c21
+ * \brief Kick off execution of an async stonith action
533c21
+ *
533c21
+ * \param[in,out] action        Action to be executed
533c21
+ * \param[in,out] userdata      Datapointer to be passed to callbacks
533c21
+ * \param[in]     done          Callback to notify action has failed/succeeded
533c21
+ * \param[in]     fork_callback Callback to notify successful fork of child
533c21
+ *
533c21
+ * \return pcmk_ok if ownership of action has been taken, -errno otherwise
533c21
+ */
533c21
+int
533c21
+stonith_action_execute_async(stonith_action_t * action,
533c21
+                             void *userdata,
533c21
+                             void (*done) (int pid,
533c21
+                                           const pcmk__action_result_t *result,
533c21
+                                           void *user_data),
533c21
+                             void (*fork_cb) (int pid, void *user_data))
533c21
+{
533c21
+    if (!action) {
533c21
+        return -EINVAL;
533c21
+    }
533c21
+
533c21
+    action->userdata = userdata;
533c21
+    action->done_cb = done;
533c21
+    action->fork_cb = fork_cb;
533c21
+    action->async = 1;
533c21
+
533c21
+    return internal_stonith_action_execute(action);
533c21
+}
533c21
+
533c21
+/*!
533c21
+ * \internal
533c21
+ * \brief Execute a stonith action
533c21
+ *
533c21
+ * \param[in,out] action  Action to execute
533c21
+ *
533c21
+ * \return pcmk_ok on success, -errno otherwise
533c21
+ */
533c21
+int
533c21
+stonith__execute(stonith_action_t *action)
533c21
+{
533c21
+    int rc = pcmk_ok;
533c21
+
533c21
+    CRM_CHECK(action != NULL, return -EINVAL);
533c21
+
533c21
+    // Keep trying until success, max retries, or timeout
533c21
+    do {
533c21
+        rc = internal_stonith_action_execute(action);
533c21
+    } while ((rc != pcmk_ok) && update_remaining_timeout(action));
533c21
+
533c21
+    return rc;
533c21
+}
533c21
diff --git a/lib/fencing/st_client.c b/lib/fencing/st_client.c
533c21
index 93513e9f3..944cd1863 100644
533c21
--- a/lib/fencing/st_client.c
533c21
+++ b/lib/fencing/st_client.c
533c21
@@ -8,28 +8,20 @@
533c21
  */
533c21
 
533c21
 #include <crm_internal.h>
533c21
-#include <unistd.h>
533c21
+
533c21
 #include <stdlib.h>
533c21
 #include <stdio.h>
533c21
 #include <stdbool.h>
533c21
 #include <string.h>
533c21
 #include <ctype.h>
533c21
-#include <libgen.h>
533c21
 #include <inttypes.h>
533c21
-
533c21
-#include <sys/stat.h>
533c21
 #include <sys/types.h>
533c21
-#include <sys/wait.h>
533c21
-
533c21
 #include <glib.h>
533c21
 
533c21
 #include <crm/crm.h>
533c21
 #include <crm/stonith-ng.h>
533c21
 #include <crm/fencing/internal.h>
533c21
 #include <crm/msg_xml.h>
533c21
-#include <crm/common/xml.h>
533c21
-#include <crm/common/xml_internal.h>
533c21
-#include <crm/services_internal.h>
533c21
 
533c21
 #include <crm/common/mainloop.h>
533c21
 
533c21
@@ -37,31 +29,6 @@
533c21
 
533c21
 CRM_TRACE_INIT_DATA(stonith);
533c21
 
533c21
-struct stonith_action_s {
533c21
-    /*! user defined data */
533c21
-    char *agent;
533c21
-    char *action;
533c21
-    char *victim;
533c21
-    GHashTable *args;
533c21
-    int timeout;
533c21
-    int async;
533c21
-    void *userdata;
533c21
-    void (*done_cb) (int pid, const pcmk__action_result_t *result,
533c21
-                     void *user_data);
533c21
-    void (*fork_cb) (int pid, void *user_data);
533c21
-
533c21
-    svc_action_t *svc_action;
533c21
-
533c21
-    /*! internal timing information */
533c21
-    time_t initial_start_time;
533c21
-    int tries;
533c21
-    int remaining_timeout;
533c21
-    int max_retries;
533c21
-
533c21
-    int pid;
533c21
-    pcmk__action_result_t result;
533c21
-};
533c21
-
533c21
 typedef struct stonith_private_s {
533c21
     char *token;
533c21
     crm_ipc_t *ipc;
533c21
@@ -118,8 +85,6 @@ static int stonith_send_command(stonith_t *stonith, const char *op,
533c21
 
533c21
 static void stonith_connection_destroy(gpointer user_data);
533c21
 static void stonith_send_notification(gpointer data, gpointer user_data);
533c21
-static int internal_stonith_action_execute(stonith_action_t * action);
533c21
-static void log_action(stonith_action_t *action, pid_t pid);
533c21
 
533c21
 /*!
533c21
  * \brief Get agent namespace by name
533c21
@@ -196,23 +161,6 @@ stonith_get_namespace(const char *agent, const char *namespace_s)
533c21
     return st_namespace_invalid;
533c21
 }
533c21
 
533c21
-/*!
533c21
- * \internal
533c21
- * \brief Set an action's result based on services library result
533c21
- *
533c21
- * \param[in] action      Fence action to set result for
533c21
- * \param[in] svc_action  Service action to get result from
533c21
- */
533c21
-static void
533c21
-set_result_from_svc_action(stonith_action_t *action, svc_action_t *svc_action)
533c21
-{
533c21
-    pcmk__set_result(&(action->result), svc_action->rc, svc_action->status,
533c21
-                     services__exit_reason(svc_action));
533c21
-    pcmk__set_result_output(&(action->result),
533c21
-                            services__grab_stdout(svc_action),
533c21
-                            services__grab_stderr(svc_action));
533c21
-}
533c21
-
533c21
 gboolean
533c21
 stonith__watchdog_fencing_enabled_for_node_api(stonith_t *st, const char *node)
533c21
 {
533c21
@@ -273,21 +221,6 @@ stonith__watchdog_fencing_enabled_for_node(const char *node)
533c21
     return stonith__watchdog_fencing_enabled_for_node_api(NULL, node);
533c21
 }
533c21
 
533c21
-static void
533c21
-log_action(stonith_action_t *action, pid_t pid)
533c21
-{
533c21
-    /* The services library has already logged the output at info or debug
533c21
-     * level, so just raise to warning for stderr.
533c21
-     */
533c21
-    if (action->result.action_stderr != NULL) {
533c21
-        /* Logging the whole string confuses syslog when the string is xml */
533c21
-        char *prefix = crm_strdup_printf("%s[%d] stderr:", action->agent, pid);
533c21
-
533c21
-        crm_log_output(LOG_WARNING, prefix, action->result.action_stderr);
533c21
-        free(prefix);
533c21
-    }
533c21
-}
533c21
-
533c21
 /* when cycling through the list we don't want to delete items
533c21
    so just mark them and when we know nobody is using the list
533c21
    loop over it to remove the marked items
533c21
@@ -530,465 +463,6 @@ stonith_api_register_level(stonith_t * st, int options, const char *node, int le
533c21
                                            level, device_list);
533c21
 }
533c21
 
533c21
-static void
533c21
-append_config_arg(gpointer key, gpointer value, gpointer user_data)
533c21
-{
533c21
-    /* The fencer will filter "action" out when it registers the device,
533c21
-     * but ignore it here in case any external API users don't.
533c21
-     *
533c21
-     * Also filter out parameters handled directly by Pacemaker.
533c21
-     */
533c21
-    if (!pcmk__str_eq(key, STONITH_ATTR_ACTION_OP, pcmk__str_casei)
533c21
-        && !pcmk_stonith_param(key)
533c21
-        && (strstr(key, CRM_META) == NULL)
533c21
-        && !pcmk__str_eq(key, "crm_feature_set", pcmk__str_casei)) {
533c21
-
533c21
-        crm_trace("Passing %s=%s with fence action",
533c21
-                  (const char *) key, (const char *) (value? value : ""));
533c21
-        g_hash_table_insert((GHashTable *) user_data,
533c21
-                            strdup(key), strdup(value? value : ""));
533c21
-    }
533c21
-}
533c21
-
533c21
-static GHashTable *
533c21
-make_args(const char *agent, const char *action, const char *victim,
533c21
-          uint32_t victim_nodeid, GHashTable * device_args,
533c21
-          GHashTable * port_map, const char *host_arg)
533c21
-{
533c21
-    GHashTable *arg_list = NULL;
533c21
-    const char *value = NULL;
533c21
-
533c21
-    CRM_CHECK(action != NULL, return NULL);
533c21
-
533c21
-    arg_list = pcmk__strkey_table(free, free);
533c21
-
533c21
-    // Add action to arguments (using an alias if requested)
533c21
-    if (device_args) {
533c21
-        char buffer[512];
533c21
-
533c21
-        snprintf(buffer, sizeof(buffer), "pcmk_%s_action", action);
533c21
-        value = g_hash_table_lookup(device_args, buffer);
533c21
-        if (value) {
533c21
-            crm_debug("Substituting '%s' for fence action %s targeting %s",
533c21
-                      value, action, victim);
533c21
-            action = value;
533c21
-        }
533c21
-    }
533c21
-    g_hash_table_insert(arg_list, strdup(STONITH_ATTR_ACTION_OP),
533c21
-                        strdup(action));
533c21
-
533c21
-    /* If this is a fencing operation against another node, add more standard
533c21
-     * arguments.
533c21
-     */
533c21
-    if (victim && device_args) {
533c21
-        const char *param = NULL;
533c21
-
533c21
-        /* Always pass the target's name, per
533c21
-         * https://github.com/ClusterLabs/fence-agents/blob/master/doc/FenceAgentAPI.md
533c21
-         */
533c21
-        g_hash_table_insert(arg_list, strdup("nodename"), strdup(victim));
533c21
-
533c21
-        // If the target's node ID was specified, pass it, too
533c21
-        if (victim_nodeid) {
533c21
-            char *nodeid = crm_strdup_printf("%" PRIu32, victim_nodeid);
533c21
-
533c21
-            // cts-fencing looks for this log message
533c21
-            crm_info("Passing '%s' as nodeid with fence action '%s' targeting %s",
533c21
-                     nodeid, action, victim);
533c21
-            g_hash_table_insert(arg_list, strdup("nodeid"), nodeid);
533c21
-        }
533c21
-
533c21
-        // Check whether target must be specified in some other way
533c21
-        param = g_hash_table_lookup(device_args, PCMK_STONITH_HOST_ARGUMENT);
533c21
-        if (!pcmk__str_eq(agent, "fence_legacy", pcmk__str_none)
533c21
-            && !pcmk__str_eq(param, "none", pcmk__str_casei)) {
533c21
-
533c21
-            if (param == NULL) {
533c21
-                /* Use the caller's default for pcmk_host_argument, or "port" if
533c21
-                 * none was given
533c21
-                 */
533c21
-                param = (host_arg == NULL)? "port" : host_arg;
533c21
-            }
533c21
-            value = g_hash_table_lookup(device_args, param);
533c21
-
533c21
-            if (pcmk__str_eq(value, "dynamic",
533c21
-                             pcmk__str_casei|pcmk__str_null_matches)) {
533c21
-                /* If the host argument was "dynamic" or not explicitly specified,
533c21
-                 * add it with the target
533c21
-                 */
533c21
-                const char *alias = NULL;
533c21
-
533c21
-                if (port_map) {
533c21
-                    alias = g_hash_table_lookup(port_map, victim);
533c21
-                }
533c21
-                if (alias == NULL) {
533c21
-                    alias = victim;
533c21
-                }
533c21
-                crm_debug("Passing %s='%s' with fence action %s targeting %s",
533c21
-                          param, alias, action, victim);
533c21
-                g_hash_table_insert(arg_list, strdup(param), strdup(alias));
533c21
-            }
533c21
-        }
533c21
-    }
533c21
-
533c21
-    if (device_args) {
533c21
-        g_hash_table_foreach(device_args, append_config_arg, arg_list);
533c21
-    }
533c21
-
533c21
-    return arg_list;
533c21
-}
533c21
-
533c21
-/*!
533c21
- * \internal
533c21
- * \brief Free all memory used by a stonith action
533c21
- *
533c21
- * \param[in,out] action  Action to free
533c21
- */
533c21
-void
533c21
-stonith__destroy_action(stonith_action_t *action)
533c21
-{
533c21
-    if (action) {
533c21
-        free(action->agent);
533c21
-        if (action->args) {
533c21
-            g_hash_table_destroy(action->args);
533c21
-        }
533c21
-        free(action->action);
533c21
-        free(action->victim);
533c21
-        if (action->svc_action) {
533c21
-            services_action_free(action->svc_action);
533c21
-        }
533c21
-        pcmk__reset_result(&(action->result));
533c21
-        free(action);
533c21
-    }
533c21
-}
533c21
-
533c21
-/*!
533c21
- * \internal
533c21
- * \brief Get the result of an executed stonith action
533c21
- *
533c21
- * \param[in] action  Executed action
533c21
- *
533c21
- * \return Pointer to action's result (or NULL if \p action is NULL)
533c21
- */
533c21
-pcmk__action_result_t *
533c21
-stonith__action_result(stonith_action_t *action)
533c21
-{
533c21
-    return (action == NULL)? NULL : &(action->result);
533c21
-}
533c21
-
533c21
-#define FAILURE_MAX_RETRIES 2
533c21
-stonith_action_t *
533c21
-stonith_action_create(const char *agent,
533c21
-                      const char *_action,
533c21
-                      const char *victim,
533c21
-                      uint32_t victim_nodeid,
533c21
-                      int timeout, GHashTable * device_args,
533c21
-                      GHashTable * port_map, const char *host_arg)
533c21
-{
533c21
-    stonith_action_t *action;
533c21
-
533c21
-    action = calloc(1, sizeof(stonith_action_t));
533c21
-    action->args = make_args(agent, _action, victim, victim_nodeid,
533c21
-                             device_args, port_map, host_arg);
533c21
-    crm_debug("Preparing '%s' action for %s using agent %s",
533c21
-              _action, (victim? victim : "no target"), agent);
533c21
-    action->agent = strdup(agent);
533c21
-    action->action = strdup(_action);
533c21
-    if (victim) {
533c21
-        action->victim = strdup(victim);
533c21
-    }
533c21
-    action->timeout = action->remaining_timeout = timeout;
533c21
-    action->max_retries = FAILURE_MAX_RETRIES;
533c21
-
533c21
-    pcmk__set_result(&(action->result), PCMK_OCF_UNKNOWN, PCMK_EXEC_UNKNOWN,
533c21
-                     "Initialization bug in fencing library");
533c21
-
533c21
-    if (device_args) {
533c21
-        char buffer[512];
533c21
-        const char *value = NULL;
533c21
-
533c21
-        snprintf(buffer, sizeof(buffer), "pcmk_%s_retries", _action);
533c21
-        value = g_hash_table_lookup(device_args, buffer);
533c21
-
533c21
-        if (value) {
533c21
-            action->max_retries = atoi(value);
533c21
-        }
533c21
-    }
533c21
-
533c21
-    return action;
533c21
-}
533c21
-
533c21
-static gboolean
533c21
-update_remaining_timeout(stonith_action_t * action)
533c21
-{
533c21
-    int diff = time(NULL) - action->initial_start_time;
533c21
-
533c21
-    if (action->tries >= action->max_retries) {
533c21
-        crm_info("Attempted to execute agent %s (%s) the maximum number of times (%d) allowed",
533c21
-                 action->agent, action->action, action->max_retries);
533c21
-        action->remaining_timeout = 0;
533c21
-    } else if ((action->result.execution_status != PCMK_EXEC_TIMEOUT)
533c21
-               && (diff < (action->timeout * 0.7))) {
533c21
-        /* only set remaining timeout period if there is 30%
533c21
-         * or greater of the original timeout period left */
533c21
-        action->remaining_timeout = action->timeout - diff;
533c21
-    } else {
533c21
-        action->remaining_timeout = 0;
533c21
-    }
533c21
-    return action->remaining_timeout ? TRUE : FALSE;
533c21
-}
533c21
-
533c21
-/*!
533c21
- * \internal
533c21
- * \brief Map a fencing action result to a standard return code
533c21
- *
533c21
- * \param[in] result  Fencing action result to map
533c21
- *
533c21
- * \return Standard Pacemaker return code that best corresponds to \p result
533c21
- */
533c21
-int
533c21
-stonith__result2rc(const pcmk__action_result_t *result)
533c21
-{
533c21
-    if (pcmk__result_ok(result)) {
533c21
-        return pcmk_rc_ok;
533c21
-    }
533c21
-
533c21
-    switch (result->execution_status) {
533c21
-        case PCMK_EXEC_PENDING:         return EINPROGRESS;
533c21
-        case PCMK_EXEC_CANCELLED:       return ECANCELED;
533c21
-        case PCMK_EXEC_TIMEOUT:         return ETIME;
533c21
-        case PCMK_EXEC_NOT_INSTALLED:   return ENOENT;
533c21
-        case PCMK_EXEC_NOT_SUPPORTED:   return EOPNOTSUPP;
533c21
-        case PCMK_EXEC_NOT_CONNECTED:   return ENOTCONN;
533c21
-        case PCMK_EXEC_NO_FENCE_DEVICE: return ENODEV;
533c21
-        case PCMK_EXEC_NO_SECRETS:      return EACCES;
533c21
-
533c21
-        /* For the fencing API, PCMK_EXEC_INVALID is used with fencer API
533c21
-         * operations that don't involve executing an agent (for example,
533c21
-         * registering devices). This allows us to use the CRM_EX_* codes in the
533c21
-         * exit status for finer-grained responses.
533c21
-         */
533c21
-        case PCMK_EXEC_INVALID:
533c21
-            switch (result->exit_status) {
533c21
-                case CRM_EX_INSUFFICIENT_PRIV:  return EACCES;
533c21
-                case CRM_EX_PROTOCOL:           return EPROTO;
533c21
-
533c21
-               /* CRM_EX_EXPIRED is used for orphaned fencing operations left
533c21
-                * over from a previous instance of the fencer. For API backward
533c21
-                * compatibility, this is mapped to the previously used code for
533c21
-                * this case, EHOSTUNREACH.
533c21
-                */
533c21
-                case CRM_EX_EXPIRED:            return EHOSTUNREACH;
533c21
-                default:                        break;
533c21
-            }
533c21
-
533c21
-        default:
533c21
-            break;
533c21
-    }
533c21
-
533c21
-    // Try to provide useful error code based on result's error output
533c21
-
533c21
-    if (result->action_stderr == NULL) {
533c21
-        return ENODATA;
533c21
-
533c21
-    } else if (strcasestr(result->action_stderr, "timed out")
533c21
-               || strcasestr(result->action_stderr, "timeout")) {
533c21
-        return ETIME;
533c21
-
533c21
-    } else if (strcasestr(result->action_stderr, "unrecognised action")
533c21
-               || strcasestr(result->action_stderr, "unrecognized action")
533c21
-               || strcasestr(result->action_stderr, "unsupported action")) {
533c21
-        return EOPNOTSUPP;
533c21
-    }
533c21
-
533c21
-    // Oh well, we tried
533c21
-    return pcmk_rc_error;
533c21
-}
533c21
-
533c21
-static void
533c21
-stonith_action_async_done(svc_action_t *svc_action)
533c21
-{
533c21
-    stonith_action_t *action = (stonith_action_t *) svc_action->cb_data;
533c21
-
533c21
-    set_result_from_svc_action(action, svc_action);
533c21
-
533c21
-    svc_action->params = NULL;
533c21
-
533c21
-    crm_debug("Child process %d performing action '%s' exited with rc %d",
533c21
-                action->pid, action->action, svc_action->rc);
533c21
-
533c21
-    log_action(action, action->pid);
533c21
-
533c21
-    if (!pcmk__result_ok(&(action->result))
533c21
-        && update_remaining_timeout(action)) {
533c21
-
533c21
-        int rc = internal_stonith_action_execute(action);
533c21
-        if (rc == pcmk_ok) {
533c21
-            return;
533c21
-        }
533c21
-    }
533c21
-
533c21
-    if (action->done_cb) {
533c21
-        action->done_cb(action->pid, &(action->result), action->userdata);
533c21
-    }
533c21
-
533c21
-    action->svc_action = NULL; // don't remove our caller
533c21
-    stonith__destroy_action(action);
533c21
-}
533c21
-
533c21
-static void
533c21
-stonith_action_async_forked(svc_action_t *svc_action)
533c21
-{
533c21
-    stonith_action_t *action = (stonith_action_t *) svc_action->cb_data;
533c21
-
533c21
-    action->pid = svc_action->pid;
533c21
-    action->svc_action = svc_action;
533c21
-
533c21
-    if (action->fork_cb) {
533c21
-        (action->fork_cb) (svc_action->pid, action->userdata);
533c21
-    }
533c21
-
533c21
-    crm_trace("Child process %d performing action '%s' successfully forked",
533c21
-              action->pid, action->action);
533c21
-}
533c21
-
533c21
-static int
533c21
-internal_stonith_action_execute(stonith_action_t * action)
533c21
-{
533c21
-    int rc = -EPROTO;
533c21
-    int is_retry = 0;
533c21
-    svc_action_t *svc_action = NULL;
533c21
-    static int stonith_sequence = 0;
533c21
-    char *buffer = NULL;
533c21
-
533c21
-    CRM_CHECK(action != NULL, return -EINVAL);
533c21
-
533c21
-    if ((action->action == NULL) || (action->args == NULL)
533c21
-        || (action->agent == NULL)) {
533c21
-        pcmk__set_result(&(action->result), PCMK_OCF_UNKNOWN_ERROR,
533c21
-                         PCMK_EXEC_ERROR_FATAL, "Bug in fencing library");
533c21
-        return -EINVAL;
533c21
-    }
533c21
-
533c21
-    if (!action->tries) {
533c21
-        action->initial_start_time = time(NULL);
533c21
-    }
533c21
-    action->tries++;
533c21
-
533c21
-    if (action->tries > 1) {
533c21
-        crm_info("Attempt %d to execute %s (%s). remaining timeout is %d",
533c21
-                 action->tries, action->agent, action->action, action->remaining_timeout);
533c21
-        is_retry = 1;
533c21
-    }
533c21
-
533c21
-    buffer = crm_strdup_printf(PCMK__FENCE_BINDIR "/%s",
533c21
-                               basename(action->agent));
533c21
-    svc_action = services_action_create_generic(buffer, NULL);
533c21
-    free(buffer);
533c21
-
533c21
-    if (svc_action->rc != PCMK_OCF_UNKNOWN) {
533c21
-        set_result_from_svc_action(action, svc_action);
533c21
-        services_action_free(svc_action);
533c21
-        return -E2BIG;
533c21
-    }
533c21
-
533c21
-    svc_action->timeout = 1000 * action->remaining_timeout;
533c21
-    svc_action->standard = strdup(PCMK_RESOURCE_CLASS_STONITH);
533c21
-    svc_action->id = crm_strdup_printf("%s_%s_%d", basename(action->agent),
533c21
-                                       action->action, action->tries);
533c21
-    svc_action->agent = strdup(action->agent);
533c21
-    svc_action->sequence = stonith_sequence++;
533c21
-    svc_action->params = action->args;
533c21
-    svc_action->cb_data = (void *) action;
533c21
-    svc_action->flags = pcmk__set_flags_as(__func__, __LINE__,
533c21
-                                           LOG_TRACE, "Action",
533c21
-                                           svc_action->id, svc_action->flags,
533c21
-                                           SVC_ACTION_NON_BLOCKED,
533c21
-                                           "SVC_ACTION_NON_BLOCKED");
533c21
-
533c21
-    /* keep retries from executing out of control and free previous results */
533c21
-    if (is_retry) {
533c21
-        pcmk__reset_result(&(action->result));
533c21
-        sleep(1);
533c21
-    }
533c21
-
533c21
-    if (action->async) {
533c21
-        /* async */
533c21
-        if (services_action_async_fork_notify(svc_action,
533c21
-                                              &stonith_action_async_done,
533c21
-                                              &stonith_action_async_forked)) {
533c21
-            pcmk__set_result(&(action->result), PCMK_OCF_UNKNOWN,
533c21
-                             PCMK_EXEC_PENDING, NULL);
533c21
-            return pcmk_ok;
533c21
-        }
533c21
-
533c21
-    } else if (services_action_sync(svc_action)) { // sync success
533c21
-        rc = pcmk_ok;
533c21
-
533c21
-    } else { // sync failure
533c21
-        rc = -ECONNABORTED;
533c21
-    }
533c21
-
533c21
-    set_result_from_svc_action(action, svc_action);
533c21
-    svc_action->params = NULL;
533c21
-    services_action_free(svc_action);
533c21
-    return rc;
533c21
-}
533c21
-
533c21
-/*!
533c21
- * \internal
533c21
- * \brief Kick off execution of an async stonith action
533c21
- *
533c21
- * \param[in,out] action        Action to be executed
533c21
- * \param[in,out] userdata      Datapointer to be passed to callbacks
533c21
- * \param[in]     done          Callback to notify action has failed/succeeded
533c21
- * \param[in]     fork_callback Callback to notify successful fork of child
533c21
- *
533c21
- * \return pcmk_ok if ownership of action has been taken, -errno otherwise
533c21
- */
533c21
-int
533c21
-stonith_action_execute_async(stonith_action_t * action,
533c21
-                             void *userdata,
533c21
-                             void (*done) (int pid,
533c21
-                                           const pcmk__action_result_t *result,
533c21
-                                           void *user_data),
533c21
-                             void (*fork_cb) (int pid, void *user_data))
533c21
-{
533c21
-    if (!action) {
533c21
-        return -EINVAL;
533c21
-    }
533c21
-
533c21
-    action->userdata = userdata;
533c21
-    action->done_cb = done;
533c21
-    action->fork_cb = fork_cb;
533c21
-    action->async = 1;
533c21
-
533c21
-    return internal_stonith_action_execute(action);
533c21
-}
533c21
-
533c21
-/*!
533c21
- * \internal
533c21
- * \brief Execute a stonith action
533c21
- *
533c21
- * \param[in,out] action  Action to execute
533c21
- *
533c21
- * \return pcmk_ok on success, -errno otherwise
533c21
- */
533c21
-int
533c21
-stonith__execute(stonith_action_t *action)
533c21
-{
533c21
-    int rc = pcmk_ok;
533c21
-
533c21
-    CRM_CHECK(action != NULL, return -EINVAL);
533c21
-
533c21
-    // Keep trying until success, max retries, or timeout
533c21
-    do {
533c21
-        rc = internal_stonith_action_execute(action);
533c21
-    } while ((rc != pcmk_ok) && update_remaining_timeout(action));
533c21
-
533c21
-    return rc;
533c21
-}
533c21
-
533c21
 static int
533c21
 stonith_api_device_list(stonith_t * stonith, int call_options, const char *namespace,
533c21
                         stonith_key_value_t ** devices, int timeout)
533c21
-- 
533c21
2.27.0
533c21
533c21
533c21
From 883a3cf7d3f73d02417d3997a7885dd5a7bebac7 Mon Sep 17 00:00:00 2001
533c21
From: Ken Gaillot <kgaillot@redhat.com>
533c21
Date: Wed, 10 Nov 2021 15:39:17 -0600
533c21
Subject: [PATCH 05/13] Low: fencing,executor: improve mapping of legacy return
533c21
 code to execution status
533c21
533c21
Move stonith_rc2status() from the executor to the fencing library for future
533c21
reuse, exposing it internally as stonith__legacy2status(). Update it to use
533c21
recently added execution status codes.
533c21
---
533c21
 daemons/execd/execd_commands.c | 66 ++++++++--------------------------
533c21
 include/crm/fencing/internal.h |  2 ++
533c21
 lib/fencing/st_actions.c       | 36 +++++++++++++++++++
533c21
 3 files changed, 52 insertions(+), 52 deletions(-)
533c21
533c21
diff --git a/daemons/execd/execd_commands.c b/daemons/execd/execd_commands.c
533c21
index 02070bf11..0ccaa1ced 100644
533c21
--- a/daemons/execd/execd_commands.c
533c21
+++ b/daemons/execd/execd_commands.c
533c21
@@ -21,6 +21,7 @@
533c21
 #include <unistd.h>
533c21
 
533c21
 #include <crm/crm.h>
533c21
+#include <crm/fencing/internal.h>
533c21
 #include <crm/services.h>
533c21
 #include <crm/services_internal.h>
533c21
 #include <crm/common/mainloop.h>
533c21
@@ -999,56 +1000,6 @@ action_complete(svc_action_t * action)
533c21
     cmd_finalize(cmd, rsc);
533c21
 }
533c21
 
533c21
-/*!
533c21
- * \internal
533c21
- * \brief Determine operation status of a stonith operation
533c21
- *
533c21
- * Non-stonith resource operations get their operation status directly from the
533c21
- * service library, but the fencer does not have an equivalent, so we must infer
533c21
- * an operation status from the fencer API's return code.
533c21
- *
533c21
- * \param[in] action       Name of action performed on stonith resource
533c21
- * \param[in] interval_ms  Action interval
533c21
- * \param[in] rc           Action result from fencer
533c21
- *
533c21
- * \return Operation status corresponding to fencer API return code
533c21
- */
533c21
-static int
533c21
-stonith_rc2status(const char *action, guint interval_ms, int rc)
533c21
-{
533c21
-    int status = PCMK_EXEC_DONE;
533c21
-
533c21
-    switch (rc) {
533c21
-        case pcmk_ok:
533c21
-            break;
533c21
-
533c21
-        case -EOPNOTSUPP:
533c21
-        case -EPROTONOSUPPORT:
533c21
-            status = PCMK_EXEC_NOT_SUPPORTED;
533c21
-            break;
533c21
-
533c21
-        case -ETIME:
533c21
-        case -ETIMEDOUT:
533c21
-            status = PCMK_EXEC_TIMEOUT;
533c21
-            break;
533c21
-
533c21
-        case -ENOTCONN:
533c21
-        case -ECOMM:
533c21
-            // Couldn't talk to fencer
533c21
-            status = PCMK_EXEC_ERROR;
533c21
-            break;
533c21
-
533c21
-        case -ENODEV:
533c21
-            // The device is not registered with the fencer
533c21
-            status = PCMK_EXEC_ERROR;
533c21
-            break;
533c21
-
533c21
-        default:
533c21
-            break;
533c21
-    }
533c21
-    return status;
533c21
-}
533c21
-
533c21
 static void
533c21
 stonith_action_complete(lrmd_cmd_t * cmd, int rc)
533c21
 {
533c21
@@ -1062,8 +1013,19 @@ stonith_action_complete(lrmd_cmd_t * cmd, int rc)
533c21
      * the fencer return code.
533c21
      */
533c21
     if (cmd->result.execution_status != PCMK_EXEC_CANCELLED) {
533c21
-        cmd->result.execution_status = stonith_rc2status(cmd->action,
533c21
-                                                         cmd->interval_ms, rc);
533c21
+        cmd->result.execution_status = stonith__legacy2status(rc);
533c21
+
533c21
+        // Simplify status codes from fencer
533c21
+        switch (cmd->result.execution_status) {
533c21
+            case PCMK_EXEC_NOT_CONNECTED:
533c21
+            case PCMK_EXEC_INVALID:
533c21
+            case PCMK_EXEC_NO_FENCE_DEVICE:
533c21
+            case PCMK_EXEC_NO_SECRETS:
533c21
+                cmd->result.execution_status = PCMK_EXEC_ERROR;
533c21
+                break;
533c21
+            default:
533c21
+                break;
533c21
+        }
533c21
 
533c21
         // Certain successful actions change the known state of the resource
533c21
         if ((rsc != NULL) && pcmk__result_ok(&(cmd->result))) {
533c21
diff --git a/include/crm/fencing/internal.h b/include/crm/fencing/internal.h
533c21
index 6a7e4232c..80f6443be 100644
533c21
--- a/include/crm/fencing/internal.h
533c21
+++ b/include/crm/fencing/internal.h
533c21
@@ -182,6 +182,8 @@ bool stonith__event_state_pending(stonith_history_t *history, void *user_data);
533c21
 bool stonith__event_state_eq(stonith_history_t *history, void *user_data);
533c21
 bool stonith__event_state_neq(stonith_history_t *history, void *user_data);
533c21
 
533c21
+int stonith__legacy2status(int rc);
533c21
+
533c21
 /*!
533c21
  * \internal
533c21
  * \brief Is a fencing operation in pending state?
533c21
diff --git a/lib/fencing/st_actions.c b/lib/fencing/st_actions.c
533c21
index 64d3afd5d..9e785595a 100644
533c21
--- a/lib/fencing/st_actions.c
533c21
+++ b/lib/fencing/st_actions.c
533c21
@@ -360,6 +360,42 @@ stonith__result2rc(const pcmk__action_result_t *result)
533c21
     return pcmk_rc_error;
533c21
 }
533c21
 
533c21
+/*!
533c21
+ * \internal
533c21
+ * \brief Determine execution status equivalent of legacy fencer return code
533c21
+ *
533c21
+ * Fence action notifications, and fence action callbacks from older fencers
533c21
+ * (<=2.1.2) in a rolling upgrade, will have only a legacy return code. Map this
533c21
+ * to an execution status as best as possible (essentially, the inverse of
533c21
+ * stonith__result2rc()).
533c21
+ *
533c21
+ * \param[in] rc           Legacy return code from fencer
533c21
+ *
533c21
+ * \return Execution status best corresponding to \p rc
533c21
+ */
533c21
+int
533c21
+stonith__legacy2status(int rc)
533c21
+{
533c21
+    if (rc >= 0) {
533c21
+        return PCMK_EXEC_DONE;
533c21
+    }
533c21
+    switch (-rc) {
533c21
+        case EACCES:            return PCMK_EXEC_NO_SECRETS;
533c21
+        case ECANCELED:         return PCMK_EXEC_CANCELLED;
533c21
+        case EHOSTUNREACH:      return PCMK_EXEC_INVALID;
533c21
+        case EINPROGRESS:       return PCMK_EXEC_PENDING;
533c21
+        case ENODEV:            return PCMK_EXEC_NO_FENCE_DEVICE;
533c21
+        case ENOENT:            return PCMK_EXEC_NOT_INSTALLED;
533c21
+        case ENOTCONN:          return PCMK_EXEC_NOT_CONNECTED;
533c21
+        case EOPNOTSUPP:        return PCMK_EXEC_NOT_SUPPORTED;
533c21
+        case EPROTO:            return PCMK_EXEC_INVALID;
533c21
+        case EPROTONOSUPPORT:   return PCMK_EXEC_NOT_SUPPORTED;
533c21
+        case ETIME:             return PCMK_EXEC_TIMEOUT;
533c21
+        case ETIMEDOUT:         return PCMK_EXEC_TIMEOUT;
533c21
+        default:                return PCMK_EXEC_ERROR;
533c21
+    }
533c21
+}
533c21
+
533c21
 static void
533c21
 stonith_action_async_done(svc_action_t *svc_action)
533c21
 {
533c21
-- 
533c21
2.27.0
533c21
533c21
533c21
From 639a9f4a2cbeb6cc41b754a1dcb1f360a9500e03 Mon Sep 17 00:00:00 2001
533c21
From: Ken Gaillot <kgaillot@redhat.com>
533c21
Date: Thu, 11 Nov 2021 16:54:32 -0600
533c21
Subject: [PATCH 06/13] Refactor: fencing: add functions for getting/setting
533c21
 result via XML
533c21
533c21
These will come in handy as we update the various fencer messages to include a
533c21
full result rather than just a legacy return code. The functions are in a new
533c21
source file fenced_messages.c which can have other stuff moved to it later.
533c21
---
533c21
 include/crm/fencing/internal.h |   3 +
533c21
 lib/fencing/st_actions.c       | 107 +++++++++++++++++++++++++++++++++
533c21
 2 files changed, 110 insertions(+)
533c21
533c21
diff --git a/include/crm/fencing/internal.h b/include/crm/fencing/internal.h
533c21
index 80f6443be..4b5fd3959 100644
533c21
--- a/include/crm/fencing/internal.h
533c21
+++ b/include/crm/fencing/internal.h
533c21
@@ -60,6 +60,9 @@ stonith_action_t *stonith_action_create(const char *agent,
533c21
 void stonith__destroy_action(stonith_action_t *action);
533c21
 pcmk__action_result_t *stonith__action_result(stonith_action_t *action);
533c21
 int stonith__result2rc(const pcmk__action_result_t *result);
533c21
+void stonith__xe_set_result(xmlNode *xml, const pcmk__action_result_t *result);
533c21
+void stonith__xe_get_result(xmlNode *xml, pcmk__action_result_t *result);
533c21
+xmlNode *stonith__find_xe_with_result(xmlNode *xml);
533c21
 
533c21
 int
533c21
 stonith_action_execute_async(stonith_action_t * action,
533c21
diff --git a/lib/fencing/st_actions.c b/lib/fencing/st_actions.c
533c21
index 9e785595a..d4fc3f5ed 100644
533c21
--- a/lib/fencing/st_actions.c
533c21
+++ b/lib/fencing/st_actions.c
533c21
@@ -396,6 +396,113 @@ stonith__legacy2status(int rc)
533c21
     }
533c21
 }
533c21
 
533c21
+/*!
533c21
+ * \internal
533c21
+ * \brief Add a fencing result to an XML element as attributes
533c21
+ *
533c21
+ * \param[in] xml     XML element to add result to
533c21
+ * \param[in] result  Fencing result to add (assume success if NULL)
533c21
+ */
533c21
+void
533c21
+stonith__xe_set_result(xmlNode *xml, const pcmk__action_result_t *result)
533c21
+{
533c21
+    int exit_status = CRM_EX_OK;
533c21
+    enum pcmk_exec_status execution_status = PCMK_EXEC_DONE;
533c21
+    const char *exit_reason = NULL;
533c21
+    const char *action_stdout = NULL;
533c21
+    int rc = pcmk_ok;
533c21
+
533c21
+    CRM_CHECK(xml != NULL, return);
533c21
+
533c21
+    if (result != NULL) {
533c21
+        exit_status = result->exit_status;
533c21
+        execution_status = result->execution_status;
533c21
+        exit_reason = result->exit_reason;
533c21
+        action_stdout = result->action_stdout;
533c21
+        rc = pcmk_rc2legacy(stonith__result2rc(result));
533c21
+    }
533c21
+
533c21
+    crm_xml_add_int(xml, XML_LRM_ATTR_OPSTATUS, (int) execution_status);
533c21
+    crm_xml_add_int(xml, XML_LRM_ATTR_RC, exit_status);
533c21
+    crm_xml_add(xml, XML_LRM_ATTR_EXIT_REASON, exit_reason);
533c21
+    crm_xml_add(xml, "st_output", action_stdout);
533c21
+
533c21
+    /* @COMPAT Peers in rolling upgrades, Pacemaker Remote nodes, and external
533c21
+     * code that use libstonithd <=2.1.2 don't check for the full result, and
533c21
+     * need a legacy return code instead.
533c21
+     */
533c21
+    crm_xml_add_int(xml, F_STONITH_RC, rc);
533c21
+}
533c21
+
533c21
+/*!
533c21
+ * \internal
533c21
+ * \brief Find a fencing result beneath an XML element
533c21
+ *
533c21
+ * \param[in]  xml     XML element to search
533c21
+ *
533c21
+ * \return \p xml or descendent of it that contains a fencing result, else NULL
533c21
+ */
533c21
+xmlNode *
533c21
+stonith__find_xe_with_result(xmlNode *xml)
533c21
+{
533c21
+    xmlNode *match = get_xpath_object("//@" XML_LRM_ATTR_RC, xml, LOG_NEVER);
533c21
+
533c21
+    if (match == NULL) {
533c21
+        /* @COMPAT Peers <=2.1.2 in a rolling upgrade provide only a legacy
533c21
+         * return code, not a full result, so check for that.
533c21
+         */
533c21
+        match = get_xpath_object("//@" F_STONITH_RC, xml, LOG_ERR);
533c21
+    }
533c21
+    return match;
533c21
+}
533c21
+
533c21
+/*!
533c21
+ * \internal
533c21
+ * \brief Get a fencing result from an XML element's attributes
533c21
+ *
533c21
+ * \param[in]  xml     XML element with fencing result
533c21
+ * \param[out] result  Where to store fencing result
533c21
+ */
533c21
+void
533c21
+stonith__xe_get_result(xmlNode *xml, pcmk__action_result_t *result)
533c21
+{
533c21
+    int exit_status = CRM_EX_OK;
533c21
+    int execution_status = PCMK_EXEC_DONE;
533c21
+    const char *exit_reason = NULL;
533c21
+    char *action_stdout = NULL;
533c21
+
533c21
+    CRM_CHECK((xml != NULL) && (result != NULL), return);
533c21
+
533c21
+    exit_reason = crm_element_value(xml, XML_LRM_ATTR_EXIT_REASON);
533c21
+    action_stdout = crm_element_value_copy(xml, "st_output");
533c21
+
533c21
+    // A result must include an exit status and execution status
533c21
+    if ((crm_element_value_int(xml, XML_LRM_ATTR_RC, &exit_status) < 0)
533c21
+        || (crm_element_value_int(xml, XML_LRM_ATTR_OPSTATUS,
533c21
+                                  &execution_status) < 0)) {
533c21
+        int rc = pcmk_ok;
533c21
+        exit_status = CRM_EX_ERROR;
533c21
+
533c21
+        /* @COMPAT Peers <=2.1.2 in rolling upgrades provide only a legacy
533c21
+         * return code, not a full result, so check for that.
533c21
+         */
533c21
+        if (crm_element_value_int(xml, F_STONITH_RC, &rc) == 0) {
533c21
+            if ((rc == pcmk_ok) || (rc == -EINPROGRESS)) {
533c21
+                exit_status = CRM_EX_OK;
533c21
+            }
533c21
+            execution_status = stonith__legacy2status(rc);
533c21
+            exit_reason = pcmk_strerror(rc);
533c21
+
533c21
+        } else {
533c21
+            execution_status = PCMK_EXEC_ERROR;
533c21
+            exit_reason = "Fencer reply contained neither a full result "
533c21
+                          "nor a legacy return code (bug?)";
533c21
+        }
533c21
+    }
533c21
+    pcmk__set_result(result, exit_status, execution_status, exit_reason);
533c21
+    pcmk__set_result_output(result, action_stdout, NULL);
533c21
+}
533c21
+
533c21
 static void
533c21
 stonith_action_async_done(svc_action_t *svc_action)
533c21
 {
533c21
-- 
533c21
2.27.0
533c21
533c21
533c21
From 1f0121c6ad0d0235bcf01c8b60f9153592b3db83 Mon Sep 17 00:00:00 2001
533c21
From: Ken Gaillot <kgaillot@redhat.com>
533c21
Date: Thu, 11 Nov 2021 10:10:53 -0600
533c21
Subject: [PATCH 07/13] Refactor: fencing: rename functions for invoking fence
533c21
 callbacks
533c21
533c21
... to make it clearer what the difference between them is
533c21
---
533c21
 lib/fencing/st_client.c | 44 +++++++++++++++++++++++++++++++++--------
533c21
 1 file changed, 36 insertions(+), 8 deletions(-)
533c21
533c21
diff --git a/lib/fencing/st_client.c b/lib/fencing/st_client.c
533c21
index 944cd1863..dfc5860fc 100644
533c21
--- a/lib/fencing/st_client.c
533c21
+++ b/lib/fencing/st_client.c
533c21
@@ -847,9 +847,21 @@ stonith_api_del_callback(stonith_t * stonith, int call_id, bool all_callbacks)
533c21
     return pcmk_ok;
533c21
 }
533c21
 
533c21
+/*!
533c21
+ * \internal
533c21
+ * \brief Invoke a (single) specified fence action callback
533c21
+ *
533c21
+ * \param[in] st        Fencer API connection
533c21
+ * \param[in] call_id   If positive, call ID of completed fence action, otherwise
533c21
+ *                      legacy return code for early action failure
533c21
+ * \param[in] rc        Legacy return code for action result
533c21
+ * \param[in] userdata  User data to pass to callback
533c21
+ * \param[in] callback  Fence action callback to invoke
533c21
+ */
533c21
 static void
533c21
-invoke_callback(stonith_t * st, int call_id, int rc, void *userdata,
533c21
-                void (*callback) (stonith_t * st, stonith_callback_data_t * data))
533c21
+invoke_fence_action_callback(stonith_t *st, int call_id, int rc, void *userdata,
533c21
+                             void (*callback) (stonith_t *st,
533c21
+                                               stonith_callback_data_t *data))
533c21
 {
533c21
     stonith_callback_data_t data = { 0, };
533c21
 
533c21
@@ -860,8 +872,21 @@ invoke_callback(stonith_t * st, int call_id, int rc, void *userdata,
533c21
     callback(st, &data);
533c21
 }
533c21
 
533c21
+/*!
533c21
+ * \internal
533c21
+ * \brief Invoke any callbacks registered for a specified fence action result
533c21
+ *
533c21
+ * Given a fence action result from the fencer, invoke any callback registered
533c21
+ * for that action, as well as any global callback registered.
533c21
+ *
533c21
+ * \param[in] st        Fencer API connection
533c21
+ * \param[in] msg       If non-NULL, fencer reply
533c21
+ * \param[in] call_id   If \p msg is NULL, call ID of action that timed out
533c21
+ * \param[in] rc        Legacy return code for result of action
533c21
+ */
533c21
 static void
533c21
-stonith_perform_callback(stonith_t * stonith, xmlNode * msg, int call_id, int rc)
533c21
+invoke_registered_callbacks(stonith_t *stonith, xmlNode *msg, int call_id,
533c21
+                            int rc)
533c21
 {
533c21
     stonith_private_t *private = NULL;
533c21
     stonith_callback_client_t *blob = NULL;
533c21
@@ -899,7 +924,8 @@ stonith_perform_callback(stonith_t * stonith, xmlNode * msg, int call_id, int rc
533c21
 
533c21
     if (local_blob.callback != NULL && (rc == pcmk_ok || local_blob.only_success == FALSE)) {
533c21
         crm_trace("Invoking callback %s for call %d", crm_str(local_blob.id), call_id);
533c21
-        invoke_callback(stonith, call_id, rc, local_blob.user_data, local_blob.callback);
533c21
+        invoke_fence_action_callback(stonith, call_id, rc, local_blob.user_data,
533c21
+                                     local_blob.callback);
533c21
 
533c21
     } else if (private->op_callback == NULL && rc != pcmk_ok) {
533c21
         crm_warn("Fencing command failed: %s", pcmk_strerror(rc));
533c21
@@ -908,7 +934,8 @@ stonith_perform_callback(stonith_t * stonith, xmlNode * msg, int call_id, int rc
533c21
 
533c21
     if (private->op_callback != NULL) {
533c21
         crm_trace("Invoking global callback for call %d", call_id);
533c21
-        invoke_callback(stonith, call_id, rc, NULL, private->op_callback);
533c21
+        invoke_fence_action_callback(stonith, call_id, rc, NULL,
533c21
+                                     private->op_callback);
533c21
     }
533c21
     crm_trace("OP callback activated.");
533c21
 }
533c21
@@ -919,7 +946,7 @@ stonith_async_timeout_handler(gpointer data)
533c21
     struct timer_rec_s *timer = data;
533c21
 
533c21
     crm_err("Async call %d timed out after %dms", timer->call_id, timer->timeout);
533c21
-    stonith_perform_callback(timer->stonith, NULL, timer->call_id, -ETIME);
533c21
+    invoke_registered_callbacks(timer->stonith, NULL, timer->call_id, -ETIME);
533c21
 
533c21
     /* Always return TRUE, never remove the handler
533c21
      * We do that in stonith_del_callback()
533c21
@@ -994,7 +1021,7 @@ stonith_dispatch_internal(const char *buffer, ssize_t length, gpointer userdata)
533c21
     crm_trace("Activating %s callbacks...", type);
533c21
 
533c21
     if (pcmk__str_eq(type, T_STONITH_NG, pcmk__str_casei)) {
533c21
-        stonith_perform_callback(st, blob.xml, 0, 0);
533c21
+        invoke_registered_callbacks(st, blob.xml, 0, 0);
533c21
 
533c21
     } else if (pcmk__str_eq(type, T_STONITH_NOTIFY, pcmk__str_casei)) {
533c21
         foreach_notify_entry(private, stonith_send_notification, &blob;;
533c21
@@ -1229,7 +1256,8 @@ stonith_api_add_callback(stonith_t * stonith, int call_id, int timeout, int opti
533c21
     } else if (call_id < 0) {
533c21
         if (!(options & st_opt_report_only_success)) {
533c21
             crm_trace("Call failed, calling %s: %s", callback_name, pcmk_strerror(call_id));
533c21
-            invoke_callback(stonith, call_id, call_id, user_data, callback);
533c21
+            invoke_fence_action_callback(stonith, call_id, call_id, user_data,
533c21
+                                         callback);
533c21
         } else {
533c21
             crm_warn("Fencer call failed: %s", pcmk_strerror(call_id));
533c21
         }
533c21
-- 
533c21
2.27.0
533c21
533c21
533c21
From c32f11e70a88244f5a3217608055a4eaf8d28231 Mon Sep 17 00:00:00 2001
533c21
From: Ken Gaillot <kgaillot@redhat.com>
533c21
Date: Thu, 11 Nov 2021 10:21:00 -0600
533c21
Subject: [PATCH 08/13] Refactor: fencing: drop unnecessary argument when
533c21
 invoking callbacks
533c21
533c21
Refactor invoke_registered_callbacks() to treat a NULL message as a timeout, so
533c21
we can drop the rc argument.
533c21
---
533c21
 lib/fencing/st_client.c | 17 +++++++++++------
533c21
 1 file changed, 11 insertions(+), 6 deletions(-)
533c21
533c21
diff --git a/lib/fencing/st_client.c b/lib/fencing/st_client.c
533c21
index dfc5860fc..9f2b0c1c1 100644
533c21
--- a/lib/fencing/st_client.c
533c21
+++ b/lib/fencing/st_client.c
533c21
@@ -882,15 +882,14 @@ invoke_fence_action_callback(stonith_t *st, int call_id, int rc, void *userdata,
533c21
  * \param[in] st        Fencer API connection
533c21
  * \param[in] msg       If non-NULL, fencer reply
533c21
  * \param[in] call_id   If \p msg is NULL, call ID of action that timed out
533c21
- * \param[in] rc        Legacy return code for result of action
533c21
  */
533c21
 static void
533c21
-invoke_registered_callbacks(stonith_t *stonith, xmlNode *msg, int call_id,
533c21
-                            int rc)
533c21
+invoke_registered_callbacks(stonith_t *stonith, xmlNode *msg, int call_id)
533c21
 {
533c21
     stonith_private_t *private = NULL;
533c21
     stonith_callback_client_t *blob = NULL;
533c21
     stonith_callback_client_t local_blob;
533c21
+    int rc = pcmk_ok;
533c21
 
533c21
     CRM_CHECK(stonith != NULL, return);
533c21
     CRM_CHECK(stonith->st_private != NULL, return);
533c21
@@ -902,7 +901,13 @@ invoke_registered_callbacks(stonith_t *stonith, xmlNode *msg, int call_id,
533c21
     local_blob.user_data = NULL;
533c21
     local_blob.only_success = FALSE;
533c21
 
533c21
-    if (msg != NULL) {
533c21
+    if (msg == NULL) {
533c21
+        // Fencer didn't reply in time
533c21
+        rc = -ETIME;
533c21
+
533c21
+    } else {
533c21
+        // We have the fencer reply
533c21
+
533c21
         crm_element_value_int(msg, F_STONITH_RC, &rc);
533c21
         crm_element_value_int(msg, F_STONITH_CALLID, &call_id);
533c21
     }
533c21
@@ -946,7 +951,7 @@ stonith_async_timeout_handler(gpointer data)
533c21
     struct timer_rec_s *timer = data;
533c21
 
533c21
     crm_err("Async call %d timed out after %dms", timer->call_id, timer->timeout);
533c21
-    invoke_registered_callbacks(timer->stonith, NULL, timer->call_id, -ETIME);
533c21
+    invoke_registered_callbacks(timer->stonith, NULL, timer->call_id);
533c21
 
533c21
     /* Always return TRUE, never remove the handler
533c21
      * We do that in stonith_del_callback()
533c21
@@ -1021,7 +1026,7 @@ stonith_dispatch_internal(const char *buffer, ssize_t length, gpointer userdata)
533c21
     crm_trace("Activating %s callbacks...", type);
533c21
 
533c21
     if (pcmk__str_eq(type, T_STONITH_NG, pcmk__str_casei)) {
533c21
-        invoke_registered_callbacks(st, blob.xml, 0, 0);
533c21
+        invoke_registered_callbacks(st, blob.xml, 0);
533c21
 
533c21
     } else if (pcmk__str_eq(type, T_STONITH_NOTIFY, pcmk__str_casei)) {
533c21
         foreach_notify_entry(private, stonith_send_notification, &blob;;
533c21
-- 
533c21
2.27.0
533c21
533c21
533c21
From 5d8279b51ea9df738354649e4065663f2c16f1e6 Mon Sep 17 00:00:00 2001
533c21
From: Ken Gaillot <kgaillot@redhat.com>
533c21
Date: Thu, 11 Nov 2021 10:21:57 -0600
533c21
Subject: [PATCH 09/13] Log: fencing: improve message for callback errors
533c21
533c21
Improve checking of fencer replies, which also allows us to distinguish an
533c21
internal bug from a bad fencer reply in logs. Lower the bad reply message to
533c21
warning.
533c21
---
533c21
 lib/fencing/st_client.c | 13 +++++++++----
533c21
 1 file changed, 9 insertions(+), 4 deletions(-)
533c21
533c21
diff --git a/lib/fencing/st_client.c b/lib/fencing/st_client.c
533c21
index 9f2b0c1c1..170b9d450 100644
533c21
--- a/lib/fencing/st_client.c
533c21
+++ b/lib/fencing/st_client.c
533c21
@@ -904,15 +904,20 @@ invoke_registered_callbacks(stonith_t *stonith, xmlNode *msg, int call_id)
533c21
     if (msg == NULL) {
533c21
         // Fencer didn't reply in time
533c21
         rc = -ETIME;
533c21
+        CRM_LOG_ASSERT(call_id > 0);
533c21
 
533c21
     } else {
533c21
         // We have the fencer reply
533c21
 
533c21
-        crm_element_value_int(msg, F_STONITH_RC, &rc);
533c21
-        crm_element_value_int(msg, F_STONITH_CALLID, &call_id);
533c21
-    }
533c21
+        if (crm_element_value_int(msg, F_STONITH_RC, &rc) != 0) {
533c21
+            rc = -pcmk_err_generic;
533c21
+        }
533c21
 
533c21
-    CRM_CHECK(call_id > 0, crm_log_xml_err(msg, "Bad result"));
533c21
+        if ((crm_element_value_int(msg, F_STONITH_CALLID, &call_id) != 0)
533c21
+            || (call_id <= 0)) {
533c21
+            crm_log_xml_warn(msg, "Bad fencer reply");
533c21
+        }
533c21
+    }
533c21
 
533c21
     blob = pcmk__intkey_table_lookup(private->stonith_op_callback_table,
533c21
                                      call_id);
533c21
-- 
533c21
2.27.0
533c21
533c21
533c21
From e03c14d24e8cb011e870b9460930d139705bf0a2 Mon Sep 17 00:00:00 2001
533c21
From: Ken Gaillot <kgaillot@redhat.com>
533c21
Date: Tue, 9 Nov 2021 14:59:12 -0600
533c21
Subject: [PATCH 10/13] Doc: fencing: correct stonith_api_operations_t method
533c21
 descriptions
533c21
533c21
Many of the methods return a positive call ID on success
533c21
---
533c21
 include/crm/stonith-ng.h | 60 ++++++++++++++++++++++------------------
533c21
 1 file changed, 33 insertions(+), 27 deletions(-)
533c21
533c21
diff --git a/include/crm/stonith-ng.h b/include/crm/stonith-ng.h
533c21
index 8d6ad477d..9643820e9 100644
533c21
--- a/include/crm/stonith-ng.h
533c21
+++ b/include/crm/stonith-ng.h
533c21
@@ -164,39 +164,38 @@ typedef struct stonith_api_operations_s
533c21
     int (*disconnect)(stonith_t *st);
533c21
 
533c21
     /*!
533c21
-     * \brief Remove a registered stonith device with the local stonith daemon.
533c21
+     * \brief Unregister a fence device with the local fencer
533c21
      *
533c21
-     * \note Synchronous, guaranteed to occur in daemon before function returns.
533c21
-     *
533c21
-     * \return Legacy Pacemaker return code
533c21
+     * \return pcmk_ok (if synchronous) or positive call ID (if asynchronous)
533c21
+     *         on success, otherwise a negative legacy Pacemaker return code
533c21
      */
533c21
     int (*remove_device)(
533c21
         stonith_t *st, int options, const char *name);
533c21
 
533c21
     /*!
533c21
-     * \brief Register a stonith device with the local stonith daemon.
533c21
+     * \brief Register a fence device with the local fencer
533c21
      *
533c21
-     * \note Synchronous, guaranteed to occur in daemon before function returns.
533c21
-     *
533c21
-     * \return Legacy Pacemaker return code
533c21
+     * \return pcmk_ok (if synchronous) or positive call ID (if asynchronous)
533c21
+     *         on success, otherwise a negative legacy Pacemaker return code
533c21
      */
533c21
     int (*register_device)(
533c21
         stonith_t *st, int options, const char *id,
533c21
         const char *provider, const char *agent, stonith_key_value_t *params);
533c21
 
533c21
     /*!
533c21
-     * \brief Remove a fencing level for a specific node.
533c21
+     * \brief Unregister a fencing level for specified node with local fencer
533c21
      *
533c21
-     * \return Legacy Pacemaker return code
533c21
+     * \return pcmk_ok (if synchronous) or positive call ID (if asynchronous)
533c21
+     *         on success, otherwise a negative legacy Pacemaker return code
533c21
      */
533c21
     int (*remove_level)(
533c21
         stonith_t *st, int options, const char *node, int level);
533c21
 
533c21
     /*!
533c21
-     * \brief Register a fencing level containing the fencing devices to be used
533c21
-     *        at that level for a specific node.
533c21
+     * \brief Register a fencing level for specified node with local fencer
533c21
      *
533c21
-     * \return Legacy Pacemaker return code
533c21
+     * \return pcmk_ok (if synchronous) or positive call ID (if asynchronous)
533c21
+     *         on success, otherwise a negative legacy Pacemaker return code
533c21
      */
533c21
     int (*register_level)(
533c21
         stonith_t *st, int options, const char *node, int level, stonith_key_value_t *device_list);
533c21
@@ -226,21 +225,24 @@ typedef struct stonith_api_operations_s
533c21
     /*!
533c21
      * \brief Retrieve string listing hosts and port assignments from a local stonith device.
533c21
      *
533c21
-     * \return Legacy Pacemaker return code
533c21
+     * \return pcmk_ok (if synchronous) or positive call ID (if asynchronous)
533c21
+     *         on success, otherwise a negative legacy Pacemaker return code
533c21
      */
533c21
     int (*list)(stonith_t *st, int options, const char *id, char **list_output, int timeout);
533c21
 
533c21
     /*!
533c21
      * \brief Check to see if a local stonith device is reachable
533c21
      *
533c21
-     * \return Legacy Pacemaker return code
533c21
+     * \return pcmk_ok (if synchronous) or positive call ID (if asynchronous)
533c21
+     *         on success, otherwise a negative legacy Pacemaker return code
533c21
      */
533c21
     int (*monitor)(stonith_t *st, int options, const char *id, int timeout);
533c21
 
533c21
     /*!
533c21
      * \brief Check to see if a local stonith device's port is reachable
533c21
      *
533c21
-     * \return Legacy Pacemaker return code
533c21
+     * \return pcmk_ok (if synchronous) or positive call ID (if asynchronous)
533c21
+     *         on success, otherwise a negative legacy Pacemaker return code
533c21
      */
533c21
     int (*status)(stonith_t *st, int options, const char *id, const char *port, int timeout);
533c21
 
533c21
@@ -267,7 +269,8 @@ typedef struct stonith_api_operations_s
533c21
      * \param timeout, The default per device timeout to use with each device
533c21
      *                 capable of fencing the target.
533c21
      *
533c21
-     * \return Legacy Pacemaker return code
533c21
+     * \return pcmk_ok (if synchronous) or positive call ID (if asynchronous)
533c21
+     *         on success, otherwise a negative legacy Pacemaker return code
533c21
      */
533c21
     int (*fence)(stonith_t *st, int options, const char *node, const char *action,
533c21
                  int timeout, int tolerance);
533c21
@@ -275,7 +278,8 @@ typedef struct stonith_api_operations_s
533c21
     /*!
533c21
      * \brief Manually confirm that a node is down.
533c21
      *
533c21
-     * \return Legacy Pacemaker return code
533c21
+     * \return pcmk_ok (if synchronous) or positive call ID (if asynchronous)
533c21
+     *         on success, otherwise a negative legacy Pacemaker return code
533c21
      */
533c21
     int (*confirm)(stonith_t *st, int options, const char *node);
533c21
 
533c21
@@ -304,9 +308,6 @@ typedef struct stonith_api_operations_s
533c21
      * \param[in] callback       The callback function to register
533c21
      *
533c21
      * \return \c TRUE on success, \c FALSE if call_id is negative, -errno otherwise
533c21
-     *
533c21
-     * \todo This function should return \c pcmk_ok on success, and \c call_id
533c21
-     *       when negative, but that would break backward compatibility.
533c21
      */
533c21
     int (*register_callback)(stonith_t *st,
533c21
         int call_id,
533c21
@@ -317,12 +318,14 @@ typedef struct stonith_api_operations_s
533c21
         void (*callback)(stonith_t *st, stonith_callback_data_t *data));
533c21
 
533c21
     /*!
533c21
-     * \brief Remove a registered callback for a given call id.
533c21
+     * \brief Remove a registered callback for a given call id
533c21
+     *
533c21
+     * \return pcmk_ok
533c21
      */
533c21
     int (*remove_callback)(stonith_t *st, int call_id, bool all_callbacks);
533c21
 
533c21
     /*!
533c21
-     * \brief Remove fencing level for specific node, node regex or attribute
533c21
+     * \brief Unregister fencing level for specified node, pattern or attribute
533c21
      *
533c21
      * \param[in] st      Fencer connection to use
533c21
      * \param[in] options Bitmask of stonith_call_options to pass to the fencer
533c21
@@ -332,7 +335,8 @@ typedef struct stonith_api_operations_s
533c21
      * \param[in] value   If not NULL, target by this node attribute value
533c21
      * \param[in] level   Index number of level to remove
533c21
      *
533c21
-     * \return 0 on success, negative error code otherwise
533c21
+     * \return pcmk_ok (if synchronous) or positive call ID (if asynchronous)
533c21
+     *         on success, otherwise a negative legacy Pacemaker return code
533c21
      *
533c21
      * \note The caller should set only one of node, pattern or attr/value.
533c21
      */
533c21
@@ -341,7 +345,7 @@ typedef struct stonith_api_operations_s
533c21
                              const char *attr, const char *value, int level);
533c21
 
533c21
     /*!
533c21
-     * \brief Register fencing level for specific node, node regex or attribute
533c21
+     * \brief Register fencing level for specified node, pattern or attribute
533c21
      *
533c21
      * \param[in] st          Fencer connection to use
533c21
      * \param[in] options     Bitmask of stonith_call_options to pass to fencer
533c21
@@ -352,7 +356,8 @@ typedef struct stonith_api_operations_s
533c21
      * \param[in] level       Index number of level to add
533c21
      * \param[in] device_list Devices to use in level
533c21
      *
533c21
-     * \return 0 on success, negative error code otherwise
533c21
+     * \return pcmk_ok (if synchronous) or positive call ID (if asynchronous)
533c21
+     *         on success, otherwise a negative legacy Pacemaker return code
533c21
      *
533c21
      * \note The caller should set only one of node, pattern or attr/value.
533c21
      */
533c21
@@ -398,7 +403,8 @@ typedef struct stonith_api_operations_s
533c21
      * \param delay, Apply a fencing delay. Value -1 means disable also any
533c21
      *               static/random fencing delays from pcmk_delay_base/max
533c21
      *
533c21
-     * \return Legacy Pacemaker return code
533c21
+     * \return pcmk_ok (if synchronous) or positive call ID (if asynchronous)
533c21
+     *         on success, otherwise a negative legacy Pacemaker return code
533c21
      */
533c21
     int (*fence_with_delay)(stonith_t *st, int options, const char *node, const char *action,
533c21
                             int timeout, int tolerance, int delay);
533c21
-- 
533c21
2.27.0
533c21
533c21
533c21
From 18c382731889b626b21ba6a14f9213ef1e45a524 Mon Sep 17 00:00:00 2001
533c21
From: Ken Gaillot <kgaillot@redhat.com>
533c21
Date: Tue, 23 Nov 2021 11:14:24 -0600
533c21
Subject: [PATCH 11/13] Refactor: fencing: define constant for XML attribute
533c21
 for action output
533c21
533c21
---
533c21
 daemons/fenced/fenced_commands.c | 4 ++--
533c21
 include/crm/fencing/internal.h   | 1 +
533c21
 lib/fencing/st_actions.c         | 4 ++--
533c21
 lib/fencing/st_client.c          | 2 +-
533c21
 4 files changed, 6 insertions(+), 5 deletions(-)
533c21
533c21
diff --git a/daemons/fenced/fenced_commands.c b/daemons/fenced/fenced_commands.c
533c21
index 26501a4b3..aa14c52af 100644
533c21
--- a/daemons/fenced/fenced_commands.c
533c21
+++ b/daemons/fenced/fenced_commands.c
533c21
@@ -2677,7 +2677,7 @@ stonith_construct_reply(xmlNode * request, const char *output, xmlNode * data, i
533c21
 
533c21
     crm_xml_add(reply, "st_origin", __func__);
533c21
     crm_xml_add(reply, F_TYPE, T_STONITH_NG);
533c21
-    crm_xml_add(reply, "st_output", output);
533c21
+    crm_xml_add(reply, F_STONITH_OUTPUT, output);
533c21
     crm_xml_add_int(reply, F_STONITH_RC, rc);
533c21
 
533c21
     if (request == NULL) {
533c21
@@ -2743,7 +2743,7 @@ construct_async_reply(async_command_t *cmd, const pcmk__action_result_t *result)
533c21
     crm_xml_add_int(reply, F_STONITH_CALLOPTS, cmd->options);
533c21
     crm_xml_add_int(reply, F_STONITH_RC,
533c21
                     pcmk_rc2legacy(stonith__result2rc(result)));
533c21
-    crm_xml_add(reply, "st_output", result->action_stdout);
533c21
+    crm_xml_add(reply, F_STONITH_OUTPUT, result->action_stdout);
533c21
     return reply;
533c21
 }
533c21
 
533c21
diff --git a/include/crm/fencing/internal.h b/include/crm/fencing/internal.h
533c21
index 4b5fd3959..f0d294a0b 100644
533c21
--- a/include/crm/fencing/internal.h
533c21
+++ b/include/crm/fencing/internal.h
533c21
@@ -105,6 +105,7 @@ void stonith__device_parameter_flags(uint32_t *device_flags,
533c21
 #  define F_STONITH_REMOTE_OP_ID  "st_remote_op"
533c21
 #  define F_STONITH_REMOTE_OP_ID_RELAY  "st_remote_op_relay"
533c21
 #  define F_STONITH_RC            "st_rc"
533c21
+#  define F_STONITH_OUTPUT        "st_output"
533c21
 /*! Timeout period per a device execution */
533c21
 #  define F_STONITH_TIMEOUT       "st_timeout"
533c21
 #  define F_STONITH_TOLERANCE     "st_tolerance"
533c21
diff --git a/lib/fencing/st_actions.c b/lib/fencing/st_actions.c
533c21
index d4fc3f5ed..5636810a5 100644
533c21
--- a/lib/fencing/st_actions.c
533c21
+++ b/lib/fencing/st_actions.c
533c21
@@ -425,7 +425,7 @@ stonith__xe_set_result(xmlNode *xml, const pcmk__action_result_t *result)
533c21
     crm_xml_add_int(xml, XML_LRM_ATTR_OPSTATUS, (int) execution_status);
533c21
     crm_xml_add_int(xml, XML_LRM_ATTR_RC, exit_status);
533c21
     crm_xml_add(xml, XML_LRM_ATTR_EXIT_REASON, exit_reason);
533c21
-    crm_xml_add(xml, "st_output", action_stdout);
533c21
+    crm_xml_add(xml, F_STONITH_OUTPUT, action_stdout);
533c21
 
533c21
     /* @COMPAT Peers in rolling upgrades, Pacemaker Remote nodes, and external
533c21
      * code that use libstonithd <=2.1.2 don't check for the full result, and
533c21
@@ -474,7 +474,7 @@ stonith__xe_get_result(xmlNode *xml, pcmk__action_result_t *result)
533c21
     CRM_CHECK((xml != NULL) && (result != NULL), return);
533c21
 
533c21
     exit_reason = crm_element_value(xml, XML_LRM_ATTR_EXIT_REASON);
533c21
-    action_stdout = crm_element_value_copy(xml, "st_output");
533c21
+    action_stdout = crm_element_value_copy(xml, F_STONITH_OUTPUT);
533c21
 
533c21
     // A result must include an exit status and execution status
533c21
     if ((crm_element_value_int(xml, XML_LRM_ATTR_RC, &exit_status) < 0)
533c21
diff --git a/lib/fencing/st_client.c b/lib/fencing/st_client.c
533c21
index 170b9d450..2dfadf922 100644
533c21
--- a/lib/fencing/st_client.c
533c21
+++ b/lib/fencing/st_client.c
533c21
@@ -600,7 +600,7 @@ stonith_api_list(stonith_t * stonith, int call_options, const char *id, char **l
533c21
     if (output && list_info) {
533c21
         const char *list_str;
533c21
 
533c21
-        list_str = crm_element_value(output, "st_output");
533c21
+        list_str = crm_element_value(output, F_STONITH_OUTPUT);
533c21
 
533c21
         if (list_str) {
533c21
             *list_info = strdup(list_str);
533c21
-- 
533c21
2.27.0
533c21
533c21
533c21
From 9fe9ed5d46c810cb9c12eb07271373ab92d271cd Mon Sep 17 00:00:00 2001
533c21
From: Ken Gaillot <kgaillot@redhat.com>
533c21
Date: Tue, 23 Nov 2021 11:39:32 -0600
533c21
Subject: [PATCH 12/13] Refactor: fencing: simplify invoking callbacks
533c21
533c21
---
533c21
 lib/fencing/st_client.c | 42 +++++++++++++++++------------------------
533c21
 1 file changed, 17 insertions(+), 25 deletions(-)
533c21
533c21
diff --git a/lib/fencing/st_client.c b/lib/fencing/st_client.c
533c21
index 2dfadf922..2ca094566 100644
533c21
--- a/lib/fencing/st_client.c
533c21
+++ b/lib/fencing/st_client.c
533c21
@@ -887,8 +887,7 @@ static void
533c21
 invoke_registered_callbacks(stonith_t *stonith, xmlNode *msg, int call_id)
533c21
 {
533c21
     stonith_private_t *private = NULL;
533c21
-    stonith_callback_client_t *blob = NULL;
533c21
-    stonith_callback_client_t local_blob;
533c21
+    stonith_callback_client_t *cb_info = NULL;
533c21
     int rc = pcmk_ok;
533c21
 
533c21
     CRM_CHECK(stonith != NULL, return);
533c21
@@ -896,11 +895,6 @@ invoke_registered_callbacks(stonith_t *stonith, xmlNode *msg, int call_id)
533c21
 
533c21
     private = stonith->st_private;
533c21
 
533c21
-    local_blob.id = NULL;
533c21
-    local_blob.callback = NULL;
533c21
-    local_blob.user_data = NULL;
533c21
-    local_blob.only_success = FALSE;
533c21
-
533c21
     if (msg == NULL) {
533c21
         // Fencer didn't reply in time
533c21
         rc = -ETIME;
533c21
@@ -919,26 +913,21 @@ invoke_registered_callbacks(stonith_t *stonith, xmlNode *msg, int call_id)
533c21
         }
533c21
     }
533c21
 
533c21
-    blob = pcmk__intkey_table_lookup(private->stonith_op_callback_table,
533c21
-                                     call_id);
533c21
-    if (blob != NULL) {
533c21
-        local_blob = *blob;
533c21
-        blob = NULL;
533c21
-
533c21
-        stonith_api_del_callback(stonith, call_id, FALSE);
533c21
-
533c21
-    } else {
533c21
-        crm_trace("No callback found for call %d", call_id);
533c21
-        local_blob.callback = NULL;
533c21
+    if (call_id > 0) {
533c21
+        cb_info = pcmk__intkey_table_lookup(private->stonith_op_callback_table,
533c21
+                                            call_id);
533c21
     }
533c21
 
533c21
-    if (local_blob.callback != NULL && (rc == pcmk_ok || local_blob.only_success == FALSE)) {
533c21
-        crm_trace("Invoking callback %s for call %d", crm_str(local_blob.id), call_id);
533c21
-        invoke_fence_action_callback(stonith, call_id, rc, local_blob.user_data,
533c21
-                                     local_blob.callback);
533c21
+    if ((cb_info != NULL) && (cb_info->callback != NULL)
533c21
+        && (rc == pcmk_ok || !(cb_info->only_success))) {
533c21
+        crm_trace("Invoking callback %s for call %d",
533c21
+                  crm_str(cb_info->id), call_id);
533c21
+        invoke_fence_action_callback(stonith, call_id, rc, cb_info->user_data,
533c21
+                                     cb_info->callback);
533c21
 
533c21
-    } else if (private->op_callback == NULL && rc != pcmk_ok) {
533c21
-        crm_warn("Fencing command failed: %s", pcmk_strerror(rc));
533c21
+    } else if ((private->op_callback == NULL) && (rc != pcmk_ok)) {
533c21
+        crm_warn("Fencing action without registered callback failed: %s",
533c21
+                 pcmk_strerror(rc));
533c21
         crm_log_xml_debug(msg, "Failed fence update");
533c21
     }
533c21
 
533c21
@@ -947,7 +936,10 @@ invoke_registered_callbacks(stonith_t *stonith, xmlNode *msg, int call_id)
533c21
         invoke_fence_action_callback(stonith, call_id, rc, NULL,
533c21
                                      private->op_callback);
533c21
     }
533c21
-    crm_trace("OP callback activated.");
533c21
+
533c21
+    if (cb_info != NULL) {
533c21
+        stonith_api_del_callback(stonith, call_id, FALSE);
533c21
+    }
533c21
 }
533c21
 
533c21
 static gboolean
533c21
-- 
533c21
2.27.0
533c21
533c21
533c21
From 8113b800ce677ba17a16ca176e8f6f9b4a042316 Mon Sep 17 00:00:00 2001
533c21
From: Ken Gaillot <kgaillot@redhat.com>
533c21
Date: Tue, 23 Nov 2021 18:14:48 -0600
533c21
Subject: [PATCH 13/13] Refactor: fencing: add a missing "break" statement
533c21
533c21
No effect, but more correct
533c21
---
533c21
 lib/fencing/st_actions.c | 1 +
533c21
 1 file changed, 1 insertion(+)
533c21
533c21
diff --git a/lib/fencing/st_actions.c b/lib/fencing/st_actions.c
533c21
index 5636810a5..7eaa8b0f2 100644
533c21
--- a/lib/fencing/st_actions.c
533c21
+++ b/lib/fencing/st_actions.c
533c21
@@ -336,6 +336,7 @@ stonith__result2rc(const pcmk__action_result_t *result)
533c21
                 case CRM_EX_EXPIRED:            return EHOSTUNREACH;
533c21
                 default:                        break;
533c21
             }
533c21
+            break;
533c21
 
533c21
         default:
533c21
             break;
533c21
-- 
533c21
2.27.0
533c21