Blob Blame History Raw
From fcd42a5926e9a63d425586552ecc7b543838d352 Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Thu, 11 Nov 2021 16:57:03 -0600
Subject: [PATCH 01/23] Feature: fencer: pass full result in async command
 replies

The services library callbacks for async commands, which call
send_async_reply() -> construct_async_reply() to create the reply, now add
fields for exit status, operation status, and exit reason, in addition to the
existing action standard output and legacy return code.

Nothing uses the new fields yet.
---
 daemons/fenced/fenced_commands.c | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/daemons/fenced/fenced_commands.c b/daemons/fenced/fenced_commands.c
index f34cb4f136..3497428c18 100644
--- a/daemons/fenced/fenced_commands.c
+++ b/daemons/fenced/fenced_commands.c
@@ -2415,9 +2415,8 @@ send_async_reply(async_command_t *cmd, const pcmk__action_result_t *result,
     if (stand_alone) {
         /* Do notification with a clean data object */
         xmlNode *notify_data = create_xml_node(NULL, T_STONITH_NOTIFY_FENCE);
-        int rc = pcmk_rc2legacy(stonith__result2rc(result));
 
-        crm_xml_add_int(notify_data, F_STONITH_RC, rc);
+        stonith__xe_set_result(notify_data, result);
         crm_xml_add(notify_data, F_STONITH_TARGET, cmd->victim);
         crm_xml_add(notify_data, F_STONITH_OPERATION, cmd->op);
         crm_xml_add(notify_data, F_STONITH_DELEGATE, "localhost");
@@ -2425,7 +2424,7 @@ send_async_reply(async_command_t *cmd, const pcmk__action_result_t *result,
         crm_xml_add(notify_data, F_STONITH_REMOTE_OP_ID, cmd->remote_op_id);
         crm_xml_add(notify_data, F_STONITH_ORIGIN, cmd->client);
 
-        do_stonith_notify(T_STONITH_NOTIFY_FENCE, rc, notify_data);
+        do_stonith_notify(T_STONITH_NOTIFY_FENCE, pcmk_rc2legacy(stonith__result2rc(result)), notify_data);
         do_stonith_notify(T_STONITH_NOTIFY_HISTORY, pcmk_ok, NULL);
     }
 }
@@ -2728,9 +2727,8 @@ construct_async_reply(async_command_t *cmd, const pcmk__action_result_t *result)
     crm_xml_add(reply, F_STONITH_ORIGIN, cmd->origin);
     crm_xml_add_int(reply, F_STONITH_CALLID, cmd->id);
     crm_xml_add_int(reply, F_STONITH_CALLOPTS, cmd->options);
-    crm_xml_add_int(reply, F_STONITH_RC,
-                    pcmk_rc2legacy(stonith__result2rc(result)));
-    crm_xml_add(reply, F_STONITH_OUTPUT, result->action_stdout);
+
+    stonith__xe_set_result(reply, result);
     return reply;
 }
 
-- 
2.27.0


From 4bac2e9811872f92571e4f5a47d8c5032cfc3016 Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Thu, 18 Nov 2021 12:41:29 -0600
Subject: [PATCH 02/23] Refactor: fencer: track full result for direct agent
 actions

This renames stonith_device_action() to execute_agent_action() for readability,
and has it set a full result rather than return a legacy return code.

As of this commit, handle_request() just maps the result back to a legacy code,
but it will make better use of it with planned changes.
---
 daemons/fenced/fenced_commands.c | 95 +++++++++++++++++++-------------
 1 file changed, 56 insertions(+), 39 deletions(-)

diff --git a/daemons/fenced/fenced_commands.c b/daemons/fenced/fenced_commands.c
index 3497428c18..2f59ef84b7 100644
--- a/daemons/fenced/fenced_commands.c
+++ b/daemons/fenced/fenced_commands.c
@@ -1729,23 +1729,6 @@ stonith_level_remove(xmlNode *msg, char **desc)
     return pcmk_ok;
 }
 
-/*!
- * \internal
- * \brief Schedule an (asynchronous) action directly on a stonith device
- *
- * Handle a STONITH_OP_EXEC API message by scheduling a requested agent action
- * directly on a specified device. Only list, monitor, and status actions are
- * expected to use this call, though it should work with any agent command.
- *
- * \param[in]  msg     API message XML with desired action
- * \param[out] output  Unused
- *
- * \return -EINPROGRESS on success, -errno otherwise
- * \note If the action is monitor, the device must be registered via the API
- *       (CIB registration is not sufficient), because monitor should not be
- *       possible unless the device is "started" (API registered).
- */
-
 static char *
 list_to_string(GList *list, const char *delim, gboolean terminate_with_delim)
 {
@@ -1778,8 +1761,23 @@ list_to_string(GList *list, const char *delim, gboolean terminate_with_delim)
     return rv;
 }
 
-static int
-stonith_device_action(xmlNode * msg, char **output)
+/*!
+ * \internal
+ * \brief Execute a fence agent action directly (and asynchronously)
+ *
+ * Handle a STONITH_OP_EXEC API message by scheduling a requested agent action
+ * directly on a specified device. Only list, monitor, and status actions are
+ * expected to use this call, though it should work with any agent command.
+ *
+ * \param[in]  msg     Request XML specifying action
+ * \param[out] result  Where to store result of action
+ *
+ * \note If the action is monitor, the device must be registered via the API
+ *       (CIB registration is not sufficient), because monitor should not be
+ *       possible unless the device is "started" (API registered).
+ */
+static void
+execute_agent_action(xmlNode *msg, pcmk__action_result_t *result)
 {
     xmlNode *dev = get_xpath_object("//" F_STONITH_DEVICE, msg, LOG_ERR);
     xmlNode *op = get_xpath_object("//@" F_STONITH_ACTION, msg, LOG_ERR);
@@ -1792,39 +1790,56 @@ stonith_device_action(xmlNode * msg, char **output)
         crm_info("Malformed API action request: device %s, action %s",
                  (id? id : "not specified"),
                  (action? action : "not specified"));
-        return -EPROTO;
+        fenced_set_protocol_error(result);
+        return;
     }
 
     if (pcmk__str_eq(id, STONITH_WATCHDOG_ID, pcmk__str_none)) {
+        // Watchdog agent actions are implemented internally
         if (stonith_watchdog_timeout_ms <= 0) {
-            return -ENODEV;
-        } else {
-            if (pcmk__str_eq(action, "list", pcmk__str_casei)) {
-                *output = list_to_string(stonith_watchdog_targets, "\n", TRUE);
-                return pcmk_ok;
-            } else if (pcmk__str_eq(action, "monitor", pcmk__str_casei)) {
-                return pcmk_ok;
-            }
+            pcmk__set_result(result, CRM_EX_ERROR, PCMK_EXEC_NO_FENCE_DEVICE,
+                             "Watchdog fence device not configured");
+            return;
+
+        } else if (pcmk__str_eq(action, "list", pcmk__str_casei)) {
+            pcmk__set_result(result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
+            pcmk__set_result_output(result,
+                                    list_to_string(stonith_watchdog_targets,
+                                                   "\n", TRUE),
+                                    NULL);
+            return;
+
+        } else if (pcmk__str_eq(action, "monitor", pcmk__str_casei)) {
+            pcmk__set_result(result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
+            return;
         }
     }
 
     device = g_hash_table_lookup(device_list, id);
-    if ((device == NULL)
-        || (!device->api_registered && !strcmp(action, "monitor"))) {
+    if (device == NULL) {
+        crm_info("Ignoring API '%s' action request because device %s not found",
+                 action, id);
+        pcmk__set_result(result, CRM_EX_ERROR, PCMK_EXEC_NO_FENCE_DEVICE,
+                         NULL);
+        return;
 
+    } else if (!device->api_registered && !strcmp(action, "monitor")) {
         // Monitors may run only on "started" (API-registered) devices
-        crm_info("Ignoring API '%s' action request because device %s not found",
+        crm_info("Ignoring API '%s' action request because device %s not active",
                  action, id);
-        return -ENODEV;
+        pcmk__set_result(result, CRM_EX_ERROR, PCMK_EXEC_NO_FENCE_DEVICE,
+                         "Fence device not active");
+        return;
     }
 
     cmd = create_async_command(msg);
     if (cmd == NULL) {
-        return -EPROTO;
+        fenced_set_protocol_error(result);
+        return;
     }
 
     schedule_stonith_command(cmd, device);
-    return -EINPROGRESS;
+    pcmk__set_result(result, CRM_EX_OK, PCMK_EXEC_PENDING, NULL);
 }
 
 static void
@@ -2911,8 +2926,8 @@ handle_request(pcmk__client_t *client, uint32_t id, uint32_t flags,
 
     xmlNode *data = NULL;
     bool need_reply = true;
+    pcmk__action_result_t result = PCMK__UNKNOWN_RESULT;
 
-    char *output = NULL;
     const char *op = crm_element_value(request, F_STONITH_OPERATION);
     const char *client_id = crm_element_value(request, F_STONITH_CLIENTID);
 
@@ -2935,8 +2950,9 @@ handle_request(pcmk__client_t *client, uint32_t id, uint32_t flags,
         need_reply = false;
 
     } else if (pcmk__str_eq(op, STONITH_OP_EXEC, pcmk__str_none)) {
-        rc = stonith_device_action(request, &output);
-        need_reply = (rc != -EINPROGRESS);
+        execute_agent_action(request, &result);
+        need_reply = (result.execution_status != PCMK_EXEC_PENDING);
+        rc = pcmk_rc2legacy(stonith__result2rc(&result));
 
     } else if (pcmk__str_eq(op, STONITH_OP_TIMEOUT_UPDATE, pcmk__str_none)) {
         const char *call_id = crm_element_value(request, F_STONITH_CALLID);
@@ -3150,19 +3166,20 @@ handle_request(pcmk__client_t *client, uint32_t id, uint32_t flags,
 done:
     // Reply if result is known
     if (need_reply) {
-        xmlNode *reply = stonith_construct_reply(request, output, data, rc);
+        xmlNode *reply = stonith_construct_reply(request, result.action_stdout, data, rc);
 
         stonith_send_reply(reply, call_options, remote_peer, client_id);
         free_xml(reply);
     }
 
-    free(output);
     free_xml(data);
 
     crm_debug("Processed %s request from %s %s: %s (rc=%d)",
               op, ((client == NULL)? "peer" : "client"),
               ((client == NULL)? remote_peer : pcmk__client_name(client)),
               ((rc > 0)? "" : pcmk_strerror(rc)), rc);
+
+    pcmk__reset_result(&result);
 }
 
 static void
-- 
2.27.0


From 9601b2aff1ea6a4eef0bb2701c22c1e971a657eb Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Thu, 18 Nov 2021 17:31:20 -0600
Subject: [PATCH 03/23] Refactor: fencer: track full result for local fencing

This renames stonith_fence() to fence_locally() for readability, and has it set
a full result rather than return a legacy return code.

As of this commit, handle_request() just maps the result back to a legacy code,
but it will make better use of it with planned changes.
---
 daemons/fenced/fenced_commands.c | 38 +++++++++++++++++++++-----------
 1 file changed, 25 insertions(+), 13 deletions(-)

diff --git a/daemons/fenced/fenced_commands.c b/daemons/fenced/fenced_commands.c
index 2f59ef84b7..bfb0d71e5f 100644
--- a/daemons/fenced/fenced_commands.c
+++ b/daemons/fenced/fenced_commands.c
@@ -2626,37 +2626,49 @@ stonith_fence_get_devices_cb(GList * devices, void *user_data)
     }
 }
 
-static int
-stonith_fence(xmlNode * msg)
+/*!
+ * \internal
+ * \brief Execute a fence action via the local node
+ *
+ * \param[in]  msg     Fencing request
+ * \param[out] result  Where to store result of fence action
+ */
+static void
+fence_locally(xmlNode *msg, pcmk__action_result_t *result)
 {
     const char *device_id = NULL;
     stonith_device_t *device = NULL;
     async_command_t *cmd = create_async_command(msg);
     xmlNode *dev = get_xpath_object("//@" F_STONITH_TARGET, msg, LOG_ERR);
 
+    CRM_CHECK(result != NULL, return);
+
     if (cmd == NULL) {
-        return -EPROTO;
+        fenced_set_protocol_error(result);
+        return;
     }
 
     device_id = crm_element_value(dev, F_STONITH_DEVICE);
-    if (device_id) {
+    if (device_id != NULL) {
         device = g_hash_table_lookup(device_list, device_id);
         if (device == NULL) {
             crm_err("Requested device '%s' is not available", device_id);
-            return -ENODEV;
+            pcmk__set_result(result, CRM_EX_ERROR, PCMK_EXEC_NO_FENCE_DEVICE,
+                             "Requested fence device not found");
+            return;
         }
         schedule_stonith_command(cmd, device);
 
     } else {
         const char *host = crm_element_value(dev, F_STONITH_TARGET);
 
-        if (cmd->options & st_opt_cs_nodeid) {
-            int nodeid;
-            crm_node_t *node;
+        if (pcmk_is_set(cmd->options, st_opt_cs_nodeid)) {
+            int nodeid = 0;
+            crm_node_t *node = NULL;
 
             pcmk__scan_min_int(host, &nodeid, 0);
             node = pcmk__search_known_node_cache(nodeid, NULL, CRM_GET_PEER_ANY);
-            if (node) {
+            if (node != NULL) {
                 host = node->uname;
             }
         }
@@ -2666,7 +2678,7 @@ stonith_fence(xmlNode * msg)
                             TRUE, cmd, stonith_fence_get_devices_cb);
     }
 
-    return -EINPROGRESS;
+    pcmk__set_result(result, CRM_EX_OK, PCMK_EXEC_PENDING, NULL);
 }
 
 xmlNode *
@@ -3016,9 +3028,9 @@ handle_request(pcmk__client_t *client, uint32_t id, uint32_t flags,
         }
 
     } else if (pcmk__str_eq(op, STONITH_OP_FENCE, pcmk__str_none)) {
-
-        if (remote_peer || stand_alone) {
-            rc = stonith_fence(request);
+        if ((remote_peer != NULL) || stand_alone) {
+            fence_locally(request, &result);
+            rc = pcmk_rc2legacy(stonith__result2rc(&result));
 
         } else if (pcmk_is_set(call_options, st_opt_manual_ack)) {
             switch (fenced_handle_manual_confirmation(client, request)) {
-- 
2.27.0


From b7c7676cfd36fd72d3b29e86a23db97081e19b03 Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Mon, 29 Nov 2021 17:06:52 -0600
Subject: [PATCH 04/23] Low: fencer: handle topology level registration errors
 better

Rename stonith_level_register() to fenced_register_level() for consistency, and
refactor it to return a full result rather than a legacy return code.

Return a protocol error for missing information in the request XML, and log
invalid level numbers at warning level. Use a new combination of
PCMK_EXEC_INVALID with CRM_EX_INVALID_PARAM for invalid levels, so it gets
mapped back to the legacy code -EINVAL (which was returned before).
---
 daemons/fenced/fenced_commands.c  | 52 +++++++++++++++++++++----------
 daemons/fenced/pacemaker-fenced.c |  9 +++---
 daemons/fenced/pacemaker-fenced.h |  3 +-
 lib/fencing/st_actions.c          |  1 +
 4 files changed, 44 insertions(+), 21 deletions(-)

diff --git a/daemons/fenced/fenced_commands.c b/daemons/fenced/fenced_commands.c
index bfb0d71e5f..975f8633a4 100644
--- a/daemons/fenced/fenced_commands.c
+++ b/daemons/fenced/fenced_commands.c
@@ -1583,20 +1583,19 @@ parse_device_list(const char *devices)
 
 /*!
  * \internal
- * \brief Register a STONITH level for a target
+ * \brief Register a fencing topology level for a target
  *
  * Given an XML request specifying the target name, level index, and device IDs
  * for the level, this will create an entry for the target in the global topology
  * table if one does not already exist, then append the specified device IDs to
  * the entry's device list for the specified level.
  *
- * \param[in]  msg   XML request for STONITH level registration
- * \param[out] desc  If not NULL, will be set to string representation ("TARGET[LEVEL]")
- *
- * \return pcmk_ok on success, -EINVAL if XML does not specify valid level index
+ * \param[in]  msg     XML request for STONITH level registration
+ * \param[out] desc    If not NULL, set to string representation "TARGET[LEVEL]"
+ * \param[out] result  Where to set result of registration
  */
-int
-stonith_level_register(xmlNode *msg, char **desc)
+void
+fenced_register_level(xmlNode *msg, char **desc, pcmk__action_result_t *result)
 {
     int id = 0;
     xmlNode *level;
@@ -1607,6 +1606,13 @@ stonith_level_register(xmlNode *msg, char **desc)
     stonith_key_value_t *dIter = NULL;
     stonith_key_value_t *devices = NULL;
 
+    CRM_CHECK(result != NULL, return);
+
+    if (msg == NULL) {
+        fenced_set_protocol_error(result);
+        return;
+    }
+
     /* Allow the XML here to point to the level tag directly, or wrapped in
      * another tag. If directly, don't search by xpath, because it might give
      * multiple hits (e.g. if the XML is the CIB).
@@ -1614,11 +1620,15 @@ stonith_level_register(xmlNode *msg, char **desc)
     if (pcmk__str_eq(TYPE(msg), XML_TAG_FENCING_LEVEL, pcmk__str_casei)) {
         level = msg;
     } else {
-        level = get_xpath_object("//" XML_TAG_FENCING_LEVEL, msg, LOG_ERR);
+        level = get_xpath_object("//" XML_TAG_FENCING_LEVEL, msg, LOG_WARNING);
+    }
+    if (level == NULL) {
+        fenced_set_protocol_error(result);
+        return;
     }
-    CRM_CHECK(level != NULL, return -EINVAL);
 
     mode = stonith_level_kind(level);
+
     target = stonith_level_key(level, mode);
     crm_element_value_int(level, XML_ATTR_STONITH_INDEX, &id);
 
@@ -1626,18 +1636,26 @@ stonith_level_register(xmlNode *msg, char **desc)
         *desc = crm_strdup_printf("%s[%d]", target, id);
     }
 
-    /* Sanity-check arguments */
-    if (mode >= 3 || (id <= 0) || (id >= ST_LEVEL_MAX)) {
-        crm_trace("Could not add %s[%d] (%d) to the topology (%d active entries)", target, id, mode, g_hash_table_size(topology));
+    // Ensure level ID is in allowed range
+    if ((id <= 0) || (id >= ST_LEVEL_MAX)) {
+        crm_warn("Ignoring topology registration for %s with invalid level %d",
+                  target, id);
         free(target);
-        crm_log_xml_err(level, "Bad topology");
-        return -EINVAL;
+        crm_log_xml_warn(level, "Bad level");
+        pcmk__set_result(result, CRM_EX_INVALID_PARAM, PCMK_EXEC_INVALID,
+                         "Invalid topology level");
+        return;
     }
 
     /* Find or create topology table entry */
     tp = g_hash_table_lookup(topology, target);
     if (tp == NULL) {
         tp = calloc(1, sizeof(stonith_topology_t));
+        if (tp == NULL) {
+            pcmk__set_result(result, CRM_EX_ERROR, PCMK_EXEC_ERROR,
+                             strerror(ENOMEM));
+            return;
+        }
         tp->kind = mode;
         tp->target = target;
         tp->target_value = crm_element_value_copy(level, XML_ATTR_STONITH_TARGET_VALUE);
@@ -1671,7 +1689,8 @@ stonith_level_register(xmlNode *msg, char **desc)
         crm_info("Target %s has %d active fencing level%s",
                  tp->target, nlevels, pcmk__plural_s(nlevels));
     }
-    return pcmk_ok;
+
+    pcmk__set_result(result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
 }
 
 int
@@ -3142,7 +3161,8 @@ handle_request(pcmk__client_t *client, uint32_t id, uint32_t flags,
         char *device_id = NULL;
 
         if (is_privileged(client, op)) {
-            rc = stonith_level_register(request, &device_id);
+            fenced_register_level(request, &device_id, &result);
+            rc = pcmk_rc2legacy(stonith__result2rc(&result));
         } else {
             rc = -EACCES;
         }
diff --git a/daemons/fenced/pacemaker-fenced.c b/daemons/fenced/pacemaker-fenced.c
index 0a8b3bf6f2..469304f67c 100644
--- a/daemons/fenced/pacemaker-fenced.c
+++ b/daemons/fenced/pacemaker-fenced.c
@@ -452,8 +452,8 @@ remove_cib_device(xmlXPathObjectPtr xpathObj)
 static void
 handle_topology_change(xmlNode *match, bool remove) 
 {
-    int rc;
     char *desc = NULL;
+    pcmk__action_result_t result = PCMK__UNKNOWN_RESULT;
 
     CRM_CHECK(match != NULL, return);
     crm_trace("Updating %s", ID(match));
@@ -467,9 +467,10 @@ handle_topology_change(xmlNode *match, bool remove)
         free(key);
     }
 
-    rc = stonith_level_register(match, &desc);
-    do_stonith_notify_level(STONITH_OP_LEVEL_ADD, rc, desc);
-
+    fenced_register_level(match, &desc, &result);
+    do_stonith_notify_level(STONITH_OP_LEVEL_ADD,
+                            pcmk_rc2legacy(stonith__result2rc(&result)), desc);
+    pcmk__reset_result(&result);
     free(desc);
 }
 
diff --git a/daemons/fenced/pacemaker-fenced.h b/daemons/fenced/pacemaker-fenced.h
index 5162ada75d..cf114fb979 100644
--- a/daemons/fenced/pacemaker-fenced.h
+++ b/daemons/fenced/pacemaker-fenced.h
@@ -218,7 +218,8 @@ void stonith_device_remove(const char *id, bool from_cib);
 
 char *stonith_level_key(xmlNode * msg, int mode);
 int stonith_level_kind(xmlNode * msg);
-int stonith_level_register(xmlNode * msg, char **desc);
+void fenced_register_level(xmlNode *msg, char **desc,
+                           pcmk__action_result_t *result);
 
 int stonith_level_remove(xmlNode * msg, char **desc);
 
diff --git a/lib/fencing/st_actions.c b/lib/fencing/st_actions.c
index 7eaa8b0f2b..37fa849847 100644
--- a/lib/fencing/st_actions.c
+++ b/lib/fencing/st_actions.c
@@ -325,6 +325,7 @@ stonith__result2rc(const pcmk__action_result_t *result)
          */
         case PCMK_EXEC_INVALID:
             switch (result->exit_status) {
+                case CRM_EX_INVALID_PARAM:      return EINVAL;
                 case CRM_EX_INSUFFICIENT_PRIV:  return EACCES;
                 case CRM_EX_PROTOCOL:           return EPROTO;
 
-- 
2.27.0


From 27cedca4070328ecac1761f81c2890059af19dcf Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Mon, 29 Nov 2021 17:29:38 -0600
Subject: [PATCH 05/23] Low: fencer: handle topology level unregistration
 errors better

Rename stonith_level_remove() to fenced_unregister_level() for consistency, and
refactor it to return a full result rather than a legacy return code.

Return a protocol error for missing information in the request XML, and log
invalid level numbers at warning level. Use PCMK_EXEC_INVALID with
CRM_EX_INVALID_PARAM for invalid levels, so it gets mapped back to the legacy
code -EINVAL (which reverses the recent change in ec60f014b, both for backward
compatibility and because it makes sense -- a missing parameter is a protocol
error, while an invalid parameter is an invalid parameter error).
---
 daemons/fenced/fenced_commands.c  | 52 ++++++++++++++++++++++++-------
 daemons/fenced/pacemaker-fenced.c |  9 +++---
 daemons/fenced/pacemaker-fenced.h |  4 +--
 3 files changed, 48 insertions(+), 17 deletions(-)

diff --git a/daemons/fenced/fenced_commands.c b/daemons/fenced/fenced_commands.c
index 975f8633a4..ef41dc0e52 100644
--- a/daemons/fenced/fenced_commands.c
+++ b/daemons/fenced/fenced_commands.c
@@ -1693,25 +1693,54 @@ fenced_register_level(xmlNode *msg, char **desc, pcmk__action_result_t *result)
     pcmk__set_result(result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
 }
 
-int
-stonith_level_remove(xmlNode *msg, char **desc)
+/*!
+ * \internal
+ * \brief Unregister a fencing topology level for a target
+ *
+ * Given an XML request specifying the target name and level index (or 0 for all
+ * levels), this will remove any corresponding entry for the target from the
+ * global topology table.
+ *
+ * \param[in]  msg     XML request for STONITH level registration
+ * \param[out] desc    If not NULL, set to string representation "TARGET[LEVEL]"
+ * \param[out] result  Where to set result of unregistration
+ */
+void
+fenced_unregister_level(xmlNode *msg, char **desc,
+                        pcmk__action_result_t *result)
 {
     int id = -1;
     stonith_topology_t *tp;
     char *target;
+    xmlNode *level = NULL;
+
+    CRM_CHECK(result != NULL, return);
 
-    /* Unlike additions, removal requests should always have one level tag */
-    xmlNode *level = get_xpath_object("//" XML_TAG_FENCING_LEVEL, msg, LOG_ERR);
+    if (msg == NULL) {
+        fenced_set_protocol_error(result);
+        return;
+    }
 
-    CRM_CHECK(level != NULL, return -EPROTO);
+    // Unlike additions, removal requests should always have one level tag
+    level = get_xpath_object("//" XML_TAG_FENCING_LEVEL, msg, LOG_WARNING);
+    if (level == NULL) {
+        fenced_set_protocol_error(result);
+        return;
+    }
 
     target = stonith_level_key(level, -1);
     crm_element_value_int(level, XML_ATTR_STONITH_INDEX, &id);
 
-    CRM_CHECK((id >= 0) && (id < ST_LEVEL_MAX),
-              crm_log_xml_warn(msg, "invalid level");
-              free(target);
-              return -EPROTO);
+    // Ensure level ID is in allowed range
+    if ((id < 0) || (id >= ST_LEVEL_MAX)) {
+        crm_warn("Ignoring topology unregistration for %s with invalid level %d",
+                  target, id);
+        free(target);
+        crm_log_xml_warn(level, "Bad level");
+        pcmk__set_result(result, CRM_EX_INVALID_PARAM, PCMK_EXEC_INVALID,
+                         "Invalid topology level");
+        return;
+    }
 
     if (desc) {
         *desc = crm_strdup_printf("%s[%d]", target, id);
@@ -1745,7 +1774,7 @@ stonith_level_remove(xmlNode *msg, char **desc)
     }
 
     free(target);
-    return pcmk_ok;
+    pcmk__set_result(result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
 }
 
 static char *
@@ -3173,7 +3202,8 @@ handle_request(pcmk__client_t *client, uint32_t id, uint32_t flags,
         char *device_id = NULL;
 
         if (is_privileged(client, op)) {
-            rc = stonith_level_remove(request, &device_id);
+            fenced_unregister_level(request, &device_id, &result);
+            rc = pcmk_rc2legacy(stonith__result2rc(&result));
         } else {
             rc = -EACCES;
         }
diff --git a/daemons/fenced/pacemaker-fenced.c b/daemons/fenced/pacemaker-fenced.c
index 469304f67c..56acc93f31 100644
--- a/daemons/fenced/pacemaker-fenced.c
+++ b/daemons/fenced/pacemaker-fenced.c
@@ -409,17 +409,18 @@ do_stonith_notify_level(const char *op, int rc, const char *desc)
 static void
 topology_remove_helper(const char *node, int level)
 {
-    int rc;
     char *desc = NULL;
+    pcmk__action_result_t result = PCMK__UNKNOWN_RESULT;
     xmlNode *data = create_xml_node(NULL, XML_TAG_FENCING_LEVEL);
 
     crm_xml_add(data, F_STONITH_ORIGIN, __func__);
     crm_xml_add_int(data, XML_ATTR_STONITH_INDEX, level);
     crm_xml_add(data, XML_ATTR_STONITH_TARGET, node);
 
-    rc = stonith_level_remove(data, &desc);
-    do_stonith_notify_level(STONITH_OP_LEVEL_DEL, rc, desc);
-
+    fenced_unregister_level(data, &desc, &result);
+    do_stonith_notify_level(STONITH_OP_LEVEL_DEL,
+                            pcmk_rc2legacy(stonith__result2rc(&result)), desc);
+    pcmk__reset_result(&result);
     free_xml(data);
     free(desc);
 }
diff --git a/daemons/fenced/pacemaker-fenced.h b/daemons/fenced/pacemaker-fenced.h
index cf114fb979..0006e02e7d 100644
--- a/daemons/fenced/pacemaker-fenced.h
+++ b/daemons/fenced/pacemaker-fenced.h
@@ -220,8 +220,8 @@ char *stonith_level_key(xmlNode * msg, int mode);
 int stonith_level_kind(xmlNode * msg);
 void fenced_register_level(xmlNode *msg, char **desc,
                            pcmk__action_result_t *result);
-
-int stonith_level_remove(xmlNode * msg, char **desc);
+void fenced_unregister_level(xmlNode *msg, char **desc,
+                             pcmk__action_result_t *result);
 
 stonith_topology_t *find_topology_for_host(const char *host);
 
-- 
2.27.0


From 3f603defca78eb2bdd46c51a80ed04a4c773442b Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Fri, 19 Nov 2021 12:22:33 -0600
Subject: [PATCH 06/23] Log: fencer: track and log full result when handling
 requests

handle_request() now tracks and logs a full result rather than just a
legacy return code.
---
 daemons/fenced/fenced_commands.c | 95 ++++++++++++++++++--------------
 1 file changed, 53 insertions(+), 42 deletions(-)

diff --git a/daemons/fenced/fenced_commands.c b/daemons/fenced/fenced_commands.c
index ef41dc0e52..996c18faaa 100644
--- a/daemons/fenced/fenced_commands.c
+++ b/daemons/fenced/fenced_commands.c
@@ -2981,9 +2981,7 @@ static void
 handle_request(pcmk__client_t *client, uint32_t id, uint32_t flags,
                xmlNode *request, const char *remote_peer)
 {
-    int call_options = 0;
-    int rc = -EOPNOTSUPP;
-
+    int call_options = st_opt_none;
     xmlNode *data = NULL;
     bool need_reply = true;
     pcmk__action_result_t result = PCMK__UNKNOWN_RESULT;
@@ -3006,13 +3004,12 @@ handle_request(pcmk__client_t *client, uint32_t id, uint32_t flags,
         pcmk__ipc_send_xml(client, id, reply, flags);
         client->request_id = 0;
         free_xml(reply);
-        rc = pcmk_ok;
+        pcmk__set_result(&result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
         need_reply = false;
 
     } else if (pcmk__str_eq(op, STONITH_OP_EXEC, pcmk__str_none)) {
         execute_agent_action(request, &result);
         need_reply = (result.execution_status != PCMK_EXEC_PENDING);
-        rc = pcmk_rc2legacy(stonith__result2rc(&result));
 
     } else if (pcmk__str_eq(op, STONITH_OP_TIMEOUT_UPDATE, pcmk__str_none)) {
         const char *call_id = crm_element_value(request, F_STONITH_CALLID);
@@ -3021,7 +3018,7 @@ handle_request(pcmk__client_t *client, uint32_t id, uint32_t flags,
 
         crm_element_value_int(request, F_STONITH_TIMEOUT, &op_timeout);
         do_stonith_async_timeout_update(client_id, call_id, op_timeout);
-        rc = pcmk_ok;
+        pcmk__set_result(&result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
         need_reply = false;
 
     } else if (pcmk__str_eq(op, STONITH_OP_QUERY, pcmk__str_none)) {
@@ -3033,7 +3030,7 @@ handle_request(pcmk__client_t *client, uint32_t id, uint32_t flags,
         remove_relay_op(request);
 
         stonith_query(request, remote_peer, client_id, call_options);
-        rc = pcmk_ok;
+        pcmk__set_result(&result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
         need_reply = false;
 
     } else if (pcmk__str_eq(op, T_STONITH_NOTIFY, pcmk__str_none)) {
@@ -3055,7 +3052,7 @@ handle_request(pcmk__client_t *client, uint32_t id, uint32_t flags,
         }
 
         pcmk__ipc_send_ack(client, id, flags, "ack", CRM_EX_OK);
-        rc = pcmk_ok;
+        pcmk__set_result(&result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
         need_reply = false;
 
     } else if (pcmk__str_eq(op, STONITH_OP_RELAY, pcmk__str_none)) {
@@ -3069,27 +3066,27 @@ handle_request(pcmk__client_t *client, uint32_t id, uint32_t flags,
                    crm_element_value(dev, F_STONITH_TARGET));
 
         if (initiate_remote_stonith_op(NULL, request, FALSE) == NULL) {
-            rc = -EPROTO;
+            fenced_set_protocol_error(&result);
         } else {
-            rc = -EINPROGRESS;
+            pcmk__set_result(&result, CRM_EX_OK, PCMK_EXEC_PENDING, NULL);
             need_reply = false;
         }
 
     } else if (pcmk__str_eq(op, STONITH_OP_FENCE, pcmk__str_none)) {
         if ((remote_peer != NULL) || stand_alone) {
             fence_locally(request, &result);
-            rc = pcmk_rc2legacy(stonith__result2rc(&result));
 
         } else if (pcmk_is_set(call_options, st_opt_manual_ack)) {
             switch (fenced_handle_manual_confirmation(client, request)) {
                 case pcmk_rc_ok:
-                    rc = pcmk_ok;
+                    pcmk__set_result(&result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
                     break;
                 case EINPROGRESS:
-                    rc = -EINPROGRESS;
+                    pcmk__set_result(&result, CRM_EX_OK, PCMK_EXEC_PENDING,
+                                     NULL);
                     break;
                 default:
-                    rc = -EPROTO;
+                    fenced_set_protocol_error(&result);
                     break;
             }
 
@@ -3100,17 +3097,15 @@ handle_request(pcmk__client_t *client, uint32_t id, uint32_t flags,
             const char *action = crm_element_value(dev, F_STONITH_ACTION);
             const char *device = crm_element_value(dev, F_STONITH_DEVICE);
 
-            if (client) {
+            if (client != NULL) {
                 int tolerance = 0;
 
                 crm_notice("Client %s wants to fence (%s) %s using %s",
                            pcmk__client_name(client), action,
                            target, (device? device : "any device"));
-
                 crm_element_value_int(dev, F_STONITH_TOLERANCE, &tolerance);
-
                 if (stonith_check_fence_tolerance(tolerance, target, action)) {
-                    rc = pcmk_ok;
+                    pcmk__set_result(&result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
                     goto done;
                 }
 
@@ -3143,24 +3138,24 @@ handle_request(pcmk__client_t *client, uint32_t id, uint32_t flags,
                 crm_xml_add(request, F_STONITH_REMOTE_OP_ID, op->id);
                 send_cluster_message(crm_get_peer(0, alternate_host), crm_msg_stonith_ng, request,
                                      FALSE);
-                rc = -EINPROGRESS;
+                pcmk__set_result(&result, CRM_EX_OK, PCMK_EXEC_PENDING, NULL);
 
             } else if (initiate_remote_stonith_op(client, request, FALSE) == NULL) {
-                rc = -EPROTO;
+                fenced_set_protocol_error(&result);
+
             } else {
-                rc = -EINPROGRESS;
+                pcmk__set_result(&result, CRM_EX_OK, PCMK_EXEC_PENDING, NULL);
             }
         }
-        need_reply = (rc != -EINPROGRESS);
+        need_reply = (result.execution_status != PCMK_EXEC_PENDING);
 
     } else if (pcmk__str_eq(op, STONITH_OP_FENCE_HISTORY, pcmk__str_none)) {
         stonith_fence_history(request, &data, remote_peer, call_options);
-        rc = pcmk_ok;
+        pcmk__set_result(&result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
         if (pcmk_is_set(call_options, st_opt_discard_reply)) {
             /* we don't expect answers to the broadcast
              * we might have sent out
              */
-            rc = pcmk_ok;
             need_reply = false;
         }
 
@@ -3168,11 +3163,18 @@ handle_request(pcmk__client_t *client, uint32_t id, uint32_t flags,
         const char *device_id = NULL;
 
         if (is_privileged(client, op)) {
-            rc = stonith_device_register(request, &device_id, FALSE);
+            int rc = stonith_device_register(request, &device_id, FALSE);
+
+            pcmk__set_result(&result,
+                             ((rc == pcmk_ok)? CRM_EX_OK : CRM_EX_ERROR),
+                             stonith__legacy2status(rc),
+                             ((rc == pcmk_ok)? NULL : pcmk_strerror(rc)));
         } else {
-            rc = -EACCES;
+            pcmk__set_result(&result, CRM_EX_INSUFFICIENT_PRIV,
+                             PCMK_EXEC_INVALID,
+                             "Unprivileged users must register device via CIB");
         }
-        do_stonith_notify_device(op, rc, device_id);
+        do_stonith_notify_device(op, pcmk_rc2legacy(stonith__result2rc(&result)), device_id);
 
     } else if (pcmk__str_eq(op, STONITH_OP_DEVICE_DEL, pcmk__str_none)) {
         xmlNode *dev = get_xpath_object("//" F_STONITH_DEVICE, request, LOG_ERR);
@@ -3180,22 +3182,25 @@ handle_request(pcmk__client_t *client, uint32_t id, uint32_t flags,
 
         if (is_privileged(client, op)) {
             stonith_device_remove(device_id, false);
-            rc = pcmk_ok;
+            pcmk__set_result(&result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
         } else {
-            rc = -EACCES;
+            pcmk__set_result(&result, CRM_EX_INSUFFICIENT_PRIV,
+                             PCMK_EXEC_INVALID,
+                             "Unprivileged users must delete device via CIB");
         }
-        do_stonith_notify_device(op, rc, device_id);
+        do_stonith_notify_device(op, pcmk_rc2legacy(stonith__result2rc(&result)), device_id);
 
     } else if (pcmk__str_eq(op, STONITH_OP_LEVEL_ADD, pcmk__str_none)) {
         char *device_id = NULL;
 
         if (is_privileged(client, op)) {
             fenced_register_level(request, &device_id, &result);
-            rc = pcmk_rc2legacy(stonith__result2rc(&result));
         } else {
-            rc = -EACCES;
+            pcmk__set_result(&result, CRM_EX_INSUFFICIENT_PRIV,
+                             PCMK_EXEC_INVALID,
+                             "Unprivileged users must add level via CIB");
         }
-        do_stonith_notify_level(op, rc, device_id);
+        do_stonith_notify_level(op, pcmk_rc2legacy(stonith__result2rc(&result)), device_id);
         free(device_id);
 
     } else if (pcmk__str_eq(op, STONITH_OP_LEVEL_DEL, pcmk__str_none)) {
@@ -3203,11 +3208,12 @@ handle_request(pcmk__client_t *client, uint32_t id, uint32_t flags,
 
         if (is_privileged(client, op)) {
             fenced_unregister_level(request, &device_id, &result);
-            rc = pcmk_rc2legacy(stonith__result2rc(&result));
         } else {
-            rc = -EACCES;
+            pcmk__set_result(&result, CRM_EX_INSUFFICIENT_PRIV,
+                             PCMK_EXEC_INVALID,
+                             "Unprivileged users must delete level via CIB");
         }
-        do_stonith_notify_level(op, rc, device_id);
+        do_stonith_notify_level(op, pcmk_rc2legacy(stonith__result2rc(&result)), device_id);
 
     } else if(pcmk__str_eq(op, CRM_OP_RM_NODE_CACHE, pcmk__str_casei)) {
         int node_id = 0;
@@ -3216,31 +3222,36 @@ handle_request(pcmk__client_t *client, uint32_t id, uint32_t flags,
         crm_element_value_int(request, XML_ATTR_ID, &node_id);
         name = crm_element_value(request, XML_ATTR_UNAME);
         reap_crm_member(node_id, name);
-        rc = pcmk_ok;
+        pcmk__set_result(&result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
         need_reply = false;
 
     } else {
         crm_err("Unknown IPC request %s from %s %s", op,
                 ((client == NULL)? "peer" : "client"),
                 ((client == NULL)? remote_peer : pcmk__client_name(client)));
+        pcmk__set_result(&result, CRM_EX_PROTOCOL, PCMK_EXEC_INVALID,
+                         "Unknown IPC request type (bug?)");
     }
 
 done:
     // Reply if result is known
     if (need_reply) {
-        xmlNode *reply = stonith_construct_reply(request, result.action_stdout, data, rc);
+        xmlNode *reply = stonith_construct_reply(request, result.action_stdout, data,
+                                                 pcmk_rc2legacy(stonith__result2rc(&result)));
 
         stonith_send_reply(reply, call_options, remote_peer, client_id);
         free_xml(reply);
     }
 
-    free_xml(data);
-
-    crm_debug("Processed %s request from %s %s: %s (rc=%d)",
+    crm_debug("Processed %s request from %s %s: %s%s%s%s",
               op, ((client == NULL)? "peer" : "client"),
               ((client == NULL)? remote_peer : pcmk__client_name(client)),
-              ((rc > 0)? "" : pcmk_strerror(rc)), rc);
+              pcmk_exec_status_str(result.execution_status),
+              (result.exit_reason == NULL)? "" : " (",
+              (result.exit_reason == NULL)? "" : result.exit_reason,
+              (result.exit_reason == NULL)? "" : ")");
 
+    free_xml(data);
     pcmk__reset_result(&result);
 }
 
-- 
2.27.0


From 5e13199699a4e9279520b3668c072e3db49c9782 Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Fri, 19 Nov 2021 15:10:36 -0600
Subject: [PATCH 07/23] Feature: fencer: pass full result in replies to
 requests

Rename stonith_construct_reply() to fenced_construct_reply() for consistency,
make it take a full result as an argument rather than separate arguments for
legacy return code and output, and add the full result to the reply (along with
the legacy return code, for backward compatibility).

This is used for peer query replies and some request replies (including replies
to local clients who requested fencing). Other replies, such as those built by
construct_async_reply(), are not affected by this commit.
---
 daemons/fenced/fenced_commands.c  | 33 ++++++++++++++++++++++---------
 daemons/fenced/fenced_remote.c    |  9 ++++++++-
 daemons/fenced/pacemaker-fenced.h |  4 ++--
 3 files changed, 34 insertions(+), 12 deletions(-)

diff --git a/daemons/fenced/fenced_commands.c b/daemons/fenced/fenced_commands.c
index 996c18faaa..84f89e8daf 100644
--- a/daemons/fenced/fenced_commands.c
+++ b/daemons/fenced/fenced_commands.c
@@ -2322,6 +2322,7 @@ stonith_query(xmlNode * msg, const char *remote_peer, const char *client_id, int
     const char *target = NULL;
     int timeout = 0;
     xmlNode *dev = get_xpath_object("//@" F_STONITH_ACTION, msg, LOG_NEVER);
+    pcmk__action_result_t result = PCMK__UNKNOWN_RESULT;
 
     crm_element_value_int(msg, F_STONITH_TIMEOUT, &timeout);
     if (dev) {
@@ -2338,7 +2339,8 @@ stonith_query(xmlNode * msg, const char *remote_peer, const char *client_id, int
     crm_log_xml_debug(msg, "Query");
     query = calloc(1, sizeof(struct st_query_data));
 
-    query->reply = stonith_construct_reply(msg, NULL, NULL, pcmk_ok);
+    pcmk__set_result(&result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
+    query->reply = fenced_construct_reply(msg, NULL, &result);
     query->remote_peer = remote_peer ? strdup(remote_peer) : NULL;
     query->client_id = client_id ? strdup(client_id) : NULL;
     query->target = target ? strdup(target) : NULL;
@@ -2729,8 +2731,23 @@ fence_locally(xmlNode *msg, pcmk__action_result_t *result)
     pcmk__set_result(result, CRM_EX_OK, PCMK_EXEC_PENDING, NULL);
 }
 
+/*!
+ * \internal
+ * \brief Build an XML reply for a fencing operation
+ *
+ * \param[in] request  Request that reply is for
+ * \param[in] data     If not NULL, add to reply as call data
+ * \param[in] result   Full result of fencing operation
+ *
+ * \return Newly created XML reply
+ * \note The caller is responsible for freeing the result.
+ * \note This has some overlap with construct_async_reply(), but that copies
+ *       values from an async_command_t, whereas this one copies them from the
+ *       request.
+ */
 xmlNode *
-stonith_construct_reply(xmlNode * request, const char *output, xmlNode * data, int rc)
+fenced_construct_reply(xmlNode *request, xmlNode *data,
+                       pcmk__action_result_t *result)
 {
     xmlNode *reply = NULL;
 
@@ -2738,8 +2755,7 @@ stonith_construct_reply(xmlNode * request, const char *output, xmlNode * data, i
 
     crm_xml_add(reply, "st_origin", __func__);
     crm_xml_add(reply, F_TYPE, T_STONITH_NG);
-    crm_xml_add(reply, F_STONITH_OUTPUT, output);
-    crm_xml_add_int(reply, F_STONITH_RC, rc);
+    stonith__xe_set_result(reply, result);
 
     if (request == NULL) {
         /* Most likely, this is the result of a stonith operation that was
@@ -2749,12 +2765,14 @@ stonith_construct_reply(xmlNode * request, const char *output, xmlNode * data, i
          * @TODO Maybe synchronize this information at start-up?
          */
         crm_warn("Missing request information for client notifications for "
-                 "operation with result %d (initiated before we came up?)", rc);
+                 "operation with result '%s' (initiated before we came up?)",
+                 pcmk_exec_status_str(result->execution_status));
 
     } else {
         const char *name = NULL;
         const char *value = NULL;
 
+        // Attributes to copy from request to reply
         const char *names[] = {
             F_STONITH_OPERATION,
             F_STONITH_CALLID,
@@ -2764,8 +2782,6 @@ stonith_construct_reply(xmlNode * request, const char *output, xmlNode * data, i
             F_STONITH_CALLOPTS
         };
 
-        crm_trace("Creating a result reply with%s reply output (rc=%d)",
-                  (data? "" : "out"), rc);
         for (int lpc = 0; lpc < PCMK__NELEM(names); lpc++) {
             name = names[lpc];
             value = crm_element_value(request, name);
@@ -3236,8 +3252,7 @@ handle_request(pcmk__client_t *client, uint32_t id, uint32_t flags,
 done:
     // Reply if result is known
     if (need_reply) {
-        xmlNode *reply = stonith_construct_reply(request, result.action_stdout, data,
-                                                 pcmk_rc2legacy(stonith__result2rc(&result)));
+        xmlNode *reply = fenced_construct_reply(request, data, &result);
 
         stonith_send_reply(reply, call_options, remote_peer, client_id);
         free_xml(reply);
diff --git a/daemons/fenced/fenced_remote.c b/daemons/fenced/fenced_remote.c
index 8feb401477..baa07d9e78 100644
--- a/daemons/fenced/fenced_remote.c
+++ b/daemons/fenced/fenced_remote.c
@@ -415,7 +415,14 @@ handle_local_reply_and_notify(remote_fencing_op_t * op, xmlNode * data, int rc)
     crm_xml_add(data, F_STONITH_TARGET, op->target);
     crm_xml_add(data, F_STONITH_OPERATION, op->action);
 
-    reply = stonith_construct_reply(op->request, NULL, data, rc);
+    {
+        pcmk__action_result_t result = PCMK__UNKNOWN_RESULT;
+
+        pcmk__set_result(&result,
+                         ((rc == pcmk_ok)? CRM_EX_OK : CRM_EX_ERROR),
+                         stonith__legacy2status(rc), NULL);
+        reply = fenced_construct_reply(op->request, data, &result);
+    }
     crm_xml_add(reply, F_STONITH_DELEGATE, op->delegate);
 
     /* Send fencing OP reply to local client that initiated fencing */
diff --git a/daemons/fenced/pacemaker-fenced.h b/daemons/fenced/pacemaker-fenced.h
index 0006e02e7d..d5f4bc79fd 100644
--- a/daemons/fenced/pacemaker-fenced.h
+++ b/daemons/fenced/pacemaker-fenced.h
@@ -228,8 +228,8 @@ stonith_topology_t *find_topology_for_host(const char *host);
 void do_local_reply(xmlNode * notify_src, const char *client_id, gboolean sync_reply,
                            gboolean from_peer);
 
-xmlNode *stonith_construct_reply(xmlNode * request, const char *output, xmlNode * data,
-                                        int rc);
+xmlNode *fenced_construct_reply(xmlNode *request, xmlNode *data,
+                                pcmk__action_result_t *result);
 
 void
  do_stonith_async_timeout_update(const char *client, const char *call_id, int timeout);
-- 
2.27.0


From b32aa252b321ff40c834d153cb23f8b3be471611 Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Fri, 19 Nov 2021 15:43:20 -0600
Subject: [PATCH 08/23] Log: fencer: grab and log full result when processing
 peer fencing replies

fenced_process_fencing_reply() now checks for the full result, instead of only
a legacy return code, in peer replies, and uses it in log messages.
---
 daemons/fenced/fenced_remote.c | 63 ++++++++++++++++++++--------------
 1 file changed, 37 insertions(+), 26 deletions(-)

diff --git a/daemons/fenced/fenced_remote.c b/daemons/fenced/fenced_remote.c
index baa07d9e78..c6369f0051 100644
--- a/daemons/fenced/fenced_remote.c
+++ b/daemons/fenced/fenced_remote.c
@@ -2095,21 +2095,21 @@ process_remote_stonith_query(xmlNode * msg)
 void
 fenced_process_fencing_reply(xmlNode *msg)
 {
-    int rc = 0;
     const char *id = NULL;
     const char *device = NULL;
     remote_fencing_op_t *op = NULL;
     xmlNode *dev = get_xpath_object("//@" F_STONITH_REMOTE_OP_ID, msg, LOG_ERR);
+    pcmk__action_result_t result = PCMK__UNKNOWN_RESULT;
 
     CRM_CHECK(dev != NULL, return);
 
     id = crm_element_value(dev, F_STONITH_REMOTE_OP_ID);
     CRM_CHECK(id != NULL, return);
 
-    dev = get_xpath_object("//@" F_STONITH_RC, msg, LOG_ERR);
+    dev = stonith__find_xe_with_result(msg);
     CRM_CHECK(dev != NULL, return);
 
-    crm_element_value_int(dev, F_STONITH_RC, &rc);
+    stonith__xe_get_result(dev, &result);
 
     device = crm_element_value(dev, F_STONITH_DEVICE);
 
@@ -2117,7 +2117,7 @@ fenced_process_fencing_reply(xmlNode *msg)
         op = g_hash_table_lookup(stonith_remote_op_list, id);
     }
 
-    if (op == NULL && rc == pcmk_ok) {
+    if ((op == NULL) && pcmk__result_ok(&result)) {
         /* Record successful fencing operations */
         const char *client_id = crm_element_value(dev, F_STONITH_CLIENTID);
 
@@ -2139,16 +2139,19 @@ fenced_process_fencing_reply(xmlNode *msg)
     }
 
     if (pcmk__str_eq(crm_element_value(msg, F_SUBTYPE), "broadcast", pcmk__str_casei)) {
-        crm_debug("Finalizing action '%s' targeting %s on behalf of %s@%s: %s "
+        crm_debug("Finalizing action '%s' targeting %s on behalf of %s@%s: %s%s%s%s "
                   CRM_XS " id=%.8s",
                   op->action, op->target, op->client_name, op->originator,
-                  pcmk_strerror(rc), op->id);
-        if (rc == pcmk_ok) {
+                  pcmk_exec_status_str(result.execution_status),
+                  (result.exit_reason == NULL)? "" : " (",
+                  (result.exit_reason == NULL)? "" : result.exit_reason,
+                  (result.exit_reason == NULL)? "" : ")", op->id);
+        if (pcmk__result_ok(&result)) {
             op->state = st_done;
         } else {
             op->state = st_failed;
         }
-        remote_op_done(op, msg, rc, FALSE);
+        remote_op_done(op, msg, pcmk_rc2legacy(stonith__result2rc(&result)), FALSE);
         return;
     } else if (!pcmk__str_eq(op->originator, stonith_our_uname, pcmk__str_casei)) {
         /* If this isn't a remote level broadcast, and we are not the
@@ -2162,28 +2165,35 @@ fenced_process_fencing_reply(xmlNode *msg)
     if (pcmk_is_set(op->call_options, st_opt_topology)) {
         const char *device = crm_element_value(msg, F_STONITH_DEVICE);
 
-        crm_notice("Action '%s' targeting %s using %s on behalf of %s@%s: %s "
-                   CRM_XS " rc=%d",
+        crm_notice("Action '%s' targeting %s using %s on behalf of %s@%s: %s%s%s%s",
                    op->action, op->target, device, op->client_name,
-                   op->originator, pcmk_strerror(rc), rc);
+                   op->originator,
+                   pcmk_exec_status_str(result.execution_status),
+                  (result.exit_reason == NULL)? "" : " (",
+                  (result.exit_reason == NULL)? "" : result.exit_reason,
+                  (result.exit_reason == NULL)? "" : ")");
 
         /* We own the op, and it is complete. broadcast the result to all nodes
          * and notify our local clients. */
         if (op->state == st_done) {
-            remote_op_done(op, msg, rc, FALSE);
+            remote_op_done(op, msg, pcmk_rc2legacy(stonith__result2rc(&result)), FALSE);
             return;
         }
 
-        if ((op->phase == 2) && (rc != pcmk_ok)) {
+        if ((op->phase == 2) && !pcmk__result_ok(&result)) {
             /* A remapped "on" failed, but the node was already turned off
              * successfully, so ignore the error and continue.
              */
-            crm_warn("Ignoring %s 'on' failure (exit code %d) targeting %s "
-                     "after successful 'off'", device, rc, op->target);
-            rc = pcmk_ok;
+            crm_warn("Ignoring %s 'on' failure (%s%s%s) targeting %s "
+                     "after successful 'off'",
+                     device, pcmk_exec_status_str(result.execution_status),
+                     (result.exit_reason == NULL)? "" : ": ",
+                     (result.exit_reason == NULL)? "" : result.exit_reason,
+                     op->target);
+            pcmk__set_result(&result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
         }
 
-        if (rc == pcmk_ok) {
+        if (pcmk__result_ok(&result)) {
             /* An operation completed successfully. Try another device if
              * necessary, otherwise mark the operation as done. */
             advance_topology_device_in_level(op, device, msg);
@@ -2193,29 +2203,30 @@ fenced_process_fencing_reply(xmlNode *msg)
              * levels are available, mark this operation as failed and report results. */
             if (advance_topology_level(op, false) != pcmk_rc_ok) {
                 op->state = st_failed;
-                remote_op_done(op, msg, rc, FALSE);
+                remote_op_done(op, msg, pcmk_rc2legacy(stonith__result2rc(&result)), FALSE);
                 return;
             }
         }
-    } else if (rc == pcmk_ok && op->devices == NULL) {
+    } else if (pcmk__result_ok(&result) && (op->devices == NULL)) {
         crm_trace("All done for %s", op->target);
-
         op->state = st_done;
-        remote_op_done(op, msg, rc, FALSE);
+        remote_op_done(op, msg, pcmk_rc2legacy(stonith__result2rc(&result)), FALSE);
         return;
-    } else if (rc == -ETIME && op->devices == NULL) {
+    } else if ((result.execution_status == PCMK_EXEC_TIMEOUT)
+               && (op->devices == NULL)) {
         /* If the operation timed out don't bother retrying other peers. */
         op->state = st_failed;
-        remote_op_done(op, msg, rc, FALSE);
+        remote_op_done(op, msg, pcmk_rc2legacy(stonith__result2rc(&result)), FALSE);
         return;
     } else {
         /* fall-through and attempt other fencing action using another peer */
     }
 
     /* Retry on failure */
-    crm_trace("Next for %s on behalf of %s@%s (rc was %d)", op->target, op->originator,
-              op->client_name, rc);
-    call_remote_stonith(op, NULL, rc);
+    crm_trace("Next for %s on behalf of %s@%s (result was: %s)",
+              op->target, op->originator, op->client_name,
+              pcmk_exec_status_str(result.execution_status));
+    call_remote_stonith(op, NULL, pcmk_rc2legacy(stonith__result2rc(&result)));
 }
 
 gboolean
-- 
2.27.0


From afb5706ac606a8ea883aa1597ee63d9891cc2e13 Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Fri, 19 Nov 2021 15:56:30 -0600
Subject: [PATCH 09/23] Refactor: fencer: pass full result of previous failed
 action when initiating peer fencing

Rename call_remote_stonith() to request_peer_fencing() for readability, and
make it take the full result of the previous failed action, rather than just
its legacy return code, as an argument.

This does cause one change in behavior: if topology is in use, a previous
attempt failed, and no more peers have the appropriate device, then the
legacy return code returned will be -ENODEV rather than -EHOSTUNREACH.
These are treated similarly internally, and hopefully that will not cause
problems for external code.
---
 daemons/fenced/fenced_remote.c | 89 +++++++++++++++++++++++++---------
 1 file changed, 67 insertions(+), 22 deletions(-)

diff --git a/daemons/fenced/fenced_remote.c b/daemons/fenced/fenced_remote.c
index c6369f0051..31d5ee6e93 100644
--- a/daemons/fenced/fenced_remote.c
+++ b/daemons/fenced/fenced_remote.c
@@ -76,12 +76,13 @@ typedef struct {
 
 GHashTable *stonith_remote_op_list = NULL;
 
-void call_remote_stonith(remote_fencing_op_t *op, peer_device_info_t *peer,
-                         int rc);
 static void remote_op_done(remote_fencing_op_t * op, xmlNode * data, int rc, int dup);
 extern xmlNode *stonith_create_op(int call_id, const char *token, const char *op, xmlNode * data,
                                   int call_options);
 
+static void request_peer_fencing(remote_fencing_op_t *op,
+                                peer_device_info_t *peer,
+                                pcmk__action_result_t *result);
 static void report_timeout_period(remote_fencing_op_t * op, int op_timeout);
 static int get_op_total_timeout(const remote_fencing_op_t *op,
                                 const peer_device_info_t *chosen_peer);
@@ -609,12 +610,16 @@ static gboolean
 remote_op_timeout_one(gpointer userdata)
 {
     remote_fencing_op_t *op = userdata;
+    pcmk__action_result_t result = PCMK__UNKNOWN_RESULT;
 
     op->op_timer_one = 0;
 
     crm_notice("Peer's '%s' action targeting %s for client %s timed out " CRM_XS
                " id=%.8s", op->action, op->target, op->client_name, op->id);
-    call_remote_stonith(op, NULL, -ETIME);
+    pcmk__set_result(&result, CRM_EX_ERROR, PCMK_EXEC_TIMEOUT, NULL);
+
+    // Try another device, if appropriate
+    request_peer_fencing(op, NULL, &result);
     return FALSE;
 }
 
@@ -685,9 +690,13 @@ remote_op_query_timeout(gpointer data)
         crm_debug("Operation %.8s targeting %s already in progress",
                   op->id, op->target);
     } else if (op->query_results) {
+        // Result won't be used in this case, but we need to pass something
+        pcmk__action_result_t result = PCMK__UNKNOWN_RESULT;
+
+        // Query succeeded, so attempt the actual fencing
         crm_debug("Query %.8s targeting %s complete (state=%s)",
                   op->id, op->target, stonith_op_state_str(op->state));
-        call_remote_stonith(op, NULL, pcmk_ok);
+        request_peer_fencing(op, NULL, &result);
     } else {
         crm_debug("Query %.8s targeting %s timed out (state=%s)",
                   op->id, op->target, stonith_op_state_str(op->state));
@@ -1533,6 +1542,10 @@ static void
 advance_topology_device_in_level(remote_fencing_op_t *op, const char *device,
                                  xmlNode *msg)
 {
+    pcmk__action_result_t result = PCMK__UNKNOWN_RESULT;
+
+    pcmk__set_result(&result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
+
     /* Advance to the next device at this topology level, if any */
     if (op->devices) {
         op->devices = op->devices->next;
@@ -1569,7 +1582,7 @@ advance_topology_device_in_level(remote_fencing_op_t *op, const char *device,
             op->delay = 0;
         }
 
-        call_remote_stonith(op, NULL, pcmk_ok);
+        request_peer_fencing(op, NULL, &result);
     } else {
         /* We're done with all devices and phases, so finalize operation */
         crm_trace("Marking complex fencing op targeting %s as complete",
@@ -1598,15 +1611,30 @@ check_watchdog_fencing_and_wait(remote_fencing_op_t * op)
     return FALSE;
 }
 
-void
-call_remote_stonith(remote_fencing_op_t *op, peer_device_info_t *peer, int rc)
+/*!
+ * \internal
+ * \brief Ask a peer to execute a fencing operation
+ *
+ * \param[in] op      Fencing operation to be executed
+ * \param[in] peer    If NULL or topology is in use, choose best peer to execute
+ *                    the fencing, otherwise use this peer
+ * \param[in] result  Full result of previous failed attempt, if any (used as
+ *                    final result only if a previous attempt failed, topology
+ *                    is not in use, and no devices remain to be attempted)
+ */
+static void
+request_peer_fencing(remote_fencing_op_t *op, peer_device_info_t *peer,
+                    pcmk__action_result_t *result)
 {
     const char *device = NULL;
-    int timeout = op->base_timeout;
+    int timeout;
+
+    CRM_CHECK(op != NULL, return);
 
     crm_trace("Action %.8s targeting %s for %s is %s",
               op->id, op->target, op->client_name,
               stonith_op_state_str(op->state));
+    timeout = op->base_timeout;
     if ((peer == NULL) && !pcmk_is_set(op->call_options, st_opt_topology)) {
         peer = stonith_choose_peer(op);
     }
@@ -1623,9 +1651,14 @@ call_remote_stonith(remote_fencing_op_t *op, peer_device_info_t *peer, int rc)
     }
 
     if (pcmk_is_set(op->call_options, st_opt_topology) && op->devices) {
-        /* Ignore any peer preference, they might not have the device we need */
-        /* When using topology, stonith_choose_peer() removes the device from
-         * further consideration, so be sure to calculate timeout beforehand */
+        /* Ignore the caller's peer preference if topology is in use, because
+         * that peer might not have access to the required device. With
+         * topology, stonith_choose_peer() removes the device from further
+         * consideration, so the timeout must be calculated beforehand.
+         *
+         * @TODO Basing the total timeout on the caller's preferred peer (above)
+         *       is less than ideal.
+         */
         peer = stonith_choose_peer(op);
 
         device = op->devices->data;
@@ -1722,8 +1755,6 @@ call_remote_stonith(remote_fencing_op_t *op, peer_device_info_t *peer, int rc)
         finalize_timed_out_op(op);
 
     } else if(op->replies >= op->replies_expected || op->replies >= fencing_active_peers()) {
-//        int rc = -EHOSTUNREACH;
-
         /* if the operation never left the query state,
          * but we have all the expected replies, then no devices
          * are available to execute the fencing operation. */
@@ -1735,17 +1766,28 @@ call_remote_stonith(remote_fencing_op_t *op, peer_device_info_t *peer, int rc)
             }
         }
 
+        // This is the only case in which result will be used
+        CRM_CHECK(result != NULL, return);
+
         if (op->state == st_query) {
             crm_info("No peers (out of %d) have devices capable of fencing "
                      "(%s) %s for client %s " CRM_XS " state=%s",
                      op->replies, op->action, op->target, op->client_name,
                      stonith_op_state_str(op->state));
 
-            rc = -ENODEV;
+            pcmk__reset_result(result);
+            pcmk__set_result(result, CRM_EX_ERROR, PCMK_EXEC_NO_FENCE_DEVICE,
+                             NULL);
         } else {
             if (pcmk_is_set(op->call_options, st_opt_topology)) {
-                rc = -EHOSTUNREACH;
-            } 
+                pcmk__reset_result(result);
+                pcmk__set_result(result, CRM_EX_ERROR,
+                                 PCMK_EXEC_NO_FENCE_DEVICE, NULL);
+            }
+            /* ... else use result provided by caller -- overwriting it with
+               PCMK_EXEC_NO_FENCE_DEVICE would prevent remote_op_done() from
+               setting the correct delegate if needed.
+             */
 
             crm_info("No peers (out of %d) are capable of fencing (%s) %s "
                      "for client %s " CRM_XS " state=%s",
@@ -1754,7 +1796,7 @@ call_remote_stonith(remote_fencing_op_t *op, peer_device_info_t *peer, int rc)
         }
 
         op->state = st_failed;
-        remote_op_done(op, NULL, rc, FALSE);
+        remote_op_done(op, NULL, pcmk_rc2legacy(stonith__result2rc(result)), FALSE);
 
     } else {
         crm_info("Waiting for additional peers capable of fencing (%s) %s%s%s "
@@ -2004,6 +2046,7 @@ process_remote_stonith_query(xmlNode * msg)
     peer_device_info_t *peer = NULL;
     uint32_t replies_expected;
     xmlNode *dev = get_xpath_object("//@" F_STONITH_REMOTE_OP_ID, msg, LOG_ERR);
+    pcmk__action_result_t result = PCMK__UNKNOWN_RESULT;
 
     CRM_CHECK(dev != NULL, return -EPROTO);
 
@@ -2038,6 +2081,8 @@ process_remote_stonith_query(xmlNode * msg)
         peer = add_result(op, host, ndevices, dev);
     }
 
+    pcmk__set_result(&result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
+
     if (pcmk_is_set(op->call_options, st_opt_topology)) {
         /* If we start the fencing before all the topology results are in,
          * it is possible fencing levels will be skipped because of the missing
@@ -2045,12 +2090,12 @@ process_remote_stonith_query(xmlNode * msg)
         if (op->state == st_query && all_topology_devices_found(op)) {
             /* All the query results are in for the topology, start the fencing ops. */
             crm_trace("All topology devices found");
-            call_remote_stonith(op, peer, pcmk_ok);
+            request_peer_fencing(op, peer, &result);
 
         } else if (have_all_replies) {
             crm_info("All topology query replies have arrived, continuing (%d expected/%d received) ",
                      replies_expected, op->replies);
-            call_remote_stonith(op, NULL, pcmk_ok);
+            request_peer_fencing(op, NULL, &result);
         }
 
     } else if (op->state == st_query) {
@@ -2062,12 +2107,12 @@ process_remote_stonith_query(xmlNode * msg)
             /* we have a verified device living on a peer that is not the target */
             crm_trace("Found %d verified device%s",
                       nverified, pcmk__plural_s(nverified));
-            call_remote_stonith(op, peer, pcmk_ok);
+            request_peer_fencing(op, peer, &result);
 
         } else if (have_all_replies) {
             crm_info("All query replies have arrived, continuing (%d expected/%d received) ",
                      replies_expected, op->replies);
-            call_remote_stonith(op, NULL, pcmk_ok);
+            request_peer_fencing(op, NULL, &result);
 
         } else {
             crm_trace("Waiting for more peer results before launching fencing operation");
@@ -2226,7 +2271,7 @@ fenced_process_fencing_reply(xmlNode *msg)
     crm_trace("Next for %s on behalf of %s@%s (result was: %s)",
               op->target, op->originator, op->client_name,
               pcmk_exec_status_str(result.execution_status));
-    call_remote_stonith(op, NULL, pcmk_rc2legacy(stonith__result2rc(&result)));
+    request_peer_fencing(op, NULL, &result);
 }
 
 gboolean
-- 
2.27.0


From 43e08ba7ee1635e47bfaf2a57636101c675b89ae Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Fri, 19 Nov 2021 16:02:04 -0600
Subject: [PATCH 10/23] Feature: fencer: set exit reason for timeouts waiting
 for peer replies

---
 daemons/fenced/fenced_remote.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/daemons/fenced/fenced_remote.c b/daemons/fenced/fenced_remote.c
index 31d5ee6e93..415a7c1b98 100644
--- a/daemons/fenced/fenced_remote.c
+++ b/daemons/fenced/fenced_remote.c
@@ -616,7 +616,9 @@ remote_op_timeout_one(gpointer userdata)
 
     crm_notice("Peer's '%s' action targeting %s for client %s timed out " CRM_XS
                " id=%.8s", op->action, op->target, op->client_name, op->id);
-    pcmk__set_result(&result, CRM_EX_ERROR, PCMK_EXEC_TIMEOUT, NULL);
+    pcmk__set_result(&result, CRM_EX_ERROR, PCMK_EXEC_TIMEOUT,
+                     "Peer did not send fence result within timeout");
+
 
     // Try another device, if appropriate
     request_peer_fencing(op, NULL, &result);
-- 
2.27.0


From 34e5baebac78b7235825b31bebc44e3d65ae45cc Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Fri, 19 Nov 2021 16:10:28 -0600
Subject: [PATCH 11/23] Refactor: fencer: pass full result when handling
 duplicate actions

Rename handle_duplicates() to finalize_op_duplicates() for readability, and
make it take a full result rather than a legacy return code as an argument.
---
 daemons/fenced/fenced_remote.c | 29 +++++++++++++++++++++--------
 1 file changed, 21 insertions(+), 8 deletions(-)

diff --git a/daemons/fenced/fenced_remote.c b/daemons/fenced/fenced_remote.c
index 415a7c1b98..850bfb6eb3 100644
--- a/daemons/fenced/fenced_remote.c
+++ b/daemons/fenced/fenced_remote.c
@@ -439,12 +439,19 @@ handle_local_reply_and_notify(remote_fencing_op_t * op, xmlNode * data, int rc)
     free_xml(notify_data);
 }
 
+/*!
+ * \internal
+ * \brief Finalize all duplicates of a given fencer operation
+ *
+ * \param[in] op         Fencer operation that completed
+ * \param[in] data       Top-level XML to add notification to
+ * \param[in] result     Full operation result
+ */
 static void
-handle_duplicates(remote_fencing_op_t * op, xmlNode * data, int rc)
+finalize_op_duplicates(remote_fencing_op_t *op, xmlNode *data,
+                       pcmk__action_result_t *result)
 {
-    GList *iter = NULL;
-
-    for (iter = op->duplicates; iter != NULL; iter = iter->next) {
+    for (GList *iter = op->duplicates; iter != NULL; iter = iter->next) {
         remote_fencing_op_t *other = iter->data;
 
         if (other->state == st_duplicate) {
@@ -452,8 +459,9 @@ handle_duplicates(remote_fencing_op_t * op, xmlNode * data, int rc)
             crm_debug("Performing duplicate notification for %s@%s: %s "
                       CRM_XS " id=%.8s",
                       other->client_name, other->originator,
-                      pcmk_strerror(rc), other->id);
-            remote_op_done(other, data, rc, TRUE);
+                      pcmk_exec_status_str(result->execution_status),
+                      other->id);
+            remote_op_done(other, data, pcmk_rc2legacy(stonith__result2rc(result)), TRUE);
 
         } else {
             // Possible if (for example) it timed out already
@@ -570,8 +578,13 @@ remote_op_done(remote_fencing_op_t * op, xmlNode * data, int rc, int dup)
 
     handle_local_reply_and_notify(op, data, rc);
 
-    if (dup == FALSE) {
-        handle_duplicates(op, data, rc);
+    if (!dup) {
+        pcmk__action_result_t result = PCMK__UNKNOWN_RESULT;
+
+        pcmk__set_result(&result,
+                         ((rc == pcmk_ok)? CRM_EX_OK : CRM_EX_ERROR),
+                         stonith__legacy2status(rc), NULL);
+        finalize_op_duplicates(op, data, &result);
     }
 
     /* Free non-essential parts of the record
-- 
2.27.0


From 939bd6f5f0f79b19d0cc4d869f3c8980fda2e461 Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Fri, 19 Nov 2021 16:23:20 -0600
Subject: [PATCH 12/23] Feature: fencer: set exit reasons for fencing timeouts

finalize_timed_out_op() now takes an exit reason as an argument.
It is called for fencing timeouts, peer query reply timeouts,
and all capable nodes failing to fence.

At this point, the exit reason is not used, but that is planned.
---
 daemons/fenced/fenced_remote.c | 25 +++++++++++++++----------
 1 file changed, 15 insertions(+), 10 deletions(-)

diff --git a/daemons/fenced/fenced_remote.c b/daemons/fenced/fenced_remote.c
index 850bfb6eb3..c10a32442e 100644
--- a/daemons/fenced/fenced_remote.c
+++ b/daemons/fenced/fenced_remote.c
@@ -643,10 +643,12 @@ remote_op_timeout_one(gpointer userdata)
  * \brief Finalize a remote fencer operation that timed out
  *
  * \param[in] op      Fencer operation that timed out
+ * \param[in] reason  Readable description of what step timed out
  */
 static void
-finalize_timed_out_op(remote_fencing_op_t *op)
+finalize_timed_out_op(remote_fencing_op_t *op, const char *reason)
 {
+    pcmk__action_result_t result = PCMK__UNKNOWN_RESULT;
 
     op->op_timer_total = 0;
 
@@ -660,13 +662,13 @@ finalize_timed_out_op(remote_fencing_op_t *op)
          * devices, and return success.
          */
         op->state = st_done;
-        remote_op_done(op, NULL, pcmk_ok, FALSE);
-        return;
+        pcmk__set_result(&result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
+    } else {
+        op->state = st_failed;
+        pcmk__set_result(&result, CRM_EX_ERROR, PCMK_EXEC_TIMEOUT, reason);
     }
-
-    op->state = st_failed;
-
-    remote_op_done(op, NULL, -ETIME, FALSE);
+    remote_op_done(op, NULL, pcmk_rc2legacy(stonith__result2rc(&result)), FALSE);
+    pcmk__reset_result(&result);
 }
 
 /*!
@@ -687,7 +689,8 @@ remote_op_timeout(gpointer userdata)
                   CRM_XS " id=%.8s",
                   op->action, op->target, op->client_name, op->id);
     } else {
-        finalize_timed_out_op(userdata);
+        finalize_timed_out_op(userdata, "Fencing could not be completed "
+                                        "within overall timeout");
     }
     return G_SOURCE_REMOVE;
 }
@@ -719,7 +722,8 @@ remote_op_query_timeout(gpointer data)
             g_source_remove(op->op_timer_total);
             op->op_timer_total = 0;
         }
-        finalize_timed_out_op(op);
+        finalize_timed_out_op(op, "No capable peers replied to device query "
+                                  "within timeout");
     }
 
     return FALSE;
@@ -1767,7 +1771,8 @@ request_peer_fencing(remote_fencing_op_t *op, peer_device_info_t *peer,
                  CRM_XS " state=%s", op->action, op->target, op->client_name,
                  stonith_op_state_str(op->state));
         CRM_CHECK(op->state < st_done, return);
-        finalize_timed_out_op(op);
+        finalize_timed_out_op(op, "All nodes failed, or are unable, to "
+                                  "fence target");
 
     } else if(op->replies >= op->replies_expected || op->replies >= fencing_active_peers()) {
         /* if the operation never left the query state,
-- 
2.27.0


From b80b02799260feb98723a460f2f8e8ad5cdc467f Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Fri, 19 Nov 2021 16:32:04 -0600
Subject: [PATCH 13/23] Refactor: fencer: pass full result when finalizing peer
 fencing actions

Rename remote_op_done() to finalize_op() for readability, and make it take a
full result as an argument, rather than a legacy return code.

This does cause one change in behavior: when all topology levels fail,
the legacy return code returned will be -pcmk_err_generic instead of EINVAL.
---
 daemons/fenced/fenced_history.c |   2 +-
 daemons/fenced/fenced_remote.c  | 177 ++++++++++++++++++--------------
 2 files changed, 103 insertions(+), 76 deletions(-)

diff --git a/daemons/fenced/fenced_history.c b/daemons/fenced/fenced_history.c
index bc159383c2..9e38ff0a20 100644
--- a/daemons/fenced/fenced_history.c
+++ b/daemons/fenced/fenced_history.c
@@ -374,7 +374,7 @@ stonith_local_history_diff_and_merge(GHashTable *remote_history,
                 set_fencing_completed(op);
                 /* use -EHOSTUNREACH to not introduce a new return-code that might
                    trigger unexpected results at other places and to prevent
-                   remote_op_done from setting the delegate if not present
+                   finalize_op from setting the delegate if not present
                 */
                 stonith_bcast_result_to_peers(op, -EHOSTUNREACH, FALSE);
             }
diff --git a/daemons/fenced/fenced_remote.c b/daemons/fenced/fenced_remote.c
index c10a32442e..aefc5f311c 100644
--- a/daemons/fenced/fenced_remote.c
+++ b/daemons/fenced/fenced_remote.c
@@ -76,13 +76,14 @@ typedef struct {
 
 GHashTable *stonith_remote_op_list = NULL;
 
-static void remote_op_done(remote_fencing_op_t * op, xmlNode * data, int rc, int dup);
 extern xmlNode *stonith_create_op(int call_id, const char *token, const char *op, xmlNode * data,
                                   int call_options);
 
 static void request_peer_fencing(remote_fencing_op_t *op,
                                 peer_device_info_t *peer,
                                 pcmk__action_result_t *result);
+static void finalize_op(remote_fencing_op_t *op, xmlNode *data,
+                        pcmk__action_result_t *result, bool dup);
 static void report_timeout_period(remote_fencing_op_t * op, int op_timeout);
 static int get_op_total_timeout(const remote_fencing_op_t *op,
                                 const peer_device_info_t *chosen_peer);
@@ -461,7 +462,7 @@ finalize_op_duplicates(remote_fencing_op_t *op, xmlNode *data,
                       other->client_name, other->originator,
                       pcmk_exec_status_str(result->execution_status),
                       other->id);
-            remote_op_done(other, data, pcmk_rc2legacy(stonith__result2rc(result)), TRUE);
+            finalize_op(other, data, result, true);
 
         } else {
             // Possible if (for example) it timed out already
@@ -487,104 +488,100 @@ delegate_from_xml(xmlNode *xml)
 
 /*!
  * \internal
- * \brief Finalize a remote operation.
+ * \brief Finalize a peer fencing operation
  *
- * \description This function has two code paths.
+ * Clean up after a fencing operation completes. This function has two code
+ * paths: the executioner uses it to broadcast the result to CPG peers, and then
+ * each peer (including the executioner) uses it to process that broadcast and
+ * notify its IPC clients of the result.
  *
- * Path 1. This node is the owner of the operation and needs
- *         to notify the cpg group via a broadcast as to the operation's
- *         results.
- *
- * Path 2. The cpg broadcast is received. All nodes notify their local
- *         stonith clients the operation results.
- *
- * So, The owner of the operation first notifies the cluster of the result,
- * and once that cpg notify is received back it notifies all the local clients.
- *
- * Nodes that are passive watchers of the operation will receive the
- * broadcast and only need to notify their local clients the operation finished.
- *
- * \param op, The fencing operation to finalize
- * \param data, The xml msg reply (if present) of the last delegated fencing
- *              operation.
- * \param dup, Is this operation a duplicate, if so treat it a little differently
- *             making sure the broadcast is not sent out.
+ * \param[in] op      Fencer operation that completed
+ * \param[in] data    If not NULL, XML reply of last delegated fencing operation
+ * \param[in] result  Full operation result
+ * \param[in] dup     Whether this operation is a duplicate of another
+ *                    (in which case, do not broadcast the result)
  */
 static void
-remote_op_done(remote_fencing_op_t * op, xmlNode * data, int rc, int dup)
+finalize_op(remote_fencing_op_t *op, xmlNode *data,
+            pcmk__action_result_t *result, bool dup)
 {
     int level = LOG_ERR;
     const char *subt = NULL;
     xmlNode *local_data = NULL;
     gboolean op_merged = FALSE;
 
+    CRM_CHECK((op != NULL) && (result != NULL), return);
+
+    if (op->notify_sent) {
+        // Most likely, this is a timed-out action that eventually completed
+        crm_notice("Operation '%s'%s%s by %s for %s@%s%s: "
+                   "Result arrived too late " CRM_XS " id=%.8s",
+                   op->action, (op->target? " targeting " : ""),
+                   (op->target? op->target : ""),
+                   (op->delegate? op->delegate : "unknown node"),
+                   op->client_name, op->originator,
+                   (op_merged? " (merged)" : ""),
+                   op->id);
+        return;
+    }
+
     set_fencing_completed(op);
     clear_remote_op_timers(op);
     undo_op_remap(op);
 
-    if (op->notify_sent == TRUE) {
-        crm_err("Already sent notifications for '%s' targeting %s by %s for "
-                "client %s@%s: %s " CRM_XS " rc=%d state=%s id=%.8s",
-                op->action, op->target,
-                (op->delegate? op->delegate : "unknown node"),
-                op->client_name, op->originator, pcmk_strerror(rc),
-                rc, stonith_op_state_str(op->state), op->id);
-        goto remote_op_done_cleanup;
-    }
-
     if (data == NULL) {
         data = create_xml_node(NULL, "remote-op");
         local_data = data;
 
     } else if (op->delegate == NULL) {
-        switch (rc) {
-            case -ENODEV:
-            case -EHOSTUNREACH:
+        switch (result->execution_status) {
+            case PCMK_EXEC_NO_FENCE_DEVICE:
                 break;
+            case PCMK_EXEC_INVALID:
+                if (result->exit_status == CRM_EX_EXPIRED) {
+                    break;
+                }
+                // else fall through
             default:
                 op->delegate = delegate_from_xml(data);
                 break;
         }
     }
 
-    if(dup) {
-        op_merged = TRUE;
-    } else if (crm_element_value(data, F_STONITH_MERGED)) {
-        op_merged = TRUE;
-    } 
+    if (dup || (crm_element_value(data, F_STONITH_MERGED) != NULL)) {
+        op_merged = true;
+    }
 
     /* Tell everyone the operation is done, we will continue
      * with doing the local notifications once we receive
      * the broadcast back. */
     subt = crm_element_value(data, F_SUBTYPE);
-    if (dup == FALSE && !pcmk__str_eq(subt, "broadcast", pcmk__str_casei)) {
+    if (!dup && !pcmk__str_eq(subt, "broadcast", pcmk__str_casei)) {
         /* Defer notification until the bcast message arrives */
-        stonith_bcast_result_to_peers(op, rc, (op_merged? TRUE: FALSE));
-        goto remote_op_done_cleanup;
+        stonith_bcast_result_to_peers(op, pcmk_rc2legacy(stonith__result2rc(result)), op_merged);
+        free_xml(local_data);
+        return;
     }
 
-    if (rc == pcmk_ok || dup) {
-        level = LOG_NOTICE;
-    } else if (!pcmk__str_eq(op->originator, stonith_our_uname, pcmk__str_casei)) {
+    if (pcmk__result_ok(result) || dup
+        || !pcmk__str_eq(op->originator, stonith_our_uname, pcmk__str_casei)) {
         level = LOG_NOTICE;
     }
-
-    do_crm_log(level, "Operation '%s'%s%s by %s for %s@%s%s: %s "
+    do_crm_log(level, "Operation '%s'%s%s by %s for %s@%s%s: %s (%s%s%s) "
                CRM_XS " id=%.8s", op->action, (op->target? " targeting " : ""),
                (op->target? op->target : ""),
                (op->delegate? op->delegate : "unknown node"),
                op->client_name, op->originator,
-               (op_merged? " (merged)" : ""), pcmk_strerror(rc), op->id);
+               (op_merged? " (merged)" : ""), crm_exit_str(result->exit_status),
+               pcmk_exec_status_str(result->execution_status),
+               ((result->exit_reason == NULL)? "" : ": "),
+               ((result->exit_reason == NULL)? "" : result->exit_reason),
+               op->id);
 
-    handle_local_reply_and_notify(op, data, rc);
+    handle_local_reply_and_notify(op, data, pcmk_rc2legacy(stonith__result2rc(result)));
 
     if (!dup) {
-        pcmk__action_result_t result = PCMK__UNKNOWN_RESULT;
-
-        pcmk__set_result(&result,
-                         ((rc == pcmk_ok)? CRM_EX_OK : CRM_EX_ERROR),
-                         stonith__legacy2status(rc), NULL);
-        finalize_op_duplicates(op, data, &result);
+        finalize_op_duplicates(op, data, result);
     }
 
     /* Free non-essential parts of the record
@@ -594,20 +591,27 @@ remote_op_done(remote_fencing_op_t * op, xmlNode * data, int rc, int dup)
         g_list_free_full(op->query_results, free_remote_query);
         op->query_results = NULL;
     }
-
     if (op->request) {
         free_xml(op->request);
         op->request = NULL;
     }
 
-  remote_op_done_cleanup:
     free_xml(local_data);
 }
 
+/*!
+ * \internal
+ * \brief Finalize a watchdog fencer op after the waiting time expires
+ *
+ * \param[in] userdata  Fencer operation that completed
+ *
+ * \return G_SOURCE_REMOVE (which tells glib not to restart timer)
+ */
 static gboolean
 remote_op_watchdog_done(gpointer userdata)
 {
     remote_fencing_op_t *op = userdata;
+    pcmk__action_result_t result = PCMK__UNKNOWN_RESULT;
 
     op->op_timer_one = 0;
 
@@ -615,8 +619,9 @@ remote_op_watchdog_done(gpointer userdata)
                CRM_XS " id=%.8s",
                op->action, op->target, op->client_name, op->id);
     op->state = st_done;
-    remote_op_done(op, NULL, pcmk_ok, FALSE);
-    return FALSE;
+    pcmk__set_result(&result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
+    finalize_op(op, NULL, &result, false);
+    return G_SOURCE_REMOVE;
 }
 
 static gboolean
@@ -667,7 +672,7 @@ finalize_timed_out_op(remote_fencing_op_t *op, const char *reason)
         op->state = st_failed;
         pcmk__set_result(&result, CRM_EX_ERROR, PCMK_EXEC_TIMEOUT, reason);
     }
-    remote_op_done(op, NULL, pcmk_rc2legacy(stonith__result2rc(&result)), FALSE);
+    finalize_op(op, NULL, &result, false);
     pcmk__reset_result(&result);
 }
 
@@ -1064,9 +1069,13 @@ fenced_handle_manual_confirmation(pcmk__client_t *client, xmlNode *msg)
     set_fencing_completed(op);
     op->delegate = strdup("a human");
 
-    // For the fencer's purposes, the fencing operation is done
+    {
+        // For the fencer's purposes, the fencing operation is done
+        pcmk__action_result_t result = PCMK__UNKNOWN_RESULT;
 
-    remote_op_done(op, msg, pcmk_ok, FALSE);
+        pcmk__set_result(&result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
+        finalize_op(op, msg, &result, false);
+    }
 
     /* For the requester's purposes, the operation is still pending. The
      * actual result will be sent asynchronously via the operation's done_cb().
@@ -1200,6 +1209,16 @@ create_remote_stonith_op(const char *client, xmlNode * request, gboolean peer)
     return op;
 }
 
+/*!
+ * \internal
+ * \brief Create a peer fencing operation from a request, and initiate it
+ *
+ * \param[in] client     IPC client that made request (NULL to get from request)
+ * \param[in] request    Request XML
+ * \param[in] manual_ack Whether this is a manual action confirmation
+ *
+ * \return Newly created operation on success, otherwise NULL
+ */
 remote_fencing_op_t *
 initiate_remote_stonith_op(pcmk__client_t *client, xmlNode *request,
                            gboolean manual_ack)
@@ -1234,9 +1253,17 @@ initiate_remote_stonith_op(pcmk__client_t *client, xmlNode *request,
 
     switch (op->state) {
         case st_failed:
-            crm_warn("Could not request peer fencing (%s) targeting %s "
-                     CRM_XS " id=%.8s", op->action, op->target, op->id);
-            remote_op_done(op, NULL, -EINVAL, FALSE);
+            // advance_topology_level() exhausted levels
+            {
+                pcmk__action_result_t result = PCMK__UNKNOWN_RESULT;
+
+                pcmk__set_result(&result, CRM_EX_ERROR, PCMK_EXEC_ERROR,
+                                 "All topology levels failed");
+                crm_warn("Could not request peer fencing (%s) targeting %s "
+                         CRM_XS " id=%.8s", op->action, op->target, op->id);
+                finalize_op(op, NULL, &result, false);
+                pcmk__reset_result(&result);
+            }
             return op;
 
         case st_duplicate:
@@ -1607,7 +1634,7 @@ advance_topology_device_in_level(remote_fencing_op_t *op, const char *device,
         crm_trace("Marking complex fencing op targeting %s as complete",
                   op->target);
         op->state = st_done;
-        remote_op_done(op, msg, pcmk_ok, FALSE);
+        finalize_op(op, msg, &result, false);
     }
 }
 
@@ -1805,7 +1832,7 @@ request_peer_fencing(remote_fencing_op_t *op, peer_device_info_t *peer,
                                  PCMK_EXEC_NO_FENCE_DEVICE, NULL);
             }
             /* ... else use result provided by caller -- overwriting it with
-               PCMK_EXEC_NO_FENCE_DEVICE would prevent remote_op_done() from
+               PCMK_EXEC_NO_FENCE_DEVICE would prevent finalize_op() from
                setting the correct delegate if needed.
              */
 
@@ -1816,7 +1843,7 @@ request_peer_fencing(remote_fencing_op_t *op, peer_device_info_t *peer,
         }
 
         op->state = st_failed;
-        remote_op_done(op, NULL, pcmk_rc2legacy(stonith__result2rc(result)), FALSE);
+        finalize_op(op, NULL, result, false);
 
     } else {
         crm_info("Waiting for additional peers capable of fencing (%s) %s%s%s "
@@ -2216,7 +2243,7 @@ fenced_process_fencing_reply(xmlNode *msg)
         } else {
             op->state = st_failed;
         }
-        remote_op_done(op, msg, pcmk_rc2legacy(stonith__result2rc(&result)), FALSE);
+        finalize_op(op, msg, &result, false);
         return;
     } else if (!pcmk__str_eq(op->originator, stonith_our_uname, pcmk__str_casei)) {
         /* If this isn't a remote level broadcast, and we are not the
@@ -2241,7 +2268,7 @@ fenced_process_fencing_reply(xmlNode *msg)
         /* We own the op, and it is complete. broadcast the result to all nodes
          * and notify our local clients. */
         if (op->state == st_done) {
-            remote_op_done(op, msg, pcmk_rc2legacy(stonith__result2rc(&result)), FALSE);
+            finalize_op(op, msg, &result, false);
             return;
         }
 
@@ -2268,20 +2295,20 @@ fenced_process_fencing_reply(xmlNode *msg)
              * levels are available, mark this operation as failed and report results. */
             if (advance_topology_level(op, false) != pcmk_rc_ok) {
                 op->state = st_failed;
-                remote_op_done(op, msg, pcmk_rc2legacy(stonith__result2rc(&result)), FALSE);
+                finalize_op(op, msg, &result, false);
                 return;
             }
         }
     } else if (pcmk__result_ok(&result) && (op->devices == NULL)) {
         crm_trace("All done for %s", op->target);
         op->state = st_done;
-        remote_op_done(op, msg, pcmk_rc2legacy(stonith__result2rc(&result)), FALSE);
+        finalize_op(op, msg, &result, false);
         return;
     } else if ((result.execution_status == PCMK_EXEC_TIMEOUT)
                && (op->devices == NULL)) {
         /* If the operation timed out don't bother retrying other peers. */
         op->state = st_failed;
-        remote_op_done(op, msg, pcmk_rc2legacy(stonith__result2rc(&result)), FALSE);
+        finalize_op(op, msg, &result, false);
         return;
     } else {
         /* fall-through and attempt other fencing action using another peer */
-- 
2.27.0


From 8f19c09f1b961ba9aa510b7dcd1875bbabcddcdc Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Fri, 19 Nov 2021 16:39:23 -0600
Subject: [PATCH 14/23] Refactor: fencer: pass full result when broadcasting
 replies

Rename stonith_bcast_result_to_peers() to fenced_broadcast_op_result() for
consistency, and make it take the full result as an argument instead of a
legacy return code. The full result is not yet used, but that is planned.
---
 daemons/fenced/fenced_history.c   | 18 ++++++++++++------
 daemons/fenced/fenced_remote.c    | 15 ++++++++++++---
 daemons/fenced/pacemaker-fenced.h |  9 ++-------
 3 files changed, 26 insertions(+), 16 deletions(-)

diff --git a/daemons/fenced/fenced_history.c b/daemons/fenced/fenced_history.c
index 9e38ff0a20..1e07a9815a 100644
--- a/daemons/fenced/fenced_history.c
+++ b/daemons/fenced/fenced_history.c
@@ -359,24 +359,29 @@ stonith_local_history_diff_and_merge(GHashTable *remote_history,
     }
 
     if (remote_history) {
+        pcmk__action_result_t result = PCMK__UNKNOWN_RESULT;
+
         init_stonith_remote_op_hash_table(&stonith_remote_op_list);
 
         updated |= g_hash_table_size(remote_history);
 
         g_hash_table_iter_init(&iter, remote_history);
         while (g_hash_table_iter_next(&iter, NULL, (void **)&op)) {
-
             if (stonith__op_state_pending(op->state) &&
                 pcmk__str_eq(op->originator, stonith_our_uname, pcmk__str_casei)) {
+
                 crm_warn("Failing pending operation %.8s originated by us but "
                          "known only from peer history", op->id);
                 op->state = st_failed;
                 set_fencing_completed(op);
-                /* use -EHOSTUNREACH to not introduce a new return-code that might
-                   trigger unexpected results at other places and to prevent
-                   finalize_op from setting the delegate if not present
-                */
-                stonith_bcast_result_to_peers(op, -EHOSTUNREACH, FALSE);
+
+                /* CRM_EX_EXPIRED + PCMK_EXEC_INVALID prevents finalize_op()
+                 * from setting a delegate
+                 */
+                pcmk__set_result(&result, CRM_EX_EXPIRED, PCMK_EXEC_INVALID,
+                                 "Initiated by earlier fencer "
+                                 "process and presumed failed");
+                fenced_broadcast_op_result(op, &result, false);
             }
 
             g_hash_table_iter_steal(&iter);
@@ -391,6 +396,7 @@ stonith_local_history_diff_and_merge(GHashTable *remote_history,
              */
         }
 
+        pcmk__reset_result(&result);
         g_hash_table_destroy(remote_history); /* remove what is left */
     }
 
diff --git a/daemons/fenced/fenced_remote.c b/daemons/fenced/fenced_remote.c
index aefc5f311c..a0f026c790 100644
--- a/daemons/fenced/fenced_remote.c
+++ b/daemons/fenced/fenced_remote.c
@@ -374,12 +374,21 @@ create_op_done_notify(remote_fencing_op_t * op, int rc)
     return notify_data;
 }
 
+/*!
+ * \internal
+ * \brief Broadcast a fence result notification to all CPG peers
+ *
+ * \param[in] op         Fencer operation that completed
+ * \param[in] result     Full operation result
+ * \param[in] op_merged  Whether this operation is a duplicate of another
+ */
 void
-stonith_bcast_result_to_peers(remote_fencing_op_t * op, int rc, gboolean op_merged)
+fenced_broadcast_op_result(remote_fencing_op_t *op,
+                           pcmk__action_result_t *result, bool op_merged)
 {
     static int count = 0;
     xmlNode *bcast = create_xml_node(NULL, T_STONITH_REPLY);
-    xmlNode *notify_data = create_op_done_notify(op, rc);
+    xmlNode *notify_data = create_op_done_notify(op, pcmk_rc2legacy(stonith__result2rc(result)));
 
     count++;
     crm_trace("Broadcasting result to peers");
@@ -558,7 +567,7 @@ finalize_op(remote_fencing_op_t *op, xmlNode *data,
     subt = crm_element_value(data, F_SUBTYPE);
     if (!dup && !pcmk__str_eq(subt, "broadcast", pcmk__str_casei)) {
         /* Defer notification until the bcast message arrives */
-        stonith_bcast_result_to_peers(op, pcmk_rc2legacy(stonith__result2rc(result)), op_merged);
+        fenced_broadcast_op_result(op, result, op_merged);
         free_xml(local_data);
         return;
     }
diff --git a/daemons/fenced/pacemaker-fenced.h b/daemons/fenced/pacemaker-fenced.h
index d5f4bc79fd..ed47ab046c 100644
--- a/daemons/fenced/pacemaker-fenced.h
+++ b/daemons/fenced/pacemaker-fenced.h
@@ -153,13 +153,8 @@ typedef struct remote_fencing_op_s {
 
 } remote_fencing_op_t;
 
-/*!
- * \internal
- * \brief Broadcast the result of an operation to the peers.
- * \param op, Operation whose result should be broadcast
- * \param rc, Result of the operation
- */
-void stonith_bcast_result_to_peers(remote_fencing_op_t * op, int rc, gboolean op_merged);
+void fenced_broadcast_op_result(remote_fencing_op_t *op,
+                                pcmk__action_result_t *result, bool op_merged);
 
 // Fencer-specific client flags
 enum st_client_flags {
-- 
2.27.0


From 3396e66b4c9cca895c7412b66159fd2342de1911 Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Fri, 19 Nov 2021 16:42:46 -0600
Subject: [PATCH 15/23] Feature: fencer: add full result to local replies

handle_local_reply_and_notify() now takes the full result as an argument
instead of a legacy return code, and adds it to the reply to the local
requester. It does not add it to notifications yet, but that is planned.
---
 daemons/fenced/fenced_remote.c | 26 ++++++++++++++------------
 1 file changed, 14 insertions(+), 12 deletions(-)

diff --git a/daemons/fenced/fenced_remote.c b/daemons/fenced/fenced_remote.c
index a0f026c790..329e06c444 100644
--- a/daemons/fenced/fenced_remote.c
+++ b/daemons/fenced/fenced_remote.c
@@ -409,8 +409,17 @@ fenced_broadcast_op_result(remote_fencing_op_t *op,
     return;
 }
 
+/*!
+ * \internal
+ * \brief Reply to a local request originator and notify all subscribed clients
+ *
+ * \param[in] op         Fencer operation that completed
+ * \param[in] data       Top-level XML to add notification to
+ * \param[in] result     Full operation result
+ */
 static void
-handle_local_reply_and_notify(remote_fencing_op_t * op, xmlNode * data, int rc)
+handle_local_reply_and_notify(remote_fencing_op_t *op, xmlNode *data,
+                              pcmk__action_result_t *result)
 {
     xmlNode *notify_data = NULL;
     xmlNode *reply = NULL;
@@ -421,26 +430,19 @@ handle_local_reply_and_notify(remote_fencing_op_t * op, xmlNode * data, int rc)
     }
 
     /* Do notification with a clean data object */
-    notify_data = create_op_done_notify(op, rc);
+    notify_data = create_op_done_notify(op, pcmk_rc2legacy(stonith__result2rc(result)));
     crm_xml_add_int(data, "state", op->state);
     crm_xml_add(data, F_STONITH_TARGET, op->target);
     crm_xml_add(data, F_STONITH_OPERATION, op->action);
 
-    {
-        pcmk__action_result_t result = PCMK__UNKNOWN_RESULT;
-
-        pcmk__set_result(&result,
-                         ((rc == pcmk_ok)? CRM_EX_OK : CRM_EX_ERROR),
-                         stonith__legacy2status(rc), NULL);
-        reply = fenced_construct_reply(op->request, data, &result);
-    }
+    reply = fenced_construct_reply(op->request, data, result);
     crm_xml_add(reply, F_STONITH_DELEGATE, op->delegate);
 
     /* Send fencing OP reply to local client that initiated fencing */
     do_local_reply(reply, op->client_id, op->call_options & st_opt_sync_call, FALSE);
 
     /* bcast to all local clients that the fencing operation happend */
-    do_stonith_notify(T_STONITH_NOTIFY_FENCE, rc, notify_data);
+    do_stonith_notify(T_STONITH_NOTIFY_FENCE, pcmk_rc2legacy(stonith__result2rc(result)), notify_data);
     do_stonith_notify(T_STONITH_NOTIFY_HISTORY, pcmk_ok, NULL);
 
     /* mark this op as having notify's already sent */
@@ -587,7 +589,7 @@ finalize_op(remote_fencing_op_t *op, xmlNode *data,
                ((result->exit_reason == NULL)? "" : result->exit_reason),
                op->id);
 
-    handle_local_reply_and_notify(op, data, pcmk_rc2legacy(stonith__result2rc(result)));
+    handle_local_reply_and_notify(op, data, result);
 
     if (!dup) {
         finalize_op_duplicates(op, data, result);
-- 
2.27.0


From 004583f3ef908cbd9dc6305597cb55d5ad22882c Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Fri, 19 Nov 2021 16:47:13 -0600
Subject: [PATCH 16/23] Refactor: fencer: pass full result when sending device
 notifications

Rename do_stonith_notify_device() to fenced_send_device_notification() for
consistency, and make it take the full result as an argument rather than a
legacy return code. The full result is not used yet, but that is planned.
---
 daemons/fenced/fenced_commands.c  |  4 ++--
 daemons/fenced/pacemaker-fenced.c | 15 +++++++++++++--
 daemons/fenced/pacemaker-fenced.h |  4 +++-
 3 files changed, 18 insertions(+), 5 deletions(-)

diff --git a/daemons/fenced/fenced_commands.c b/daemons/fenced/fenced_commands.c
index 84f89e8daf..86a761dfab 100644
--- a/daemons/fenced/fenced_commands.c
+++ b/daemons/fenced/fenced_commands.c
@@ -3190,7 +3190,7 @@ handle_request(pcmk__client_t *client, uint32_t id, uint32_t flags,
                              PCMK_EXEC_INVALID,
                              "Unprivileged users must register device via CIB");
         }
-        do_stonith_notify_device(op, pcmk_rc2legacy(stonith__result2rc(&result)), device_id);
+        fenced_send_device_notification(op, &result, device_id);
 
     } else if (pcmk__str_eq(op, STONITH_OP_DEVICE_DEL, pcmk__str_none)) {
         xmlNode *dev = get_xpath_object("//" F_STONITH_DEVICE, request, LOG_ERR);
@@ -3204,7 +3204,7 @@ handle_request(pcmk__client_t *client, uint32_t id, uint32_t flags,
                              PCMK_EXEC_INVALID,
                              "Unprivileged users must delete device via CIB");
         }
-        do_stonith_notify_device(op, pcmk_rc2legacy(stonith__result2rc(&result)), device_id);
+        fenced_send_device_notification(op, &result, device_id);
 
     } else if (pcmk__str_eq(op, STONITH_OP_LEVEL_ADD, pcmk__str_none)) {
         char *device_id = NULL;
diff --git a/daemons/fenced/pacemaker-fenced.c b/daemons/fenced/pacemaker-fenced.c
index 56acc93f31..42e167ce78 100644
--- a/daemons/fenced/pacemaker-fenced.c
+++ b/daemons/fenced/pacemaker-fenced.c
@@ -394,10 +394,21 @@ do_stonith_notify_config(const char *op, int rc,
     free_xml(notify_data);
 }
 
+/*!
+ * \internal
+ * \brief Send notifications for a device change to subscribed clients
+ *
+ * \param[in] op      Notification type (STONITH_OP_DEVICE_ADD or
+ *                    STONITH_OP_DEVICE_DEL)
+ * \param[in] result  Operation result
+ * \param[in] desc    ID of device that changed
+ */
 void
-do_stonith_notify_device(const char *op, int rc, const char *desc)
+fenced_send_device_notification(const char *op,
+                                const pcmk__action_result_t *result,
+                                const char *desc)
 {
-    do_stonith_notify_config(op, rc, desc, g_hash_table_size(device_list));
+    do_stonith_notify_config(op, pcmk_rc2legacy(stonith__result2rc(result)), desc, g_hash_table_size(device_list));
 }
 
 void
diff --git a/daemons/fenced/pacemaker-fenced.h b/daemons/fenced/pacemaker-fenced.h
index ed47ab046c..0b63680171 100644
--- a/daemons/fenced/pacemaker-fenced.h
+++ b/daemons/fenced/pacemaker-fenced.h
@@ -230,7 +230,9 @@ void
  do_stonith_async_timeout_update(const char *client, const char *call_id, int timeout);
 
 void do_stonith_notify(const char *type, int result, xmlNode *data);
-void do_stonith_notify_device(const char *op, int rc, const char *desc);
+void fenced_send_device_notification(const char *op,
+                                     const pcmk__action_result_t *result,
+                                     const char *desc);
 void do_stonith_notify_level(const char *op, int rc, const char *desc);
 
 remote_fencing_op_t *initiate_remote_stonith_op(pcmk__client_t *client,
-- 
2.27.0


From ee0777d5ca99d8d2d7805d4a73241ab696c68751 Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Fri, 19 Nov 2021 16:51:55 -0600
Subject: [PATCH 17/23] Refactor: fencer: pass full result when sending
 topology notifications

Rename do_stonith_notify_level() to fenced_send_level_notification() for
consistency, and make it take the full result as an argument rather than a
legacy return code. The full result is not used yet, but that is planned.
---
 daemons/fenced/fenced_commands.c  |  4 ++--
 daemons/fenced/pacemaker-fenced.c | 21 +++++++++++++++------
 daemons/fenced/pacemaker-fenced.h |  4 +++-
 3 files changed, 20 insertions(+), 9 deletions(-)

diff --git a/daemons/fenced/fenced_commands.c b/daemons/fenced/fenced_commands.c
index 86a761dfab..2f3dbb035a 100644
--- a/daemons/fenced/fenced_commands.c
+++ b/daemons/fenced/fenced_commands.c
@@ -3216,7 +3216,7 @@ handle_request(pcmk__client_t *client, uint32_t id, uint32_t flags,
                              PCMK_EXEC_INVALID,
                              "Unprivileged users must add level via CIB");
         }
-        do_stonith_notify_level(op, pcmk_rc2legacy(stonith__result2rc(&result)), device_id);
+        fenced_send_level_notification(op, &result, device_id);
         free(device_id);
 
     } else if (pcmk__str_eq(op, STONITH_OP_LEVEL_DEL, pcmk__str_none)) {
@@ -3229,7 +3229,7 @@ handle_request(pcmk__client_t *client, uint32_t id, uint32_t flags,
                              PCMK_EXEC_INVALID,
                              "Unprivileged users must delete level via CIB");
         }
-        do_stonith_notify_level(op, pcmk_rc2legacy(stonith__result2rc(&result)), device_id);
+        fenced_send_level_notification(op, &result, device_id);
 
     } else if(pcmk__str_eq(op, CRM_OP_RM_NODE_CACHE, pcmk__str_casei)) {
         int node_id = 0;
diff --git a/daemons/fenced/pacemaker-fenced.c b/daemons/fenced/pacemaker-fenced.c
index 42e167ce78..773cf57f6b 100644
--- a/daemons/fenced/pacemaker-fenced.c
+++ b/daemons/fenced/pacemaker-fenced.c
@@ -411,10 +411,21 @@ fenced_send_device_notification(const char *op,
     do_stonith_notify_config(op, pcmk_rc2legacy(stonith__result2rc(result)), desc, g_hash_table_size(device_list));
 }
 
+/*!
+ * \internal
+ * \brief Send notifications for a topology level change to subscribed clients
+ *
+ * \param[in] op      Notification type (STONITH_OP_LEVEL_ADD or
+ *                    STONITH_OP_LEVEL_DEL)
+ * \param[in] result  Operation result
+ * \param[in] desc    String representation of level (<target>[<level_index>])
+ */
 void
-do_stonith_notify_level(const char *op, int rc, const char *desc)
+fenced_send_level_notification(const char *op,
+                               const pcmk__action_result_t *result,
+                               const char *desc)
 {
-    do_stonith_notify_config(op, rc, desc, g_hash_table_size(topology));
+    do_stonith_notify_config(op, pcmk_rc2legacy(stonith__result2rc(result)), desc, g_hash_table_size(topology));
 }
 
 static void
@@ -429,8 +440,7 @@ topology_remove_helper(const char *node, int level)
     crm_xml_add(data, XML_ATTR_STONITH_TARGET, node);
 
     fenced_unregister_level(data, &desc, &result);
-    do_stonith_notify_level(STONITH_OP_LEVEL_DEL,
-                            pcmk_rc2legacy(stonith__result2rc(&result)), desc);
+    fenced_send_level_notification(STONITH_OP_LEVEL_DEL, &result, desc);
     pcmk__reset_result(&result);
     free_xml(data);
     free(desc);
@@ -480,8 +490,7 @@ handle_topology_change(xmlNode *match, bool remove)
     }
 
     fenced_register_level(match, &desc, &result);
-    do_stonith_notify_level(STONITH_OP_LEVEL_ADD,
-                            pcmk_rc2legacy(stonith__result2rc(&result)), desc);
+    fenced_send_level_notification(STONITH_OP_LEVEL_ADD, &result, desc);
     pcmk__reset_result(&result);
     free(desc);
 }
diff --git a/daemons/fenced/pacemaker-fenced.h b/daemons/fenced/pacemaker-fenced.h
index 0b63680171..8503e813bf 100644
--- a/daemons/fenced/pacemaker-fenced.h
+++ b/daemons/fenced/pacemaker-fenced.h
@@ -233,7 +233,9 @@ void do_stonith_notify(const char *type, int result, xmlNode *data);
 void fenced_send_device_notification(const char *op,
                                      const pcmk__action_result_t *result,
                                      const char *desc);
-void do_stonith_notify_level(const char *op, int rc, const char *desc);
+void fenced_send_level_notification(const char *op,
+                                    const pcmk__action_result_t *result,
+                                    const char *desc);
 
 remote_fencing_op_t *initiate_remote_stonith_op(pcmk__client_t *client,
                                                 xmlNode *request,
-- 
2.27.0


From deec1ea9bcd7e0062755aa8b74358bfd12e4b9f0 Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Fri, 19 Nov 2021 16:53:26 -0600
Subject: [PATCH 18/23] Refactor: fencer: pass full result when sending
 configuration notifications

Rename do_stonith_notify_config() to send_config_notification() for
consistency, and make it take the full result as an argument rather than a
legacy return code. The full result is not used yet, but that is planned.
---
 daemons/fenced/pacemaker-fenced.c | 19 +++++++++++++++----
 1 file changed, 15 insertions(+), 4 deletions(-)

diff --git a/daemons/fenced/pacemaker-fenced.c b/daemons/fenced/pacemaker-fenced.c
index 773cf57f6b..d64358e07f 100644
--- a/daemons/fenced/pacemaker-fenced.c
+++ b/daemons/fenced/pacemaker-fenced.c
@@ -379,8 +379,19 @@ do_stonith_notify(const char *type, int result, xmlNode *data)
     crm_trace("Notify complete");
 }
 
+/*!
+ * \internal
+ * \brief Send notifications for a configuration change to subscribed clients
+ *
+ * \param[in] op      Notification type (STONITH_OP_DEVICE_ADD,
+ *                    STONITH_OP_DEVICE_DEL, STONITH_OP_LEVEL_ADD, or
+ *                    STONITH_OP_LEVEL_DEL)
+ * \param[in] result  Operation result
+ * \param[in] desc    Description of what changed
+ * \param[in] active  Current number of devices or topologies in use
+ */
 static void
-do_stonith_notify_config(const char *op, int rc,
+send_config_notification(const char *op, const pcmk__action_result_t *result,
                          const char *desc, int active)
 {
     xmlNode *notify_data = create_xml_node(NULL, op);
@@ -390,7 +401,7 @@ do_stonith_notify_config(const char *op, int rc,
     crm_xml_add(notify_data, F_STONITH_DEVICE, desc);
     crm_xml_add_int(notify_data, F_STONITH_ACTIVE, active);
 
-    do_stonith_notify(op, rc, notify_data);
+    do_stonith_notify(op, pcmk_rc2legacy(stonith__result2rc(result)), notify_data);
     free_xml(notify_data);
 }
 
@@ -408,7 +419,7 @@ fenced_send_device_notification(const char *op,
                                 const pcmk__action_result_t *result,
                                 const char *desc)
 {
-    do_stonith_notify_config(op, pcmk_rc2legacy(stonith__result2rc(result)), desc, g_hash_table_size(device_list));
+    send_config_notification(op, result, desc, g_hash_table_size(device_list));
 }
 
 /*!
@@ -425,7 +436,7 @@ fenced_send_level_notification(const char *op,
                                const pcmk__action_result_t *result,
                                const char *desc)
 {
-    do_stonith_notify_config(op, pcmk_rc2legacy(stonith__result2rc(result)), desc, g_hash_table_size(topology));
+    send_config_notification(op, result, desc, g_hash_table_size(topology));
 }
 
 static void
-- 
2.27.0


From 432e4445b630fb158482a5f6de1e0e41697a381f Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Fri, 19 Nov 2021 16:56:12 -0600
Subject: [PATCH 19/23] Feature: fencer: pass full result when sending
 notifications

Rename do_stonith_notify() to fenced_send_notification() for consistency, and
make it take the full result as an argument rather than a legacy return code,
and add the full result to the notifications.
---
 daemons/fenced/fenced_commands.c  |  4 ++--
 daemons/fenced/fenced_history.c   |  6 +++---
 daemons/fenced/fenced_remote.c    |  6 +++---
 daemons/fenced/pacemaker-fenced.c | 15 ++++++++++++---
 daemons/fenced/pacemaker-fenced.h |  4 +++-
 5 files changed, 23 insertions(+), 12 deletions(-)

diff --git a/daemons/fenced/fenced_commands.c b/daemons/fenced/fenced_commands.c
index 2f3dbb035a..54ebc12947 100644
--- a/daemons/fenced/fenced_commands.c
+++ b/daemons/fenced/fenced_commands.c
@@ -2489,8 +2489,8 @@ send_async_reply(async_command_t *cmd, const pcmk__action_result_t *result,
         crm_xml_add(notify_data, F_STONITH_REMOTE_OP_ID, cmd->remote_op_id);
         crm_xml_add(notify_data, F_STONITH_ORIGIN, cmd->client);
 
-        do_stonith_notify(T_STONITH_NOTIFY_FENCE, pcmk_rc2legacy(stonith__result2rc(result)), notify_data);
-        do_stonith_notify(T_STONITH_NOTIFY_HISTORY, pcmk_ok, NULL);
+        fenced_send_notification(T_STONITH_NOTIFY_FENCE, result, notify_data);
+        fenced_send_notification(T_STONITH_NOTIFY_HISTORY, NULL, NULL);
     }
 }
 
diff --git a/daemons/fenced/fenced_history.c b/daemons/fenced/fenced_history.c
index 1e07a9815a..44310ed77b 100644
--- a/daemons/fenced/fenced_history.c
+++ b/daemons/fenced/fenced_history.c
@@ -100,7 +100,7 @@ stonith_fence_history_cleanup(const char *target,
         g_hash_table_foreach_remove(stonith_remote_op_list,
                              stonith_remove_history_entry,
                              (gpointer) target);
-        do_stonith_notify(T_STONITH_NOTIFY_HISTORY, pcmk_ok, NULL);
+        fenced_send_notification(T_STONITH_NOTIFY_HISTORY, NULL, NULL);
     }
 }
 
@@ -402,7 +402,7 @@ stonith_local_history_diff_and_merge(GHashTable *remote_history,
 
     if (updated) {
         stonith_fence_history_trim();
-        do_stonith_notify(T_STONITH_NOTIFY_HISTORY, pcmk_ok, NULL);
+        fenced_send_notification(T_STONITH_NOTIFY_HISTORY, NULL, NULL);
     }
 
     if (cnt == 0) {
@@ -473,7 +473,7 @@ stonith_fence_history(xmlNode *msg, xmlNode **output,
            is done so send a notification for anything
            that smells like history-sync
          */
-        do_stonith_notify(T_STONITH_NOTIFY_HISTORY_SYNCED, pcmk_ok, NULL);
+        fenced_send_notification(T_STONITH_NOTIFY_HISTORY_SYNCED, NULL, NULL);
         if (crm_element_value(msg, F_STONITH_CALLID)) {
             /* this is coming from the stonith-API
             *
diff --git a/daemons/fenced/fenced_remote.c b/daemons/fenced/fenced_remote.c
index 329e06c444..16c181b4b0 100644
--- a/daemons/fenced/fenced_remote.c
+++ b/daemons/fenced/fenced_remote.c
@@ -442,8 +442,8 @@ handle_local_reply_and_notify(remote_fencing_op_t *op, xmlNode *data,
     do_local_reply(reply, op->client_id, op->call_options & st_opt_sync_call, FALSE);
 
     /* bcast to all local clients that the fencing operation happend */
-    do_stonith_notify(T_STONITH_NOTIFY_FENCE, pcmk_rc2legacy(stonith__result2rc(result)), notify_data);
-    do_stonith_notify(T_STONITH_NOTIFY_HISTORY, pcmk_ok, NULL);
+    fenced_send_notification(T_STONITH_NOTIFY_FENCE, result, notify_data);
+    fenced_send_notification(T_STONITH_NOTIFY_HISTORY, NULL, NULL);
 
     /* mark this op as having notify's already sent */
     op->notify_sent = TRUE;
@@ -1211,7 +1211,7 @@ create_remote_stonith_op(const char *client, xmlNode * request, gboolean peer)
 
     if (op->state != st_duplicate) {
         /* kick history readers */
-        do_stonith_notify(T_STONITH_NOTIFY_HISTORY, pcmk_ok, NULL);
+        fenced_send_notification(T_STONITH_NOTIFY_HISTORY, NULL, NULL);
     }
 
     /* safe to trim as long as that doesn't touch pending ops */
diff --git a/daemons/fenced/pacemaker-fenced.c b/daemons/fenced/pacemaker-fenced.c
index d64358e07f..6b31b814a3 100644
--- a/daemons/fenced/pacemaker-fenced.c
+++ b/daemons/fenced/pacemaker-fenced.c
@@ -356,8 +356,17 @@ do_stonith_async_timeout_update(const char *client_id, const char *call_id, int
     free_xml(notify_data);
 }
 
+/*!
+ * \internal
+ * \brief Notify relevant IPC clients of a fencing operation result
+ *
+ * \param[in] type     Notification type
+ * \param[in] result   Result of fencing operation (assume success if NULL)
+ * \param[in] data     If not NULL, add to notification as call data
+ */
 void
-do_stonith_notify(const char *type, int result, xmlNode *data)
+fenced_send_notification(const char *type, const pcmk__action_result_t *result,
+                         xmlNode *data)
 {
     /* TODO: Standardize the contents of data */
     xmlNode *update_msg = create_xml_node(NULL, "notify");
@@ -367,7 +376,7 @@ do_stonith_notify(const char *type, int result, xmlNode *data)
     crm_xml_add(update_msg, F_TYPE, T_STONITH_NOTIFY);
     crm_xml_add(update_msg, F_SUBTYPE, type);
     crm_xml_add(update_msg, F_STONITH_OPERATION, type);
-    crm_xml_add_int(update_msg, F_STONITH_RC, result);
+    stonith__xe_set_result(update_msg, result);
 
     if (data != NULL) {
         add_message_xml(update_msg, F_STONITH_CALLDATA, data);
@@ -401,7 +410,7 @@ send_config_notification(const char *op, const pcmk__action_result_t *result,
     crm_xml_add(notify_data, F_STONITH_DEVICE, desc);
     crm_xml_add_int(notify_data, F_STONITH_ACTIVE, active);
 
-    do_stonith_notify(op, pcmk_rc2legacy(stonith__result2rc(result)), notify_data);
+    fenced_send_notification(op, result, notify_data);
     free_xml(notify_data);
 }
 
diff --git a/daemons/fenced/pacemaker-fenced.h b/daemons/fenced/pacemaker-fenced.h
index 8503e813bf..502fcc9a29 100644
--- a/daemons/fenced/pacemaker-fenced.h
+++ b/daemons/fenced/pacemaker-fenced.h
@@ -229,7 +229,9 @@ xmlNode *fenced_construct_reply(xmlNode *request, xmlNode *data,
 void
  do_stonith_async_timeout_update(const char *client, const char *call_id, int timeout);
 
-void do_stonith_notify(const char *type, int result, xmlNode *data);
+void fenced_send_notification(const char *type,
+                              const pcmk__action_result_t *result,
+                              xmlNode *data);
 void fenced_send_device_notification(const char *op,
                                      const pcmk__action_result_t *result,
                                      const char *desc);
-- 
2.27.0


From 86deababe506c2bb8259538e5380b6a78dc4b770 Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Fri, 19 Nov 2021 16:58:03 -0600
Subject: [PATCH 20/23] Feature: fencer: pass full result when sending
 notifications

Rename create_op_done_notify() to fencing_result2xml() for readability,
make it take the full result as an argument rather than a legacy return code,
and add the full result to broadcasts and notifications.
---
 daemons/fenced/fenced_remote.c | 20 +++++++++++++++-----
 1 file changed, 15 insertions(+), 5 deletions(-)

diff --git a/daemons/fenced/fenced_remote.c b/daemons/fenced/fenced_remote.c
index 16c181b4b0..4cf723e6df 100644
--- a/daemons/fenced/fenced_remote.c
+++ b/daemons/fenced/fenced_remote.c
@@ -356,13 +356,22 @@ undo_op_remap(remote_fencing_op_t *op)
     }
 }
 
+/*!
+ * \internal
+ * \brief Create notification data XML for a fencing operation result
+ *
+ * \param[in] op      Fencer operation that completed
+ * \param[in] result  Full operation result
+ *
+ * \return Newly created XML to add as notification data
+ * \note The caller is responsible for freeing the result.
+ */
 static xmlNode *
-create_op_done_notify(remote_fencing_op_t * op, int rc)
+fencing_result2xml(remote_fencing_op_t *op, pcmk__action_result_t *result)
 {
     xmlNode *notify_data = create_xml_node(NULL, T_STONITH_NOTIFY_FENCE);
 
     crm_xml_add_int(notify_data, "state", op->state);
-    crm_xml_add_int(notify_data, F_STONITH_RC, rc);
     crm_xml_add(notify_data, F_STONITH_TARGET, op->target);
     crm_xml_add(notify_data, F_STONITH_ACTION, op->action);
     crm_xml_add(notify_data, F_STONITH_DELEGATE, op->delegate);
@@ -371,6 +380,7 @@ create_op_done_notify(remote_fencing_op_t * op, int rc)
     crm_xml_add(notify_data, F_STONITH_CLIENTID, op->client_id);
     crm_xml_add(notify_data, F_STONITH_CLIENTNAME, op->client_name);
 
+    stonith__xe_set_result(notify_data, result);
     return notify_data;
 }
 
@@ -388,7 +398,7 @@ fenced_broadcast_op_result(remote_fencing_op_t *op,
 {
     static int count = 0;
     xmlNode *bcast = create_xml_node(NULL, T_STONITH_REPLY);
-    xmlNode *notify_data = create_op_done_notify(op, pcmk_rc2legacy(stonith__result2rc(result)));
+    xmlNode *notify_data = fencing_result2xml(op, result);
 
     count++;
     crm_trace("Broadcasting result to peers");
@@ -430,7 +440,6 @@ handle_local_reply_and_notify(remote_fencing_op_t *op, xmlNode *data,
     }
 
     /* Do notification with a clean data object */
-    notify_data = create_op_done_notify(op, pcmk_rc2legacy(stonith__result2rc(result)));
     crm_xml_add_int(data, "state", op->state);
     crm_xml_add(data, F_STONITH_TARGET, op->target);
     crm_xml_add(data, F_STONITH_OPERATION, op->action);
@@ -442,13 +451,14 @@ handle_local_reply_and_notify(remote_fencing_op_t *op, xmlNode *data,
     do_local_reply(reply, op->client_id, op->call_options & st_opt_sync_call, FALSE);
 
     /* bcast to all local clients that the fencing operation happend */
+    notify_data = fencing_result2xml(op, result);
     fenced_send_notification(T_STONITH_NOTIFY_FENCE, result, notify_data);
+    free_xml(notify_data);
     fenced_send_notification(T_STONITH_NOTIFY_HISTORY, NULL, NULL);
 
     /* mark this op as having notify's already sent */
     op->notify_sent = TRUE;
     free_xml(reply);
-    free_xml(notify_data);
 }
 
 /*!
-- 
2.27.0


From 2814cde97520b63ca5f9baf3df37d73507e89d34 Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Wed, 15 Dec 2021 17:40:52 -0600
Subject: [PATCH 21/23] Low: fencer: restore check for invalid topology level
 target

... per review. b7c7676c mistakenly dropped it
---
 daemons/fenced/fenced_commands.c | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/daemons/fenced/fenced_commands.c b/daemons/fenced/fenced_commands.c
index 54ebc12947..1a4a791385 100644
--- a/daemons/fenced/fenced_commands.c
+++ b/daemons/fenced/fenced_commands.c
@@ -1636,6 +1636,16 @@ fenced_register_level(xmlNode *msg, char **desc, pcmk__action_result_t *result)
         *desc = crm_strdup_printf("%s[%d]", target, id);
     }
 
+    // Ensure a valid target was specified
+    if ((mode < 0) || (mode > 2)) {
+        crm_warn("Ignoring topology level registration without valid target");
+        free(target);
+        crm_log_xml_warn(level, "Bad level");
+        pcmk__set_result(result, CRM_EX_INVALID_PARAM, PCMK_EXEC_INVALID,
+                         "Invalid topology level target");
+        return;
+    }
+
     // Ensure level ID is in allowed range
     if ((id <= 0) || (id >= ST_LEVEL_MAX)) {
         crm_warn("Ignoring topology registration for %s with invalid level %d",
@@ -1643,7 +1653,7 @@ fenced_register_level(xmlNode *msg, char **desc, pcmk__action_result_t *result)
         free(target);
         crm_log_xml_warn(level, "Bad level");
         pcmk__set_result(result, CRM_EX_INVALID_PARAM, PCMK_EXEC_INVALID,
-                         "Invalid topology level");
+                         "Invalid topology level number");
         return;
     }
 
-- 
2.27.0


From c82806f9e16abcea00025fd3a290477aef2d8d83 Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Tue, 21 Dec 2021 16:23:29 -0600
Subject: [PATCH 22/23] Low: fencer: free result memory when processing fencing
 replies

found in review
---
 daemons/fenced/fenced_remote.c | 24 +++++++++++++++---------
 1 file changed, 15 insertions(+), 9 deletions(-)

diff --git a/daemons/fenced/fenced_remote.c b/daemons/fenced/fenced_remote.c
index 4cf723e6df..9fda9ef060 100644
--- a/daemons/fenced/fenced_remote.c
+++ b/daemons/fenced/fenced_remote.c
@@ -2241,14 +2241,14 @@ fenced_process_fencing_reply(xmlNode *msg)
         /* Could be for an event that began before we started */
         /* TODO: Record the op for later querying */
         crm_info("Received peer result of unknown or expired operation %s", id);
-        return;
+        goto done;
     }
 
     if (op->devices && device && !pcmk__str_eq(op->devices->data, device, pcmk__str_casei)) {
         crm_err("Received outdated reply for device %s (instead of %s) to "
                 "fence (%s) %s. Operation already timed out at peer level.",
                 device, (const char *) op->devices->data, op->action, op->target);
-        return;
+        goto done;
     }
 
     if (pcmk__str_eq(crm_element_value(msg, F_SUBTYPE), "broadcast", pcmk__str_casei)) {
@@ -2265,14 +2265,15 @@ fenced_process_fencing_reply(xmlNode *msg)
             op->state = st_failed;
         }
         finalize_op(op, msg, &result, false);
-        return;
+        goto done;
+
     } else if (!pcmk__str_eq(op->originator, stonith_our_uname, pcmk__str_casei)) {
         /* If this isn't a remote level broadcast, and we are not the
          * originator of the operation, we should not be receiving this msg. */
         crm_err("Received non-broadcast fencing result for operation %.8s "
                 "we do not own (device %s targeting %s)",
                 op->id, device, op->target);
-        return;
+        goto done;
     }
 
     if (pcmk_is_set(op->call_options, st_opt_topology)) {
@@ -2290,7 +2291,7 @@ fenced_process_fencing_reply(xmlNode *msg)
          * and notify our local clients. */
         if (op->state == st_done) {
             finalize_op(op, msg, &result, false);
-            return;
+            goto done;
         }
 
         if ((op->phase == 2) && !pcmk__result_ok(&result)) {
@@ -2310,27 +2311,30 @@ fenced_process_fencing_reply(xmlNode *msg)
             /* An operation completed successfully. Try another device if
              * necessary, otherwise mark the operation as done. */
             advance_topology_device_in_level(op, device, msg);
-            return;
+            goto done;
         } else {
             /* This device failed, time to try another topology level. If no other
              * levels are available, mark this operation as failed and report results. */
             if (advance_topology_level(op, false) != pcmk_rc_ok) {
                 op->state = st_failed;
                 finalize_op(op, msg, &result, false);
-                return;
+                goto done;
             }
         }
+
     } else if (pcmk__result_ok(&result) && (op->devices == NULL)) {
         crm_trace("All done for %s", op->target);
         op->state = st_done;
         finalize_op(op, msg, &result, false);
-        return;
+        goto done;
+
     } else if ((result.execution_status == PCMK_EXEC_TIMEOUT)
                && (op->devices == NULL)) {
         /* If the operation timed out don't bother retrying other peers. */
         op->state = st_failed;
         finalize_op(op, msg, &result, false);
-        return;
+        goto done;
+
     } else {
         /* fall-through and attempt other fencing action using another peer */
     }
@@ -2340,6 +2344,8 @@ fenced_process_fencing_reply(xmlNode *msg)
               op->target, op->originator, op->client_name,
               pcmk_exec_status_str(result.execution_status));
     request_peer_fencing(op, NULL, &result);
+done:
+    pcmk__reset_result(&result);
 }
 
 gboolean
-- 
2.27.0


From 137bf97fdb39043eebb02a0d3ebbe47ee8c7044c Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Tue, 21 Dec 2021 16:26:22 -0600
Subject: [PATCH 23/23] Log: fencer: clarify timeout message

... as suggested by review
---
 daemons/fenced/fenced_remote.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/daemons/fenced/fenced_remote.c b/daemons/fenced/fenced_remote.c
index 9fda9ef060..1e237150c5 100644
--- a/daemons/fenced/fenced_remote.c
+++ b/daemons/fenced/fenced_remote.c
@@ -656,7 +656,7 @@ remote_op_timeout_one(gpointer userdata)
     crm_notice("Peer's '%s' action targeting %s for client %s timed out " CRM_XS
                " id=%.8s", op->action, op->target, op->client_name, op->id);
     pcmk__set_result(&result, CRM_EX_ERROR, PCMK_EXEC_TIMEOUT,
-                     "Peer did not send fence result within timeout");
+                     "Peer did not return fence result within timeout");
 
 
     // Try another device, if appropriate
-- 
2.27.0