Blame SOURCES/009-fencing-reasons.patch

33afe3
From fcd42a5926e9a63d425586552ecc7b543838d352 Mon Sep 17 00:00:00 2001
33afe3
From: Ken Gaillot <kgaillot@redhat.com>
33afe3
Date: Thu, 11 Nov 2021 16:57:03 -0600
33afe3
Subject: [PATCH 01/23] Feature: fencer: pass full result in async command
33afe3
 replies
33afe3
33afe3
The services library callbacks for async commands, which call
33afe3
send_async_reply() -> construct_async_reply() to create the reply, now add
33afe3
fields for exit status, operation status, and exit reason, in addition to the
33afe3
existing action standard output and legacy return code.
33afe3
33afe3
Nothing uses the new fields yet.
33afe3
---
33afe3
 daemons/fenced/fenced_commands.c | 10 ++++------
33afe3
 1 file changed, 4 insertions(+), 6 deletions(-)
33afe3
33afe3
diff --git a/daemons/fenced/fenced_commands.c b/daemons/fenced/fenced_commands.c
33afe3
index f34cb4f136..3497428c18 100644
33afe3
--- a/daemons/fenced/fenced_commands.c
33afe3
+++ b/daemons/fenced/fenced_commands.c
33afe3
@@ -2415,9 +2415,8 @@ send_async_reply(async_command_t *cmd, const pcmk__action_result_t *result,
33afe3
     if (stand_alone) {
33afe3
         /* Do notification with a clean data object */
33afe3
         xmlNode *notify_data = create_xml_node(NULL, T_STONITH_NOTIFY_FENCE);
33afe3
-        int rc = pcmk_rc2legacy(stonith__result2rc(result));
33afe3
 
33afe3
-        crm_xml_add_int(notify_data, F_STONITH_RC, rc);
33afe3
+        stonith__xe_set_result(notify_data, result);
33afe3
         crm_xml_add(notify_data, F_STONITH_TARGET, cmd->victim);
33afe3
         crm_xml_add(notify_data, F_STONITH_OPERATION, cmd->op);
33afe3
         crm_xml_add(notify_data, F_STONITH_DELEGATE, "localhost");
33afe3
@@ -2425,7 +2424,7 @@ send_async_reply(async_command_t *cmd, const pcmk__action_result_t *result,
33afe3
         crm_xml_add(notify_data, F_STONITH_REMOTE_OP_ID, cmd->remote_op_id);
33afe3
         crm_xml_add(notify_data, F_STONITH_ORIGIN, cmd->client);
33afe3
 
33afe3
-        do_stonith_notify(T_STONITH_NOTIFY_FENCE, rc, notify_data);
33afe3
+        do_stonith_notify(T_STONITH_NOTIFY_FENCE, pcmk_rc2legacy(stonith__result2rc(result)), notify_data);
33afe3
         do_stonith_notify(T_STONITH_NOTIFY_HISTORY, pcmk_ok, NULL);
33afe3
     }
33afe3
 }
33afe3
@@ -2728,9 +2727,8 @@ construct_async_reply(async_command_t *cmd, const pcmk__action_result_t *result)
33afe3
     crm_xml_add(reply, F_STONITH_ORIGIN, cmd->origin);
33afe3
     crm_xml_add_int(reply, F_STONITH_CALLID, cmd->id);
33afe3
     crm_xml_add_int(reply, F_STONITH_CALLOPTS, cmd->options);
33afe3
-    crm_xml_add_int(reply, F_STONITH_RC,
33afe3
-                    pcmk_rc2legacy(stonith__result2rc(result)));
33afe3
-    crm_xml_add(reply, F_STONITH_OUTPUT, result->action_stdout);
33afe3
+
33afe3
+    stonith__xe_set_result(reply, result);
33afe3
     return reply;
33afe3
 }
33afe3
 
33afe3
-- 
33afe3
2.27.0
33afe3
33afe3
33afe3
From 4bac2e9811872f92571e4f5a47d8c5032cfc3016 Mon Sep 17 00:00:00 2001
33afe3
From: Ken Gaillot <kgaillot@redhat.com>
33afe3
Date: Thu, 18 Nov 2021 12:41:29 -0600
33afe3
Subject: [PATCH 02/23] Refactor: fencer: track full result for direct agent
33afe3
 actions
33afe3
33afe3
This renames stonith_device_action() to execute_agent_action() for readability,
33afe3
and has it set a full result rather than return a legacy return code.
33afe3
33afe3
As of this commit, handle_request() just maps the result back to a legacy code,
33afe3
but it will make better use of it with planned changes.
33afe3
---
33afe3
 daemons/fenced/fenced_commands.c | 95 +++++++++++++++++++-------------
33afe3
 1 file changed, 56 insertions(+), 39 deletions(-)
33afe3
33afe3
diff --git a/daemons/fenced/fenced_commands.c b/daemons/fenced/fenced_commands.c
33afe3
index 3497428c18..2f59ef84b7 100644
33afe3
--- a/daemons/fenced/fenced_commands.c
33afe3
+++ b/daemons/fenced/fenced_commands.c
33afe3
@@ -1729,23 +1729,6 @@ stonith_level_remove(xmlNode *msg, char **desc)
33afe3
     return pcmk_ok;
33afe3
 }
33afe3
 
33afe3
-/*!
33afe3
- * \internal
33afe3
- * \brief Schedule an (asynchronous) action directly on a stonith device
33afe3
- *
33afe3
- * Handle a STONITH_OP_EXEC API message by scheduling a requested agent action
33afe3
- * directly on a specified device. Only list, monitor, and status actions are
33afe3
- * expected to use this call, though it should work with any agent command.
33afe3
- *
33afe3
- * \param[in]  msg     API message XML with desired action
33afe3
- * \param[out] output  Unused
33afe3
- *
33afe3
- * \return -EINPROGRESS on success, -errno otherwise
33afe3
- * \note If the action is monitor, the device must be registered via the API
33afe3
- *       (CIB registration is not sufficient), because monitor should not be
33afe3
- *       possible unless the device is "started" (API registered).
33afe3
- */
33afe3
-
33afe3
 static char *
33afe3
 list_to_string(GList *list, const char *delim, gboolean terminate_with_delim)
33afe3
 {
33afe3
@@ -1778,8 +1761,23 @@ list_to_string(GList *list, const char *delim, gboolean terminate_with_delim)
33afe3
     return rv;
33afe3
 }
33afe3
 
33afe3
-static int
33afe3
-stonith_device_action(xmlNode * msg, char **output)
33afe3
+/*!
33afe3
+ * \internal
33afe3
+ * \brief Execute a fence agent action directly (and asynchronously)
33afe3
+ *
33afe3
+ * Handle a STONITH_OP_EXEC API message by scheduling a requested agent action
33afe3
+ * directly on a specified device. Only list, monitor, and status actions are
33afe3
+ * expected to use this call, though it should work with any agent command.
33afe3
+ *
33afe3
+ * \param[in]  msg     Request XML specifying action
33afe3
+ * \param[out] result  Where to store result of action
33afe3
+ *
33afe3
+ * \note If the action is monitor, the device must be registered via the API
33afe3
+ *       (CIB registration is not sufficient), because monitor should not be
33afe3
+ *       possible unless the device is "started" (API registered).
33afe3
+ */
33afe3
+static void
33afe3
+execute_agent_action(xmlNode *msg, pcmk__action_result_t *result)
33afe3
 {
33afe3
     xmlNode *dev = get_xpath_object("//" F_STONITH_DEVICE, msg, LOG_ERR);
33afe3
     xmlNode *op = get_xpath_object("//@" F_STONITH_ACTION, msg, LOG_ERR);
33afe3
@@ -1792,39 +1790,56 @@ stonith_device_action(xmlNode * msg, char **output)
33afe3
         crm_info("Malformed API action request: device %s, action %s",
33afe3
                  (id? id : "not specified"),
33afe3
                  (action? action : "not specified"));
33afe3
-        return -EPROTO;
33afe3
+        fenced_set_protocol_error(result);
33afe3
+        return;
33afe3
     }
33afe3
 
33afe3
     if (pcmk__str_eq(id, STONITH_WATCHDOG_ID, pcmk__str_none)) {
33afe3
+        // Watchdog agent actions are implemented internally
33afe3
         if (stonith_watchdog_timeout_ms <= 0) {
33afe3
-            return -ENODEV;
33afe3
-        } else {
33afe3
-            if (pcmk__str_eq(action, "list", pcmk__str_casei)) {
33afe3
-                *output = list_to_string(stonith_watchdog_targets, "\n", TRUE);
33afe3
-                return pcmk_ok;
33afe3
-            } else if (pcmk__str_eq(action, "monitor", pcmk__str_casei)) {
33afe3
-                return pcmk_ok;
33afe3
-            }
33afe3
+            pcmk__set_result(result, CRM_EX_ERROR, PCMK_EXEC_NO_FENCE_DEVICE,
33afe3
+                             "Watchdog fence device not configured");
33afe3
+            return;
33afe3
+
33afe3
+        } else if (pcmk__str_eq(action, "list", pcmk__str_casei)) {
33afe3
+            pcmk__set_result(result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
33afe3
+            pcmk__set_result_output(result,
33afe3
+                                    list_to_string(stonith_watchdog_targets,
33afe3
+                                                   "\n", TRUE),
33afe3
+                                    NULL);
33afe3
+            return;
33afe3
+
33afe3
+        } else if (pcmk__str_eq(action, "monitor", pcmk__str_casei)) {
33afe3
+            pcmk__set_result(result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
33afe3
+            return;
33afe3
         }
33afe3
     }
33afe3
 
33afe3
     device = g_hash_table_lookup(device_list, id);
33afe3
-    if ((device == NULL)
33afe3
-        || (!device->api_registered && !strcmp(action, "monitor"))) {
33afe3
+    if (device == NULL) {
33afe3
+        crm_info("Ignoring API '%s' action request because device %s not found",
33afe3
+                 action, id);
33afe3
+        pcmk__set_result(result, CRM_EX_ERROR, PCMK_EXEC_NO_FENCE_DEVICE,
33afe3
+                         NULL);
33afe3
+        return;
33afe3
 
33afe3
+    } else if (!device->api_registered && !strcmp(action, "monitor")) {
33afe3
         // Monitors may run only on "started" (API-registered) devices
33afe3
-        crm_info("Ignoring API '%s' action request because device %s not found",
33afe3
+        crm_info("Ignoring API '%s' action request because device %s not active",
33afe3
                  action, id);
33afe3
-        return -ENODEV;
33afe3
+        pcmk__set_result(result, CRM_EX_ERROR, PCMK_EXEC_NO_FENCE_DEVICE,
33afe3
+                         "Fence device not active");
33afe3
+        return;
33afe3
     }
33afe3
 
33afe3
     cmd = create_async_command(msg);
33afe3
     if (cmd == NULL) {
33afe3
-        return -EPROTO;
33afe3
+        fenced_set_protocol_error(result);
33afe3
+        return;
33afe3
     }
33afe3
 
33afe3
     schedule_stonith_command(cmd, device);
33afe3
-    return -EINPROGRESS;
33afe3
+    pcmk__set_result(result, CRM_EX_OK, PCMK_EXEC_PENDING, NULL);
33afe3
 }
33afe3
 
33afe3
 static void
33afe3
@@ -2911,8 +2926,8 @@ handle_request(pcmk__client_t *client, uint32_t id, uint32_t flags,
33afe3
 
33afe3
     xmlNode *data = NULL;
33afe3
     bool need_reply = true;
33afe3
+    pcmk__action_result_t result = PCMK__UNKNOWN_RESULT;
33afe3
 
33afe3
-    char *output = NULL;
33afe3
     const char *op = crm_element_value(request, F_STONITH_OPERATION);
33afe3
     const char *client_id = crm_element_value(request, F_STONITH_CLIENTID);
33afe3
 
33afe3
@@ -2935,8 +2950,9 @@ handle_request(pcmk__client_t *client, uint32_t id, uint32_t flags,
33afe3
         need_reply = false;
33afe3
 
33afe3
     } else if (pcmk__str_eq(op, STONITH_OP_EXEC, pcmk__str_none)) {
33afe3
-        rc = stonith_device_action(request, &output);
33afe3
-        need_reply = (rc != -EINPROGRESS);
33afe3
+        execute_agent_action(request, &result);
33afe3
+        need_reply = (result.execution_status != PCMK_EXEC_PENDING);
33afe3
+        rc = pcmk_rc2legacy(stonith__result2rc(&result));
33afe3
 
33afe3
     } else if (pcmk__str_eq(op, STONITH_OP_TIMEOUT_UPDATE, pcmk__str_none)) {
33afe3
         const char *call_id = crm_element_value(request, F_STONITH_CALLID);
33afe3
@@ -3150,19 +3166,20 @@ handle_request(pcmk__client_t *client, uint32_t id, uint32_t flags,
33afe3
 done:
33afe3
     // Reply if result is known
33afe3
     if (need_reply) {
33afe3
-        xmlNode *reply = stonith_construct_reply(request, output, data, rc);
33afe3
+        xmlNode *reply = stonith_construct_reply(request, result.action_stdout, data, rc);
33afe3
 
33afe3
         stonith_send_reply(reply, call_options, remote_peer, client_id);
33afe3
         free_xml(reply);
33afe3
     }
33afe3
 
33afe3
-    free(output);
33afe3
     free_xml(data);
33afe3
 
33afe3
     crm_debug("Processed %s request from %s %s: %s (rc=%d)",
33afe3
               op, ((client == NULL)? "peer" : "client"),
33afe3
               ((client == NULL)? remote_peer : pcmk__client_name(client)),
33afe3
               ((rc > 0)? "" : pcmk_strerror(rc)), rc);
33afe3
+
33afe3
+    pcmk__reset_result(&result);
33afe3
 }
33afe3
 
33afe3
 static void
33afe3
-- 
33afe3
2.27.0
33afe3
33afe3
33afe3
From 9601b2aff1ea6a4eef0bb2701c22c1e971a657eb Mon Sep 17 00:00:00 2001
33afe3
From: Ken Gaillot <kgaillot@redhat.com>
33afe3
Date: Thu, 18 Nov 2021 17:31:20 -0600
33afe3
Subject: [PATCH 03/23] Refactor: fencer: track full result for local fencing
33afe3
33afe3
This renames stonith_fence() to fence_locally() for readability, and has it set
33afe3
a full result rather than return a legacy return code.
33afe3
33afe3
As of this commit, handle_request() just maps the result back to a legacy code,
33afe3
but it will make better use of it with planned changes.
33afe3
---
33afe3
 daemons/fenced/fenced_commands.c | 38 +++++++++++++++++++++-----------
33afe3
 1 file changed, 25 insertions(+), 13 deletions(-)
33afe3
33afe3
diff --git a/daemons/fenced/fenced_commands.c b/daemons/fenced/fenced_commands.c
33afe3
index 2f59ef84b7..bfb0d71e5f 100644
33afe3
--- a/daemons/fenced/fenced_commands.c
33afe3
+++ b/daemons/fenced/fenced_commands.c
33afe3
@@ -2626,37 +2626,49 @@ stonith_fence_get_devices_cb(GList * devices, void *user_data)
33afe3
     }
33afe3
 }
33afe3
 
33afe3
-static int
33afe3
-stonith_fence(xmlNode * msg)
33afe3
+/*!
33afe3
+ * \internal
33afe3
+ * \brief Execute a fence action via the local node
33afe3
+ *
33afe3
+ * \param[in]  msg     Fencing request
33afe3
+ * \param[out] result  Where to store result of fence action
33afe3
+ */
33afe3
+static void
33afe3
+fence_locally(xmlNode *msg, pcmk__action_result_t *result)
33afe3
 {
33afe3
     const char *device_id = NULL;
33afe3
     stonith_device_t *device = NULL;
33afe3
     async_command_t *cmd = create_async_command(msg);
33afe3
     xmlNode *dev = get_xpath_object("//@" F_STONITH_TARGET, msg, LOG_ERR);
33afe3
 
33afe3
+    CRM_CHECK(result != NULL, return);
33afe3
+
33afe3
     if (cmd == NULL) {
33afe3
-        return -EPROTO;
33afe3
+        fenced_set_protocol_error(result);
33afe3
+        return;
33afe3
     }
33afe3
 
33afe3
     device_id = crm_element_value(dev, F_STONITH_DEVICE);
33afe3
-    if (device_id) {
33afe3
+    if (device_id != NULL) {
33afe3
         device = g_hash_table_lookup(device_list, device_id);
33afe3
         if (device == NULL) {
33afe3
             crm_err("Requested device '%s' is not available", device_id);
33afe3
-            return -ENODEV;
33afe3
+            pcmk__set_result(result, CRM_EX_ERROR, PCMK_EXEC_NO_FENCE_DEVICE,
33afe3
+                             "Requested fence device not found");
33afe3
+            return;
33afe3
         }
33afe3
         schedule_stonith_command(cmd, device);
33afe3
 
33afe3
     } else {
33afe3
         const char *host = crm_element_value(dev, F_STONITH_TARGET);
33afe3
 
33afe3
-        if (cmd->options & st_opt_cs_nodeid) {
33afe3
-            int nodeid;
33afe3
-            crm_node_t *node;
33afe3
+        if (pcmk_is_set(cmd->options, st_opt_cs_nodeid)) {
33afe3
+            int nodeid = 0;
33afe3
+            crm_node_t *node = NULL;
33afe3
 
33afe3
             pcmk__scan_min_int(host, &nodeid, 0);
33afe3
             node = pcmk__search_known_node_cache(nodeid, NULL, CRM_GET_PEER_ANY);
33afe3
-            if (node) {
33afe3
+            if (node != NULL) {
33afe3
                 host = node->uname;
33afe3
             }
33afe3
         }
33afe3
@@ -2666,7 +2678,7 @@ stonith_fence(xmlNode * msg)
33afe3
                             TRUE, cmd, stonith_fence_get_devices_cb);
33afe3
     }
33afe3
 
33afe3
-    return -EINPROGRESS;
33afe3
+    pcmk__set_result(result, CRM_EX_OK, PCMK_EXEC_PENDING, NULL);
33afe3
 }
33afe3
 
33afe3
 xmlNode *
33afe3
@@ -3016,9 +3028,9 @@ handle_request(pcmk__client_t *client, uint32_t id, uint32_t flags,
33afe3
         }
33afe3
 
33afe3
     } else if (pcmk__str_eq(op, STONITH_OP_FENCE, pcmk__str_none)) {
33afe3
-
33afe3
-        if (remote_peer || stand_alone) {
33afe3
-            rc = stonith_fence(request);
33afe3
+        if ((remote_peer != NULL) || stand_alone) {
33afe3
+            fence_locally(request, &result);
33afe3
+            rc = pcmk_rc2legacy(stonith__result2rc(&result));
33afe3
 
33afe3
         } else if (pcmk_is_set(call_options, st_opt_manual_ack)) {
33afe3
             switch (fenced_handle_manual_confirmation(client, request)) {
33afe3
-- 
33afe3
2.27.0
33afe3
33afe3
33afe3
From b7c7676cfd36fd72d3b29e86a23db97081e19b03 Mon Sep 17 00:00:00 2001
33afe3
From: Ken Gaillot <kgaillot@redhat.com>
33afe3
Date: Mon, 29 Nov 2021 17:06:52 -0600
33afe3
Subject: [PATCH 04/23] Low: fencer: handle topology level registration errors
33afe3
 better
33afe3
33afe3
Rename stonith_level_register() to fenced_register_level() for consistency, and
33afe3
refactor it to return a full result rather than a legacy return code.
33afe3
33afe3
Return a protocol error for missing information in the request XML, and log
33afe3
invalid level numbers at warning level. Use a new combination of
33afe3
PCMK_EXEC_INVALID with CRM_EX_INVALID_PARAM for invalid levels, so it gets
33afe3
mapped back to the legacy code -EINVAL (which was returned before).
33afe3
---
33afe3
 daemons/fenced/fenced_commands.c  | 52 +++++++++++++++++++++----------
33afe3
 daemons/fenced/pacemaker-fenced.c |  9 +++---
33afe3
 daemons/fenced/pacemaker-fenced.h |  3 +-
33afe3
 lib/fencing/st_actions.c          |  1 +
33afe3
 4 files changed, 44 insertions(+), 21 deletions(-)
33afe3
33afe3
diff --git a/daemons/fenced/fenced_commands.c b/daemons/fenced/fenced_commands.c
33afe3
index bfb0d71e5f..975f8633a4 100644
33afe3
--- a/daemons/fenced/fenced_commands.c
33afe3
+++ b/daemons/fenced/fenced_commands.c
33afe3
@@ -1583,20 +1583,19 @@ parse_device_list(const char *devices)
33afe3
 
33afe3
 /*!
33afe3
  * \internal
33afe3
- * \brief Register a STONITH level for a target
33afe3
+ * \brief Register a fencing topology level for a target
33afe3
  *
33afe3
  * Given an XML request specifying the target name, level index, and device IDs
33afe3
  * for the level, this will create an entry for the target in the global topology
33afe3
  * table if one does not already exist, then append the specified device IDs to
33afe3
  * the entry's device list for the specified level.
33afe3
  *
33afe3
- * \param[in]  msg   XML request for STONITH level registration
33afe3
- * \param[out] desc  If not NULL, will be set to string representation ("TARGET[LEVEL]")
33afe3
- *
33afe3
- * \return pcmk_ok on success, -EINVAL if XML does not specify valid level index
33afe3
+ * \param[in]  msg     XML request for STONITH level registration
33afe3
+ * \param[out] desc    If not NULL, set to string representation "TARGET[LEVEL]"
33afe3
+ * \param[out] result  Where to set result of registration
33afe3
  */
33afe3
-int
33afe3
-stonith_level_register(xmlNode *msg, char **desc)
33afe3
+void
33afe3
+fenced_register_level(xmlNode *msg, char **desc, pcmk__action_result_t *result)
33afe3
 {
33afe3
     int id = 0;
33afe3
     xmlNode *level;
33afe3
@@ -1607,6 +1606,13 @@ stonith_level_register(xmlNode *msg, char **desc)
33afe3
     stonith_key_value_t *dIter = NULL;
33afe3
     stonith_key_value_t *devices = NULL;
33afe3
 
33afe3
+    CRM_CHECK(result != NULL, return);
33afe3
+
33afe3
+    if (msg == NULL) {
33afe3
+        fenced_set_protocol_error(result);
33afe3
+        return;
33afe3
+    }
33afe3
+
33afe3
     /* Allow the XML here to point to the level tag directly, or wrapped in
33afe3
      * another tag. If directly, don't search by xpath, because it might give
33afe3
      * multiple hits (e.g. if the XML is the CIB).
33afe3
@@ -1614,11 +1620,15 @@ stonith_level_register(xmlNode *msg, char **desc)
33afe3
     if (pcmk__str_eq(TYPE(msg), XML_TAG_FENCING_LEVEL, pcmk__str_casei)) {
33afe3
         level = msg;
33afe3
     } else {
33afe3
-        level = get_xpath_object("//" XML_TAG_FENCING_LEVEL, msg, LOG_ERR);
33afe3
+        level = get_xpath_object("//" XML_TAG_FENCING_LEVEL, msg, LOG_WARNING);
33afe3
+    }
33afe3
+    if (level == NULL) {
33afe3
+        fenced_set_protocol_error(result);
33afe3
+        return;
33afe3
     }
33afe3
-    CRM_CHECK(level != NULL, return -EINVAL);
33afe3
 
33afe3
     mode = stonith_level_kind(level);
33afe3
+
33afe3
     target = stonith_level_key(level, mode);
33afe3
     crm_element_value_int(level, XML_ATTR_STONITH_INDEX, &id;;
33afe3
 
33afe3
@@ -1626,18 +1636,26 @@ stonith_level_register(xmlNode *msg, char **desc)
33afe3
         *desc = crm_strdup_printf("%s[%d]", target, id);
33afe3
     }
33afe3
 
33afe3
-    /* Sanity-check arguments */
33afe3
-    if (mode >= 3 || (id <= 0) || (id >= ST_LEVEL_MAX)) {
33afe3
-        crm_trace("Could not add %s[%d] (%d) to the topology (%d active entries)", target, id, mode, g_hash_table_size(topology));
33afe3
+    // Ensure level ID is in allowed range
33afe3
+    if ((id <= 0) || (id >= ST_LEVEL_MAX)) {
33afe3
+        crm_warn("Ignoring topology registration for %s with invalid level %d",
33afe3
+                  target, id);
33afe3
         free(target);
33afe3
-        crm_log_xml_err(level, "Bad topology");
33afe3
-        return -EINVAL;
33afe3
+        crm_log_xml_warn(level, "Bad level");
33afe3
+        pcmk__set_result(result, CRM_EX_INVALID_PARAM, PCMK_EXEC_INVALID,
33afe3
+                         "Invalid topology level");
33afe3
+        return;
33afe3
     }
33afe3
 
33afe3
     /* Find or create topology table entry */
33afe3
     tp = g_hash_table_lookup(topology, target);
33afe3
     if (tp == NULL) {
33afe3
         tp = calloc(1, sizeof(stonith_topology_t));
33afe3
+        if (tp == NULL) {
33afe3
+            pcmk__set_result(result, CRM_EX_ERROR, PCMK_EXEC_ERROR,
33afe3
+                             strerror(ENOMEM));
33afe3
+            return;
33afe3
+        }
33afe3
         tp->kind = mode;
33afe3
         tp->target = target;
33afe3
         tp->target_value = crm_element_value_copy(level, XML_ATTR_STONITH_TARGET_VALUE);
33afe3
@@ -1671,7 +1689,8 @@ stonith_level_register(xmlNode *msg, char **desc)
33afe3
         crm_info("Target %s has %d active fencing level%s",
33afe3
                  tp->target, nlevels, pcmk__plural_s(nlevels));
33afe3
     }
33afe3
-    return pcmk_ok;
33afe3
+
33afe3
+    pcmk__set_result(result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
33afe3
 }
33afe3
 
33afe3
 int
33afe3
@@ -3142,7 +3161,8 @@ handle_request(pcmk__client_t *client, uint32_t id, uint32_t flags,
33afe3
         char *device_id = NULL;
33afe3
 
33afe3
         if (is_privileged(client, op)) {
33afe3
-            rc = stonith_level_register(request, &device_id);
33afe3
+            fenced_register_level(request, &device_id, &result);
33afe3
+            rc = pcmk_rc2legacy(stonith__result2rc(&result));
33afe3
         } else {
33afe3
             rc = -EACCES;
33afe3
         }
33afe3
diff --git a/daemons/fenced/pacemaker-fenced.c b/daemons/fenced/pacemaker-fenced.c
33afe3
index 0a8b3bf6f2..469304f67c 100644
33afe3
--- a/daemons/fenced/pacemaker-fenced.c
33afe3
+++ b/daemons/fenced/pacemaker-fenced.c
33afe3
@@ -452,8 +452,8 @@ remove_cib_device(xmlXPathObjectPtr xpathObj)
33afe3
 static void
33afe3
 handle_topology_change(xmlNode *match, bool remove) 
33afe3
 {
33afe3
-    int rc;
33afe3
     char *desc = NULL;
33afe3
+    pcmk__action_result_t result = PCMK__UNKNOWN_RESULT;
33afe3
 
33afe3
     CRM_CHECK(match != NULL, return);
33afe3
     crm_trace("Updating %s", ID(match));
33afe3
@@ -467,9 +467,10 @@ handle_topology_change(xmlNode *match, bool remove)
33afe3
         free(key);
33afe3
     }
33afe3
 
33afe3
-    rc = stonith_level_register(match, &desc);
33afe3
-    do_stonith_notify_level(STONITH_OP_LEVEL_ADD, rc, desc);
33afe3
-
33afe3
+    fenced_register_level(match, &desc, &result);
33afe3
+    do_stonith_notify_level(STONITH_OP_LEVEL_ADD,
33afe3
+                            pcmk_rc2legacy(stonith__result2rc(&result)), desc);
33afe3
+    pcmk__reset_result(&result);
33afe3
     free(desc);
33afe3
 }
33afe3
 
33afe3
diff --git a/daemons/fenced/pacemaker-fenced.h b/daemons/fenced/pacemaker-fenced.h
33afe3
index 5162ada75d..cf114fb979 100644
33afe3
--- a/daemons/fenced/pacemaker-fenced.h
33afe3
+++ b/daemons/fenced/pacemaker-fenced.h
33afe3
@@ -218,7 +218,8 @@ void stonith_device_remove(const char *id, bool from_cib);
33afe3
 
33afe3
 char *stonith_level_key(xmlNode * msg, int mode);
33afe3
 int stonith_level_kind(xmlNode * msg);
33afe3
-int stonith_level_register(xmlNode * msg, char **desc);
33afe3
+void fenced_register_level(xmlNode *msg, char **desc,
33afe3
+                           pcmk__action_result_t *result);
33afe3
 
33afe3
 int stonith_level_remove(xmlNode * msg, char **desc);
33afe3
 
33afe3
diff --git a/lib/fencing/st_actions.c b/lib/fencing/st_actions.c
33afe3
index 7eaa8b0f2b..37fa849847 100644
33afe3
--- a/lib/fencing/st_actions.c
33afe3
+++ b/lib/fencing/st_actions.c
33afe3
@@ -325,6 +325,7 @@ stonith__result2rc(const pcmk__action_result_t *result)
33afe3
          */
33afe3
         case PCMK_EXEC_INVALID:
33afe3
             switch (result->exit_status) {
33afe3
+                case CRM_EX_INVALID_PARAM:      return EINVAL;
33afe3
                 case CRM_EX_INSUFFICIENT_PRIV:  return EACCES;
33afe3
                 case CRM_EX_PROTOCOL:           return EPROTO;
33afe3
 
33afe3
-- 
33afe3
2.27.0
33afe3
33afe3
33afe3
From 27cedca4070328ecac1761f81c2890059af19dcf Mon Sep 17 00:00:00 2001
33afe3
From: Ken Gaillot <kgaillot@redhat.com>
33afe3
Date: Mon, 29 Nov 2021 17:29:38 -0600
33afe3
Subject: [PATCH 05/23] Low: fencer: handle topology level unregistration
33afe3
 errors better
33afe3
33afe3
Rename stonith_level_remove() to fenced_unregister_level() for consistency, and
33afe3
refactor it to return a full result rather than a legacy return code.
33afe3
33afe3
Return a protocol error for missing information in the request XML, and log
33afe3
invalid level numbers at warning level. Use PCMK_EXEC_INVALID with
33afe3
CRM_EX_INVALID_PARAM for invalid levels, so it gets mapped back to the legacy
33afe3
code -EINVAL (which reverses the recent change in ec60f014b, both for backward
33afe3
compatibility and because it makes sense -- a missing parameter is a protocol
33afe3
error, while an invalid parameter is an invalid parameter error).
33afe3
---
33afe3
 daemons/fenced/fenced_commands.c  | 52 ++++++++++++++++++++++++-------
33afe3
 daemons/fenced/pacemaker-fenced.c |  9 +++---
33afe3
 daemons/fenced/pacemaker-fenced.h |  4 +--
33afe3
 3 files changed, 48 insertions(+), 17 deletions(-)
33afe3
33afe3
diff --git a/daemons/fenced/fenced_commands.c b/daemons/fenced/fenced_commands.c
33afe3
index 975f8633a4..ef41dc0e52 100644
33afe3
--- a/daemons/fenced/fenced_commands.c
33afe3
+++ b/daemons/fenced/fenced_commands.c
33afe3
@@ -1693,25 +1693,54 @@ fenced_register_level(xmlNode *msg, char **desc, pcmk__action_result_t *result)
33afe3
     pcmk__set_result(result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
33afe3
 }
33afe3
 
33afe3
-int
33afe3
-stonith_level_remove(xmlNode *msg, char **desc)
33afe3
+/*!
33afe3
+ * \internal
33afe3
+ * \brief Unregister a fencing topology level for a target
33afe3
+ *
33afe3
+ * Given an XML request specifying the target name and level index (or 0 for all
33afe3
+ * levels), this will remove any corresponding entry for the target from the
33afe3
+ * global topology table.
33afe3
+ *
33afe3
+ * \param[in]  msg     XML request for STONITH level registration
33afe3
+ * \param[out] desc    If not NULL, set to string representation "TARGET[LEVEL]"
33afe3
+ * \param[out] result  Where to set result of unregistration
33afe3
+ */
33afe3
+void
33afe3
+fenced_unregister_level(xmlNode *msg, char **desc,
33afe3
+                        pcmk__action_result_t *result)
33afe3
 {
33afe3
     int id = -1;
33afe3
     stonith_topology_t *tp;
33afe3
     char *target;
33afe3
+    xmlNode *level = NULL;
33afe3
+
33afe3
+    CRM_CHECK(result != NULL, return);
33afe3
 
33afe3
-    /* Unlike additions, removal requests should always have one level tag */
33afe3
-    xmlNode *level = get_xpath_object("//" XML_TAG_FENCING_LEVEL, msg, LOG_ERR);
33afe3
+    if (msg == NULL) {
33afe3
+        fenced_set_protocol_error(result);
33afe3
+        return;
33afe3
+    }
33afe3
 
33afe3
-    CRM_CHECK(level != NULL, return -EPROTO);
33afe3
+    // Unlike additions, removal requests should always have one level tag
33afe3
+    level = get_xpath_object("//" XML_TAG_FENCING_LEVEL, msg, LOG_WARNING);
33afe3
+    if (level == NULL) {
33afe3
+        fenced_set_protocol_error(result);
33afe3
+        return;
33afe3
+    }
33afe3
 
33afe3
     target = stonith_level_key(level, -1);
33afe3
     crm_element_value_int(level, XML_ATTR_STONITH_INDEX, &id;;
33afe3
 
33afe3
-    CRM_CHECK((id >= 0) && (id < ST_LEVEL_MAX),
33afe3
-              crm_log_xml_warn(msg, "invalid level");
33afe3
-              free(target);
33afe3
-              return -EPROTO);
33afe3
+    // Ensure level ID is in allowed range
33afe3
+    if ((id < 0) || (id >= ST_LEVEL_MAX)) {
33afe3
+        crm_warn("Ignoring topology unregistration for %s with invalid level %d",
33afe3
+                  target, id);
33afe3
+        free(target);
33afe3
+        crm_log_xml_warn(level, "Bad level");
33afe3
+        pcmk__set_result(result, CRM_EX_INVALID_PARAM, PCMK_EXEC_INVALID,
33afe3
+                         "Invalid topology level");
33afe3
+        return;
33afe3
+    }
33afe3
 
33afe3
     if (desc) {
33afe3
         *desc = crm_strdup_printf("%s[%d]", target, id);
33afe3
@@ -1745,7 +1774,7 @@ stonith_level_remove(xmlNode *msg, char **desc)
33afe3
     }
33afe3
 
33afe3
     free(target);
33afe3
-    return pcmk_ok;
33afe3
+    pcmk__set_result(result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
33afe3
 }
33afe3
 
33afe3
 static char *
33afe3
@@ -3173,7 +3202,8 @@ handle_request(pcmk__client_t *client, uint32_t id, uint32_t flags,
33afe3
         char *device_id = NULL;
33afe3
 
33afe3
         if (is_privileged(client, op)) {
33afe3
-            rc = stonith_level_remove(request, &device_id);
33afe3
+            fenced_unregister_level(request, &device_id, &result);
33afe3
+            rc = pcmk_rc2legacy(stonith__result2rc(&result));
33afe3
         } else {
33afe3
             rc = -EACCES;
33afe3
         }
33afe3
diff --git a/daemons/fenced/pacemaker-fenced.c b/daemons/fenced/pacemaker-fenced.c
33afe3
index 469304f67c..56acc93f31 100644
33afe3
--- a/daemons/fenced/pacemaker-fenced.c
33afe3
+++ b/daemons/fenced/pacemaker-fenced.c
33afe3
@@ -409,17 +409,18 @@ do_stonith_notify_level(const char *op, int rc, const char *desc)
33afe3
 static void
33afe3
 topology_remove_helper(const char *node, int level)
33afe3
 {
33afe3
-    int rc;
33afe3
     char *desc = NULL;
33afe3
+    pcmk__action_result_t result = PCMK__UNKNOWN_RESULT;
33afe3
     xmlNode *data = create_xml_node(NULL, XML_TAG_FENCING_LEVEL);
33afe3
 
33afe3
     crm_xml_add(data, F_STONITH_ORIGIN, __func__);
33afe3
     crm_xml_add_int(data, XML_ATTR_STONITH_INDEX, level);
33afe3
     crm_xml_add(data, XML_ATTR_STONITH_TARGET, node);
33afe3
 
33afe3
-    rc = stonith_level_remove(data, &desc);
33afe3
-    do_stonith_notify_level(STONITH_OP_LEVEL_DEL, rc, desc);
33afe3
-
33afe3
+    fenced_unregister_level(data, &desc, &result);
33afe3
+    do_stonith_notify_level(STONITH_OP_LEVEL_DEL,
33afe3
+                            pcmk_rc2legacy(stonith__result2rc(&result)), desc);
33afe3
+    pcmk__reset_result(&result);
33afe3
     free_xml(data);
33afe3
     free(desc);
33afe3
 }
33afe3
diff --git a/daemons/fenced/pacemaker-fenced.h b/daemons/fenced/pacemaker-fenced.h
33afe3
index cf114fb979..0006e02e7d 100644
33afe3
--- a/daemons/fenced/pacemaker-fenced.h
33afe3
+++ b/daemons/fenced/pacemaker-fenced.h
33afe3
@@ -220,8 +220,8 @@ char *stonith_level_key(xmlNode * msg, int mode);
33afe3
 int stonith_level_kind(xmlNode * msg);
33afe3
 void fenced_register_level(xmlNode *msg, char **desc,
33afe3
                            pcmk__action_result_t *result);
33afe3
-
33afe3
-int stonith_level_remove(xmlNode * msg, char **desc);
33afe3
+void fenced_unregister_level(xmlNode *msg, char **desc,
33afe3
+                             pcmk__action_result_t *result);
33afe3
 
33afe3
 stonith_topology_t *find_topology_for_host(const char *host);
33afe3
 
33afe3
-- 
33afe3
2.27.0
33afe3
33afe3
33afe3
From 3f603defca78eb2bdd46c51a80ed04a4c773442b Mon Sep 17 00:00:00 2001
33afe3
From: Ken Gaillot <kgaillot@redhat.com>
33afe3
Date: Fri, 19 Nov 2021 12:22:33 -0600
33afe3
Subject: [PATCH 06/23] Log: fencer: track and log full result when handling
33afe3
 requests
33afe3
33afe3
handle_request() now tracks and logs a full result rather than just a
33afe3
legacy return code.
33afe3
---
33afe3
 daemons/fenced/fenced_commands.c | 95 ++++++++++++++++++--------------
33afe3
 1 file changed, 53 insertions(+), 42 deletions(-)
33afe3
33afe3
diff --git a/daemons/fenced/fenced_commands.c b/daemons/fenced/fenced_commands.c
33afe3
index ef41dc0e52..996c18faaa 100644
33afe3
--- a/daemons/fenced/fenced_commands.c
33afe3
+++ b/daemons/fenced/fenced_commands.c
33afe3
@@ -2981,9 +2981,7 @@ static void
33afe3
 handle_request(pcmk__client_t *client, uint32_t id, uint32_t flags,
33afe3
                xmlNode *request, const char *remote_peer)
33afe3
 {
33afe3
-    int call_options = 0;
33afe3
-    int rc = -EOPNOTSUPP;
33afe3
-
33afe3
+    int call_options = st_opt_none;
33afe3
     xmlNode *data = NULL;
33afe3
     bool need_reply = true;
33afe3
     pcmk__action_result_t result = PCMK__UNKNOWN_RESULT;
33afe3
@@ -3006,13 +3004,12 @@ handle_request(pcmk__client_t *client, uint32_t id, uint32_t flags,
33afe3
         pcmk__ipc_send_xml(client, id, reply, flags);
33afe3
         client->request_id = 0;
33afe3
         free_xml(reply);
33afe3
-        rc = pcmk_ok;
33afe3
+        pcmk__set_result(&result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
33afe3
         need_reply = false;
33afe3
 
33afe3
     } else if (pcmk__str_eq(op, STONITH_OP_EXEC, pcmk__str_none)) {
33afe3
         execute_agent_action(request, &result);
33afe3
         need_reply = (result.execution_status != PCMK_EXEC_PENDING);
33afe3
-        rc = pcmk_rc2legacy(stonith__result2rc(&result));
33afe3
 
33afe3
     } else if (pcmk__str_eq(op, STONITH_OP_TIMEOUT_UPDATE, pcmk__str_none)) {
33afe3
         const char *call_id = crm_element_value(request, F_STONITH_CALLID);
33afe3
@@ -3021,7 +3018,7 @@ handle_request(pcmk__client_t *client, uint32_t id, uint32_t flags,
33afe3
 
33afe3
         crm_element_value_int(request, F_STONITH_TIMEOUT, &op_timeout);
33afe3
         do_stonith_async_timeout_update(client_id, call_id, op_timeout);
33afe3
-        rc = pcmk_ok;
33afe3
+        pcmk__set_result(&result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
33afe3
         need_reply = false;
33afe3
 
33afe3
     } else if (pcmk__str_eq(op, STONITH_OP_QUERY, pcmk__str_none)) {
33afe3
@@ -3033,7 +3030,7 @@ handle_request(pcmk__client_t *client, uint32_t id, uint32_t flags,
33afe3
         remove_relay_op(request);
33afe3
 
33afe3
         stonith_query(request, remote_peer, client_id, call_options);
33afe3
-        rc = pcmk_ok;
33afe3
+        pcmk__set_result(&result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
33afe3
         need_reply = false;
33afe3
 
33afe3
     } else if (pcmk__str_eq(op, T_STONITH_NOTIFY, pcmk__str_none)) {
33afe3
@@ -3055,7 +3052,7 @@ handle_request(pcmk__client_t *client, uint32_t id, uint32_t flags,
33afe3
         }
33afe3
 
33afe3
         pcmk__ipc_send_ack(client, id, flags, "ack", CRM_EX_OK);
33afe3
-        rc = pcmk_ok;
33afe3
+        pcmk__set_result(&result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
33afe3
         need_reply = false;
33afe3
 
33afe3
     } else if (pcmk__str_eq(op, STONITH_OP_RELAY, pcmk__str_none)) {
33afe3
@@ -3069,27 +3066,27 @@ handle_request(pcmk__client_t *client, uint32_t id, uint32_t flags,
33afe3
                    crm_element_value(dev, F_STONITH_TARGET));
33afe3
 
33afe3
         if (initiate_remote_stonith_op(NULL, request, FALSE) == NULL) {
33afe3
-            rc = -EPROTO;
33afe3
+            fenced_set_protocol_error(&result);
33afe3
         } else {
33afe3
-            rc = -EINPROGRESS;
33afe3
+            pcmk__set_result(&result, CRM_EX_OK, PCMK_EXEC_PENDING, NULL);
33afe3
             need_reply = false;
33afe3
         }
33afe3
 
33afe3
     } else if (pcmk__str_eq(op, STONITH_OP_FENCE, pcmk__str_none)) {
33afe3
         if ((remote_peer != NULL) || stand_alone) {
33afe3
             fence_locally(request, &result);
33afe3
-            rc = pcmk_rc2legacy(stonith__result2rc(&result));
33afe3
 
33afe3
         } else if (pcmk_is_set(call_options, st_opt_manual_ack)) {
33afe3
             switch (fenced_handle_manual_confirmation(client, request)) {
33afe3
                 case pcmk_rc_ok:
33afe3
-                    rc = pcmk_ok;
33afe3
+                    pcmk__set_result(&result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
33afe3
                     break;
33afe3
                 case EINPROGRESS:
33afe3
-                    rc = -EINPROGRESS;
33afe3
+                    pcmk__set_result(&result, CRM_EX_OK, PCMK_EXEC_PENDING,
33afe3
+                                     NULL);
33afe3
                     break;
33afe3
                 default:
33afe3
-                    rc = -EPROTO;
33afe3
+                    fenced_set_protocol_error(&result);
33afe3
                     break;
33afe3
             }
33afe3
 
33afe3
@@ -3100,17 +3097,15 @@ handle_request(pcmk__client_t *client, uint32_t id, uint32_t flags,
33afe3
             const char *action = crm_element_value(dev, F_STONITH_ACTION);
33afe3
             const char *device = crm_element_value(dev, F_STONITH_DEVICE);
33afe3
 
33afe3
-            if (client) {
33afe3
+            if (client != NULL) {
33afe3
                 int tolerance = 0;
33afe3
 
33afe3
                 crm_notice("Client %s wants to fence (%s) %s using %s",
33afe3
                            pcmk__client_name(client), action,
33afe3
                            target, (device? device : "any device"));
33afe3
-
33afe3
                 crm_element_value_int(dev, F_STONITH_TOLERANCE, &tolerance);
33afe3
-
33afe3
                 if (stonith_check_fence_tolerance(tolerance, target, action)) {
33afe3
-                    rc = pcmk_ok;
33afe3
+                    pcmk__set_result(&result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
33afe3
                     goto done;
33afe3
                 }
33afe3
 
33afe3
@@ -3143,24 +3138,24 @@ handle_request(pcmk__client_t *client, uint32_t id, uint32_t flags,
33afe3
                 crm_xml_add(request, F_STONITH_REMOTE_OP_ID, op->id);
33afe3
                 send_cluster_message(crm_get_peer(0, alternate_host), crm_msg_stonith_ng, request,
33afe3
                                      FALSE);
33afe3
-                rc = -EINPROGRESS;
33afe3
+                pcmk__set_result(&result, CRM_EX_OK, PCMK_EXEC_PENDING, NULL);
33afe3
 
33afe3
             } else if (initiate_remote_stonith_op(client, request, FALSE) == NULL) {
33afe3
-                rc = -EPROTO;
33afe3
+                fenced_set_protocol_error(&result);
33afe3
+
33afe3
             } else {
33afe3
-                rc = -EINPROGRESS;
33afe3
+                pcmk__set_result(&result, CRM_EX_OK, PCMK_EXEC_PENDING, NULL);
33afe3
             }
33afe3
         }
33afe3
-        need_reply = (rc != -EINPROGRESS);
33afe3
+        need_reply = (result.execution_status != PCMK_EXEC_PENDING);
33afe3
 
33afe3
     } else if (pcmk__str_eq(op, STONITH_OP_FENCE_HISTORY, pcmk__str_none)) {
33afe3
         stonith_fence_history(request, &data, remote_peer, call_options);
33afe3
-        rc = pcmk_ok;
33afe3
+        pcmk__set_result(&result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
33afe3
         if (pcmk_is_set(call_options, st_opt_discard_reply)) {
33afe3
             /* we don't expect answers to the broadcast
33afe3
              * we might have sent out
33afe3
              */
33afe3
-            rc = pcmk_ok;
33afe3
             need_reply = false;
33afe3
         }
33afe3
 
33afe3
@@ -3168,11 +3163,18 @@ handle_request(pcmk__client_t *client, uint32_t id, uint32_t flags,
33afe3
         const char *device_id = NULL;
33afe3
 
33afe3
         if (is_privileged(client, op)) {
33afe3
-            rc = stonith_device_register(request, &device_id, FALSE);
33afe3
+            int rc = stonith_device_register(request, &device_id, FALSE);
33afe3
+
33afe3
+            pcmk__set_result(&result,
33afe3
+                             ((rc == pcmk_ok)? CRM_EX_OK : CRM_EX_ERROR),
33afe3
+                             stonith__legacy2status(rc),
33afe3
+                             ((rc == pcmk_ok)? NULL : pcmk_strerror(rc)));
33afe3
         } else {
33afe3
-            rc = -EACCES;
33afe3
+            pcmk__set_result(&result, CRM_EX_INSUFFICIENT_PRIV,
33afe3
+                             PCMK_EXEC_INVALID,
33afe3
+                             "Unprivileged users must register device via CIB");
33afe3
         }
33afe3
-        do_stonith_notify_device(op, rc, device_id);
33afe3
+        do_stonith_notify_device(op, pcmk_rc2legacy(stonith__result2rc(&result)), device_id);
33afe3
 
33afe3
     } else if (pcmk__str_eq(op, STONITH_OP_DEVICE_DEL, pcmk__str_none)) {
33afe3
         xmlNode *dev = get_xpath_object("//" F_STONITH_DEVICE, request, LOG_ERR);
33afe3
@@ -3180,22 +3182,25 @@ handle_request(pcmk__client_t *client, uint32_t id, uint32_t flags,
33afe3
 
33afe3
         if (is_privileged(client, op)) {
33afe3
             stonith_device_remove(device_id, false);
33afe3
-            rc = pcmk_ok;
33afe3
+            pcmk__set_result(&result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
33afe3
         } else {
33afe3
-            rc = -EACCES;
33afe3
+            pcmk__set_result(&result, CRM_EX_INSUFFICIENT_PRIV,
33afe3
+                             PCMK_EXEC_INVALID,
33afe3
+                             "Unprivileged users must delete device via CIB");
33afe3
         }
33afe3
-        do_stonith_notify_device(op, rc, device_id);
33afe3
+        do_stonith_notify_device(op, pcmk_rc2legacy(stonith__result2rc(&result)), device_id);
33afe3
 
33afe3
     } else if (pcmk__str_eq(op, STONITH_OP_LEVEL_ADD, pcmk__str_none)) {
33afe3
         char *device_id = NULL;
33afe3
 
33afe3
         if (is_privileged(client, op)) {
33afe3
             fenced_register_level(request, &device_id, &result);
33afe3
-            rc = pcmk_rc2legacy(stonith__result2rc(&result));
33afe3
         } else {
33afe3
-            rc = -EACCES;
33afe3
+            pcmk__set_result(&result, CRM_EX_INSUFFICIENT_PRIV,
33afe3
+                             PCMK_EXEC_INVALID,
33afe3
+                             "Unprivileged users must add level via CIB");
33afe3
         }
33afe3
-        do_stonith_notify_level(op, rc, device_id);
33afe3
+        do_stonith_notify_level(op, pcmk_rc2legacy(stonith__result2rc(&result)), device_id);
33afe3
         free(device_id);
33afe3
 
33afe3
     } else if (pcmk__str_eq(op, STONITH_OP_LEVEL_DEL, pcmk__str_none)) {
33afe3
@@ -3203,11 +3208,12 @@ handle_request(pcmk__client_t *client, uint32_t id, uint32_t flags,
33afe3
 
33afe3
         if (is_privileged(client, op)) {
33afe3
             fenced_unregister_level(request, &device_id, &result);
33afe3
-            rc = pcmk_rc2legacy(stonith__result2rc(&result));
33afe3
         } else {
33afe3
-            rc = -EACCES;
33afe3
+            pcmk__set_result(&result, CRM_EX_INSUFFICIENT_PRIV,
33afe3
+                             PCMK_EXEC_INVALID,
33afe3
+                             "Unprivileged users must delete level via CIB");
33afe3
         }
33afe3
-        do_stonith_notify_level(op, rc, device_id);
33afe3
+        do_stonith_notify_level(op, pcmk_rc2legacy(stonith__result2rc(&result)), device_id);
33afe3
 
33afe3
     } else if(pcmk__str_eq(op, CRM_OP_RM_NODE_CACHE, pcmk__str_casei)) {
33afe3
         int node_id = 0;
33afe3
@@ -3216,31 +3222,36 @@ handle_request(pcmk__client_t *client, uint32_t id, uint32_t flags,
33afe3
         crm_element_value_int(request, XML_ATTR_ID, &node_id);
33afe3
         name = crm_element_value(request, XML_ATTR_UNAME);
33afe3
         reap_crm_member(node_id, name);
33afe3
-        rc = pcmk_ok;
33afe3
+        pcmk__set_result(&result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
33afe3
         need_reply = false;
33afe3
 
33afe3
     } else {
33afe3
         crm_err("Unknown IPC request %s from %s %s", op,
33afe3
                 ((client == NULL)? "peer" : "client"),
33afe3
                 ((client == NULL)? remote_peer : pcmk__client_name(client)));
33afe3
+        pcmk__set_result(&result, CRM_EX_PROTOCOL, PCMK_EXEC_INVALID,
33afe3
+                         "Unknown IPC request type (bug?)");
33afe3
     }
33afe3
 
33afe3
 done:
33afe3
     // Reply if result is known
33afe3
     if (need_reply) {
33afe3
-        xmlNode *reply = stonith_construct_reply(request, result.action_stdout, data, rc);
33afe3
+        xmlNode *reply = stonith_construct_reply(request, result.action_stdout, data,
33afe3
+                                                 pcmk_rc2legacy(stonith__result2rc(&result)));
33afe3
 
33afe3
         stonith_send_reply(reply, call_options, remote_peer, client_id);
33afe3
         free_xml(reply);
33afe3
     }
33afe3
 
33afe3
-    free_xml(data);
33afe3
-
33afe3
-    crm_debug("Processed %s request from %s %s: %s (rc=%d)",
33afe3
+    crm_debug("Processed %s request from %s %s: %s%s%s%s",
33afe3
               op, ((client == NULL)? "peer" : "client"),
33afe3
               ((client == NULL)? remote_peer : pcmk__client_name(client)),
33afe3
-              ((rc > 0)? "" : pcmk_strerror(rc)), rc);
33afe3
+              pcmk_exec_status_str(result.execution_status),
33afe3
+              (result.exit_reason == NULL)? "" : " (",
33afe3
+              (result.exit_reason == NULL)? "" : result.exit_reason,
33afe3
+              (result.exit_reason == NULL)? "" : ")");
33afe3
 
33afe3
+    free_xml(data);
33afe3
     pcmk__reset_result(&result);
33afe3
 }
33afe3
 
33afe3
-- 
33afe3
2.27.0
33afe3
33afe3
33afe3
From 5e13199699a4e9279520b3668c072e3db49c9782 Mon Sep 17 00:00:00 2001
33afe3
From: Ken Gaillot <kgaillot@redhat.com>
33afe3
Date: Fri, 19 Nov 2021 15:10:36 -0600
33afe3
Subject: [PATCH 07/23] Feature: fencer: pass full result in replies to
33afe3
 requests
33afe3
33afe3
Rename stonith_construct_reply() to fenced_construct_reply() for consistency,
33afe3
make it take a full result as an argument rather than separate arguments for
33afe3
legacy return code and output, and add the full result to the reply (along with
33afe3
the legacy return code, for backward compatibility).
33afe3
33afe3
This is used for peer query replies and some request replies (including replies
33afe3
to local clients who requested fencing). Other replies, such as those built by
33afe3
construct_async_reply(), are not affected by this commit.
33afe3
---
33afe3
 daemons/fenced/fenced_commands.c  | 33 ++++++++++++++++++++++---------
33afe3
 daemons/fenced/fenced_remote.c    |  9 ++++++++-
33afe3
 daemons/fenced/pacemaker-fenced.h |  4 ++--
33afe3
 3 files changed, 34 insertions(+), 12 deletions(-)
33afe3
33afe3
diff --git a/daemons/fenced/fenced_commands.c b/daemons/fenced/fenced_commands.c
33afe3
index 996c18faaa..84f89e8daf 100644
33afe3
--- a/daemons/fenced/fenced_commands.c
33afe3
+++ b/daemons/fenced/fenced_commands.c
33afe3
@@ -2322,6 +2322,7 @@ stonith_query(xmlNode * msg, const char *remote_peer, const char *client_id, int
33afe3
     const char *target = NULL;
33afe3
     int timeout = 0;
33afe3
     xmlNode *dev = get_xpath_object("//@" F_STONITH_ACTION, msg, LOG_NEVER);
33afe3
+    pcmk__action_result_t result = PCMK__UNKNOWN_RESULT;
33afe3
 
33afe3
     crm_element_value_int(msg, F_STONITH_TIMEOUT, &timeout);
33afe3
     if (dev) {
33afe3
@@ -2338,7 +2339,8 @@ stonith_query(xmlNode * msg, const char *remote_peer, const char *client_id, int
33afe3
     crm_log_xml_debug(msg, "Query");
33afe3
     query = calloc(1, sizeof(struct st_query_data));
33afe3
 
33afe3
-    query->reply = stonith_construct_reply(msg, NULL, NULL, pcmk_ok);
33afe3
+    pcmk__set_result(&result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
33afe3
+    query->reply = fenced_construct_reply(msg, NULL, &result);
33afe3
     query->remote_peer = remote_peer ? strdup(remote_peer) : NULL;
33afe3
     query->client_id = client_id ? strdup(client_id) : NULL;
33afe3
     query->target = target ? strdup(target) : NULL;
33afe3
@@ -2729,8 +2731,23 @@ fence_locally(xmlNode *msg, pcmk__action_result_t *result)
33afe3
     pcmk__set_result(result, CRM_EX_OK, PCMK_EXEC_PENDING, NULL);
33afe3
 }
33afe3
 
33afe3
+/*!
33afe3
+ * \internal
33afe3
+ * \brief Build an XML reply for a fencing operation
33afe3
+ *
33afe3
+ * \param[in] request  Request that reply is for
33afe3
+ * \param[in] data     If not NULL, add to reply as call data
33afe3
+ * \param[in] result   Full result of fencing operation
33afe3
+ *
33afe3
+ * \return Newly created XML reply
33afe3
+ * \note The caller is responsible for freeing the result.
33afe3
+ * \note This has some overlap with construct_async_reply(), but that copies
33afe3
+ *       values from an async_command_t, whereas this one copies them from the
33afe3
+ *       request.
33afe3
+ */
33afe3
 xmlNode *
33afe3
-stonith_construct_reply(xmlNode * request, const char *output, xmlNode * data, int rc)
33afe3
+fenced_construct_reply(xmlNode *request, xmlNode *data,
33afe3
+                       pcmk__action_result_t *result)
33afe3
 {
33afe3
     xmlNode *reply = NULL;
33afe3
 
33afe3
@@ -2738,8 +2755,7 @@ stonith_construct_reply(xmlNode * request, const char *output, xmlNode * data, i
33afe3
 
33afe3
     crm_xml_add(reply, "st_origin", __func__);
33afe3
     crm_xml_add(reply, F_TYPE, T_STONITH_NG);
33afe3
-    crm_xml_add(reply, F_STONITH_OUTPUT, output);
33afe3
-    crm_xml_add_int(reply, F_STONITH_RC, rc);
33afe3
+    stonith__xe_set_result(reply, result);
33afe3
 
33afe3
     if (request == NULL) {
33afe3
         /* Most likely, this is the result of a stonith operation that was
33afe3
@@ -2749,12 +2765,14 @@ stonith_construct_reply(xmlNode * request, const char *output, xmlNode * data, i
33afe3
          * @TODO Maybe synchronize this information at start-up?
33afe3
          */
33afe3
         crm_warn("Missing request information for client notifications for "
33afe3
-                 "operation with result %d (initiated before we came up?)", rc);
33afe3
+                 "operation with result '%s' (initiated before we came up?)",
33afe3
+                 pcmk_exec_status_str(result->execution_status));
33afe3
 
33afe3
     } else {
33afe3
         const char *name = NULL;
33afe3
         const char *value = NULL;
33afe3
 
33afe3
+        // Attributes to copy from request to reply
33afe3
         const char *names[] = {
33afe3
             F_STONITH_OPERATION,
33afe3
             F_STONITH_CALLID,
33afe3
@@ -2764,8 +2782,6 @@ stonith_construct_reply(xmlNode * request, const char *output, xmlNode * data, i
33afe3
             F_STONITH_CALLOPTS
33afe3
         };
33afe3
 
33afe3
-        crm_trace("Creating a result reply with%s reply output (rc=%d)",
33afe3
-                  (data? "" : "out"), rc);
33afe3
         for (int lpc = 0; lpc < PCMK__NELEM(names); lpc++) {
33afe3
             name = names[lpc];
33afe3
             value = crm_element_value(request, name);
33afe3
@@ -3236,8 +3252,7 @@ handle_request(pcmk__client_t *client, uint32_t id, uint32_t flags,
33afe3
 done:
33afe3
     // Reply if result is known
33afe3
     if (need_reply) {
33afe3
-        xmlNode *reply = stonith_construct_reply(request, result.action_stdout, data,
33afe3
-                                                 pcmk_rc2legacy(stonith__result2rc(&result)));
33afe3
+        xmlNode *reply = fenced_construct_reply(request, data, &result);
33afe3
 
33afe3
         stonith_send_reply(reply, call_options, remote_peer, client_id);
33afe3
         free_xml(reply);
33afe3
diff --git a/daemons/fenced/fenced_remote.c b/daemons/fenced/fenced_remote.c
33afe3
index 8feb401477..baa07d9e78 100644
33afe3
--- a/daemons/fenced/fenced_remote.c
33afe3
+++ b/daemons/fenced/fenced_remote.c
33afe3
@@ -415,7 +415,14 @@ handle_local_reply_and_notify(remote_fencing_op_t * op, xmlNode * data, int rc)
33afe3
     crm_xml_add(data, F_STONITH_TARGET, op->target);
33afe3
     crm_xml_add(data, F_STONITH_OPERATION, op->action);
33afe3
 
33afe3
-    reply = stonith_construct_reply(op->request, NULL, data, rc);
33afe3
+    {
33afe3
+        pcmk__action_result_t result = PCMK__UNKNOWN_RESULT;
33afe3
+
33afe3
+        pcmk__set_result(&result,
33afe3
+                         ((rc == pcmk_ok)? CRM_EX_OK : CRM_EX_ERROR),
33afe3
+                         stonith__legacy2status(rc), NULL);
33afe3
+        reply = fenced_construct_reply(op->request, data, &result);
33afe3
+    }
33afe3
     crm_xml_add(reply, F_STONITH_DELEGATE, op->delegate);
33afe3
 
33afe3
     /* Send fencing OP reply to local client that initiated fencing */
33afe3
diff --git a/daemons/fenced/pacemaker-fenced.h b/daemons/fenced/pacemaker-fenced.h
33afe3
index 0006e02e7d..d5f4bc79fd 100644
33afe3
--- a/daemons/fenced/pacemaker-fenced.h
33afe3
+++ b/daemons/fenced/pacemaker-fenced.h
33afe3
@@ -228,8 +228,8 @@ stonith_topology_t *find_topology_for_host(const char *host);
33afe3
 void do_local_reply(xmlNode * notify_src, const char *client_id, gboolean sync_reply,
33afe3
                            gboolean from_peer);
33afe3
 
33afe3
-xmlNode *stonith_construct_reply(xmlNode * request, const char *output, xmlNode * data,
33afe3
-                                        int rc);
33afe3
+xmlNode *fenced_construct_reply(xmlNode *request, xmlNode *data,
33afe3
+                                pcmk__action_result_t *result);
33afe3
 
33afe3
 void
33afe3
  do_stonith_async_timeout_update(const char *client, const char *call_id, int timeout);
33afe3
-- 
33afe3
2.27.0
33afe3
33afe3
33afe3
From b32aa252b321ff40c834d153cb23f8b3be471611 Mon Sep 17 00:00:00 2001
33afe3
From: Ken Gaillot <kgaillot@redhat.com>
33afe3
Date: Fri, 19 Nov 2021 15:43:20 -0600
33afe3
Subject: [PATCH 08/23] Log: fencer: grab and log full result when processing
33afe3
 peer fencing replies
33afe3
33afe3
fenced_process_fencing_reply() now checks for the full result, instead of only
33afe3
a legacy return code, in peer replies, and uses it in log messages.
33afe3
---
33afe3
 daemons/fenced/fenced_remote.c | 63 ++++++++++++++++++++--------------
33afe3
 1 file changed, 37 insertions(+), 26 deletions(-)
33afe3
33afe3
diff --git a/daemons/fenced/fenced_remote.c b/daemons/fenced/fenced_remote.c
33afe3
index baa07d9e78..c6369f0051 100644
33afe3
--- a/daemons/fenced/fenced_remote.c
33afe3
+++ b/daemons/fenced/fenced_remote.c
33afe3
@@ -2095,21 +2095,21 @@ process_remote_stonith_query(xmlNode * msg)
33afe3
 void
33afe3
 fenced_process_fencing_reply(xmlNode *msg)
33afe3
 {
33afe3
-    int rc = 0;
33afe3
     const char *id = NULL;
33afe3
     const char *device = NULL;
33afe3
     remote_fencing_op_t *op = NULL;
33afe3
     xmlNode *dev = get_xpath_object("//@" F_STONITH_REMOTE_OP_ID, msg, LOG_ERR);
33afe3
+    pcmk__action_result_t result = PCMK__UNKNOWN_RESULT;
33afe3
 
33afe3
     CRM_CHECK(dev != NULL, return);
33afe3
 
33afe3
     id = crm_element_value(dev, F_STONITH_REMOTE_OP_ID);
33afe3
     CRM_CHECK(id != NULL, return);
33afe3
 
33afe3
-    dev = get_xpath_object("//@" F_STONITH_RC, msg, LOG_ERR);
33afe3
+    dev = stonith__find_xe_with_result(msg);
33afe3
     CRM_CHECK(dev != NULL, return);
33afe3
 
33afe3
-    crm_element_value_int(dev, F_STONITH_RC, &rc);
33afe3
+    stonith__xe_get_result(dev, &result);
33afe3
 
33afe3
     device = crm_element_value(dev, F_STONITH_DEVICE);
33afe3
 
33afe3
@@ -2117,7 +2117,7 @@ fenced_process_fencing_reply(xmlNode *msg)
33afe3
         op = g_hash_table_lookup(stonith_remote_op_list, id);
33afe3
     }
33afe3
 
33afe3
-    if (op == NULL && rc == pcmk_ok) {
33afe3
+    if ((op == NULL) && pcmk__result_ok(&result)) {
33afe3
         /* Record successful fencing operations */
33afe3
         const char *client_id = crm_element_value(dev, F_STONITH_CLIENTID);
33afe3
 
33afe3
@@ -2139,16 +2139,19 @@ fenced_process_fencing_reply(xmlNode *msg)
33afe3
     }
33afe3
 
33afe3
     if (pcmk__str_eq(crm_element_value(msg, F_SUBTYPE), "broadcast", pcmk__str_casei)) {
33afe3
-        crm_debug("Finalizing action '%s' targeting %s on behalf of %s@%s: %s "
33afe3
+        crm_debug("Finalizing action '%s' targeting %s on behalf of %s@%s: %s%s%s%s "
33afe3
                   CRM_XS " id=%.8s",
33afe3
                   op->action, op->target, op->client_name, op->originator,
33afe3
-                  pcmk_strerror(rc), op->id);
33afe3
-        if (rc == pcmk_ok) {
33afe3
+                  pcmk_exec_status_str(result.execution_status),
33afe3
+                  (result.exit_reason == NULL)? "" : " (",
33afe3
+                  (result.exit_reason == NULL)? "" : result.exit_reason,
33afe3
+                  (result.exit_reason == NULL)? "" : ")", op->id);
33afe3
+        if (pcmk__result_ok(&result)) {
33afe3
             op->state = st_done;
33afe3
         } else {
33afe3
             op->state = st_failed;
33afe3
         }
33afe3
-        remote_op_done(op, msg, rc, FALSE);
33afe3
+        remote_op_done(op, msg, pcmk_rc2legacy(stonith__result2rc(&result)), FALSE);
33afe3
         return;
33afe3
     } else if (!pcmk__str_eq(op->originator, stonith_our_uname, pcmk__str_casei)) {
33afe3
         /* If this isn't a remote level broadcast, and we are not the
33afe3
@@ -2162,28 +2165,35 @@ fenced_process_fencing_reply(xmlNode *msg)
33afe3
     if (pcmk_is_set(op->call_options, st_opt_topology)) {
33afe3
         const char *device = crm_element_value(msg, F_STONITH_DEVICE);
33afe3
 
33afe3
-        crm_notice("Action '%s' targeting %s using %s on behalf of %s@%s: %s "
33afe3
-                   CRM_XS " rc=%d",
33afe3
+        crm_notice("Action '%s' targeting %s using %s on behalf of %s@%s: %s%s%s%s",
33afe3
                    op->action, op->target, device, op->client_name,
33afe3
-                   op->originator, pcmk_strerror(rc), rc);
33afe3
+                   op->originator,
33afe3
+                   pcmk_exec_status_str(result.execution_status),
33afe3
+                  (result.exit_reason == NULL)? "" : " (",
33afe3
+                  (result.exit_reason == NULL)? "" : result.exit_reason,
33afe3
+                  (result.exit_reason == NULL)? "" : ")");
33afe3
 
33afe3
         /* We own the op, and it is complete. broadcast the result to all nodes
33afe3
          * and notify our local clients. */
33afe3
         if (op->state == st_done) {
33afe3
-            remote_op_done(op, msg, rc, FALSE);
33afe3
+            remote_op_done(op, msg, pcmk_rc2legacy(stonith__result2rc(&result)), FALSE);
33afe3
             return;
33afe3
         }
33afe3
 
33afe3
-        if ((op->phase == 2) && (rc != pcmk_ok)) {
33afe3
+        if ((op->phase == 2) && !pcmk__result_ok(&result)) {
33afe3
             /* A remapped "on" failed, but the node was already turned off
33afe3
              * successfully, so ignore the error and continue.
33afe3
              */
33afe3
-            crm_warn("Ignoring %s 'on' failure (exit code %d) targeting %s "
33afe3
-                     "after successful 'off'", device, rc, op->target);
33afe3
-            rc = pcmk_ok;
33afe3
+            crm_warn("Ignoring %s 'on' failure (%s%s%s) targeting %s "
33afe3
+                     "after successful 'off'",
33afe3
+                     device, pcmk_exec_status_str(result.execution_status),
33afe3
+                     (result.exit_reason == NULL)? "" : ": ",
33afe3
+                     (result.exit_reason == NULL)? "" : result.exit_reason,
33afe3
+                     op->target);
33afe3
+            pcmk__set_result(&result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
33afe3
         }
33afe3
 
33afe3
-        if (rc == pcmk_ok) {
33afe3
+        if (pcmk__result_ok(&result)) {
33afe3
             /* An operation completed successfully. Try another device if
33afe3
              * necessary, otherwise mark the operation as done. */
33afe3
             advance_topology_device_in_level(op, device, msg);
33afe3
@@ -2193,29 +2203,30 @@ fenced_process_fencing_reply(xmlNode *msg)
33afe3
              * levels are available, mark this operation as failed and report results. */
33afe3
             if (advance_topology_level(op, false) != pcmk_rc_ok) {
33afe3
                 op->state = st_failed;
33afe3
-                remote_op_done(op, msg, rc, FALSE);
33afe3
+                remote_op_done(op, msg, pcmk_rc2legacy(stonith__result2rc(&result)), FALSE);
33afe3
                 return;
33afe3
             }
33afe3
         }
33afe3
-    } else if (rc == pcmk_ok && op->devices == NULL) {
33afe3
+    } else if (pcmk__result_ok(&result) && (op->devices == NULL)) {
33afe3
         crm_trace("All done for %s", op->target);
33afe3
-
33afe3
         op->state = st_done;
33afe3
-        remote_op_done(op, msg, rc, FALSE);
33afe3
+        remote_op_done(op, msg, pcmk_rc2legacy(stonith__result2rc(&result)), FALSE);
33afe3
         return;
33afe3
-    } else if (rc == -ETIME && op->devices == NULL) {
33afe3
+    } else if ((result.execution_status == PCMK_EXEC_TIMEOUT)
33afe3
+               && (op->devices == NULL)) {
33afe3
         /* If the operation timed out don't bother retrying other peers. */
33afe3
         op->state = st_failed;
33afe3
-        remote_op_done(op, msg, rc, FALSE);
33afe3
+        remote_op_done(op, msg, pcmk_rc2legacy(stonith__result2rc(&result)), FALSE);
33afe3
         return;
33afe3
     } else {
33afe3
         /* fall-through and attempt other fencing action using another peer */
33afe3
     }
33afe3
 
33afe3
     /* Retry on failure */
33afe3
-    crm_trace("Next for %s on behalf of %s@%s (rc was %d)", op->target, op->originator,
33afe3
-              op->client_name, rc);
33afe3
-    call_remote_stonith(op, NULL, rc);
33afe3
+    crm_trace("Next for %s on behalf of %s@%s (result was: %s)",
33afe3
+              op->target, op->originator, op->client_name,
33afe3
+              pcmk_exec_status_str(result.execution_status));
33afe3
+    call_remote_stonith(op, NULL, pcmk_rc2legacy(stonith__result2rc(&result)));
33afe3
 }
33afe3
 
33afe3
 gboolean
33afe3
-- 
33afe3
2.27.0
33afe3
33afe3
33afe3
From afb5706ac606a8ea883aa1597ee63d9891cc2e13 Mon Sep 17 00:00:00 2001
33afe3
From: Ken Gaillot <kgaillot@redhat.com>
33afe3
Date: Fri, 19 Nov 2021 15:56:30 -0600
33afe3
Subject: [PATCH 09/23] Refactor: fencer: pass full result of previous failed
33afe3
 action when initiating peer fencing
33afe3
33afe3
Rename call_remote_stonith() to request_peer_fencing() for readability, and
33afe3
make it take the full result of the previous failed action, rather than just
33afe3
its legacy return code, as an argument.
33afe3
33afe3
This does cause one change in behavior: if topology is in use, a previous
33afe3
attempt failed, and no more peers have the appropriate device, then the
33afe3
legacy return code returned will be -ENODEV rather than -EHOSTUNREACH.
33afe3
These are treated similarly internally, and hopefully that will not cause
33afe3
problems for external code.
33afe3
---
33afe3
 daemons/fenced/fenced_remote.c | 89 +++++++++++++++++++++++++---------
33afe3
 1 file changed, 67 insertions(+), 22 deletions(-)
33afe3
33afe3
diff --git a/daemons/fenced/fenced_remote.c b/daemons/fenced/fenced_remote.c
33afe3
index c6369f0051..31d5ee6e93 100644
33afe3
--- a/daemons/fenced/fenced_remote.c
33afe3
+++ b/daemons/fenced/fenced_remote.c
33afe3
@@ -76,12 +76,13 @@ typedef struct {
33afe3
 
33afe3
 GHashTable *stonith_remote_op_list = NULL;
33afe3
 
33afe3
-void call_remote_stonith(remote_fencing_op_t *op, peer_device_info_t *peer,
33afe3
-                         int rc);
33afe3
 static void remote_op_done(remote_fencing_op_t * op, xmlNode * data, int rc, int dup);
33afe3
 extern xmlNode *stonith_create_op(int call_id, const char *token, const char *op, xmlNode * data,
33afe3
                                   int call_options);
33afe3
 
33afe3
+static void request_peer_fencing(remote_fencing_op_t *op,
33afe3
+                                peer_device_info_t *peer,
33afe3
+                                pcmk__action_result_t *result);
33afe3
 static void report_timeout_period(remote_fencing_op_t * op, int op_timeout);
33afe3
 static int get_op_total_timeout(const remote_fencing_op_t *op,
33afe3
                                 const peer_device_info_t *chosen_peer);
33afe3
@@ -609,12 +610,16 @@ static gboolean
33afe3
 remote_op_timeout_one(gpointer userdata)
33afe3
 {
33afe3
     remote_fencing_op_t *op = userdata;
33afe3
+    pcmk__action_result_t result = PCMK__UNKNOWN_RESULT;
33afe3
 
33afe3
     op->op_timer_one = 0;
33afe3
 
33afe3
     crm_notice("Peer's '%s' action targeting %s for client %s timed out " CRM_XS
33afe3
                " id=%.8s", op->action, op->target, op->client_name, op->id);
33afe3
-    call_remote_stonith(op, NULL, -ETIME);
33afe3
+    pcmk__set_result(&result, CRM_EX_ERROR, PCMK_EXEC_TIMEOUT, NULL);
33afe3
+
33afe3
+    // Try another device, if appropriate
33afe3
+    request_peer_fencing(op, NULL, &result);
33afe3
     return FALSE;
33afe3
 }
33afe3
 
33afe3
@@ -685,9 +690,13 @@ remote_op_query_timeout(gpointer data)
33afe3
         crm_debug("Operation %.8s targeting %s already in progress",
33afe3
                   op->id, op->target);
33afe3
     } else if (op->query_results) {
33afe3
+        // Result won't be used in this case, but we need to pass something
33afe3
+        pcmk__action_result_t result = PCMK__UNKNOWN_RESULT;
33afe3
+
33afe3
+        // Query succeeded, so attempt the actual fencing
33afe3
         crm_debug("Query %.8s targeting %s complete (state=%s)",
33afe3
                   op->id, op->target, stonith_op_state_str(op->state));
33afe3
-        call_remote_stonith(op, NULL, pcmk_ok);
33afe3
+        request_peer_fencing(op, NULL, &result);
33afe3
     } else {
33afe3
         crm_debug("Query %.8s targeting %s timed out (state=%s)",
33afe3
                   op->id, op->target, stonith_op_state_str(op->state));
33afe3
@@ -1533,6 +1542,10 @@ static void
33afe3
 advance_topology_device_in_level(remote_fencing_op_t *op, const char *device,
33afe3
                                  xmlNode *msg)
33afe3
 {
33afe3
+    pcmk__action_result_t result = PCMK__UNKNOWN_RESULT;
33afe3
+
33afe3
+    pcmk__set_result(&result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
33afe3
+
33afe3
     /* Advance to the next device at this topology level, if any */
33afe3
     if (op->devices) {
33afe3
         op->devices = op->devices->next;
33afe3
@@ -1569,7 +1582,7 @@ advance_topology_device_in_level(remote_fencing_op_t *op, const char *device,
33afe3
             op->delay = 0;
33afe3
         }
33afe3
 
33afe3
-        call_remote_stonith(op, NULL, pcmk_ok);
33afe3
+        request_peer_fencing(op, NULL, &result);
33afe3
     } else {
33afe3
         /* We're done with all devices and phases, so finalize operation */
33afe3
         crm_trace("Marking complex fencing op targeting %s as complete",
33afe3
@@ -1598,15 +1611,30 @@ check_watchdog_fencing_and_wait(remote_fencing_op_t * op)
33afe3
     return FALSE;
33afe3
 }
33afe3
 
33afe3
-void
33afe3
-call_remote_stonith(remote_fencing_op_t *op, peer_device_info_t *peer, int rc)
33afe3
+/*!
33afe3
+ * \internal
33afe3
+ * \brief Ask a peer to execute a fencing operation
33afe3
+ *
33afe3
+ * \param[in] op      Fencing operation to be executed
33afe3
+ * \param[in] peer    If NULL or topology is in use, choose best peer to execute
33afe3
+ *                    the fencing, otherwise use this peer
33afe3
+ * \param[in] result  Full result of previous failed attempt, if any (used as
33afe3
+ *                    final result only if a previous attempt failed, topology
33afe3
+ *                    is not in use, and no devices remain to be attempted)
33afe3
+ */
33afe3
+static void
33afe3
+request_peer_fencing(remote_fencing_op_t *op, peer_device_info_t *peer,
33afe3
+                    pcmk__action_result_t *result)
33afe3
 {
33afe3
     const char *device = NULL;
33afe3
-    int timeout = op->base_timeout;
33afe3
+    int timeout;
33afe3
+
33afe3
+    CRM_CHECK(op != NULL, return);
33afe3
 
33afe3
     crm_trace("Action %.8s targeting %s for %s is %s",
33afe3
               op->id, op->target, op->client_name,
33afe3
               stonith_op_state_str(op->state));
33afe3
+    timeout = op->base_timeout;
33afe3
     if ((peer == NULL) && !pcmk_is_set(op->call_options, st_opt_topology)) {
33afe3
         peer = stonith_choose_peer(op);
33afe3
     }
33afe3
@@ -1623,9 +1651,14 @@ call_remote_stonith(remote_fencing_op_t *op, peer_device_info_t *peer, int rc)
33afe3
     }
33afe3
 
33afe3
     if (pcmk_is_set(op->call_options, st_opt_topology) && op->devices) {
33afe3
-        /* Ignore any peer preference, they might not have the device we need */
33afe3
-        /* When using topology, stonith_choose_peer() removes the device from
33afe3
-         * further consideration, so be sure to calculate timeout beforehand */
33afe3
+        /* Ignore the caller's peer preference if topology is in use, because
33afe3
+         * that peer might not have access to the required device. With
33afe3
+         * topology, stonith_choose_peer() removes the device from further
33afe3
+         * consideration, so the timeout must be calculated beforehand.
33afe3
+         *
33afe3
+         * @TODO Basing the total timeout on the caller's preferred peer (above)
33afe3
+         *       is less than ideal.
33afe3
+         */
33afe3
         peer = stonith_choose_peer(op);
33afe3
 
33afe3
         device = op->devices->data;
33afe3
@@ -1722,8 +1755,6 @@ call_remote_stonith(remote_fencing_op_t *op, peer_device_info_t *peer, int rc)
33afe3
         finalize_timed_out_op(op);
33afe3
 
33afe3
     } else if(op->replies >= op->replies_expected || op->replies >= fencing_active_peers()) {
33afe3
-//        int rc = -EHOSTUNREACH;
33afe3
-
33afe3
         /* if the operation never left the query state,
33afe3
          * but we have all the expected replies, then no devices
33afe3
          * are available to execute the fencing operation. */
33afe3
@@ -1735,17 +1766,28 @@ call_remote_stonith(remote_fencing_op_t *op, peer_device_info_t *peer, int rc)
33afe3
             }
33afe3
         }
33afe3
 
33afe3
+        // This is the only case in which result will be used
33afe3
+        CRM_CHECK(result != NULL, return);
33afe3
+
33afe3
         if (op->state == st_query) {
33afe3
             crm_info("No peers (out of %d) have devices capable of fencing "
33afe3
                      "(%s) %s for client %s " CRM_XS " state=%s",
33afe3
                      op->replies, op->action, op->target, op->client_name,
33afe3
                      stonith_op_state_str(op->state));
33afe3
 
33afe3
-            rc = -ENODEV;
33afe3
+            pcmk__reset_result(result);
33afe3
+            pcmk__set_result(result, CRM_EX_ERROR, PCMK_EXEC_NO_FENCE_DEVICE,
33afe3
+                             NULL);
33afe3
         } else {
33afe3
             if (pcmk_is_set(op->call_options, st_opt_topology)) {
33afe3
-                rc = -EHOSTUNREACH;
33afe3
-            } 
33afe3
+                pcmk__reset_result(result);
33afe3
+                pcmk__set_result(result, CRM_EX_ERROR,
33afe3
+                                 PCMK_EXEC_NO_FENCE_DEVICE, NULL);
33afe3
+            }
33afe3
+            /* ... else use result provided by caller -- overwriting it with
33afe3
+               PCMK_EXEC_NO_FENCE_DEVICE would prevent remote_op_done() from
33afe3
+               setting the correct delegate if needed.
33afe3
+             */
33afe3
 
33afe3
             crm_info("No peers (out of %d) are capable of fencing (%s) %s "
33afe3
                      "for client %s " CRM_XS " state=%s",
33afe3
@@ -1754,7 +1796,7 @@ call_remote_stonith(remote_fencing_op_t *op, peer_device_info_t *peer, int rc)
33afe3
         }
33afe3
 
33afe3
         op->state = st_failed;
33afe3
-        remote_op_done(op, NULL, rc, FALSE);
33afe3
+        remote_op_done(op, NULL, pcmk_rc2legacy(stonith__result2rc(result)), FALSE);
33afe3
 
33afe3
     } else {
33afe3
         crm_info("Waiting for additional peers capable of fencing (%s) %s%s%s "
33afe3
@@ -2004,6 +2046,7 @@ process_remote_stonith_query(xmlNode * msg)
33afe3
     peer_device_info_t *peer = NULL;
33afe3
     uint32_t replies_expected;
33afe3
     xmlNode *dev = get_xpath_object("//@" F_STONITH_REMOTE_OP_ID, msg, LOG_ERR);
33afe3
+    pcmk__action_result_t result = PCMK__UNKNOWN_RESULT;
33afe3
 
33afe3
     CRM_CHECK(dev != NULL, return -EPROTO);
33afe3
 
33afe3
@@ -2038,6 +2081,8 @@ process_remote_stonith_query(xmlNode * msg)
33afe3
         peer = add_result(op, host, ndevices, dev);
33afe3
     }
33afe3
 
33afe3
+    pcmk__set_result(&result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
33afe3
+
33afe3
     if (pcmk_is_set(op->call_options, st_opt_topology)) {
33afe3
         /* If we start the fencing before all the topology results are in,
33afe3
          * it is possible fencing levels will be skipped because of the missing
33afe3
@@ -2045,12 +2090,12 @@ process_remote_stonith_query(xmlNode * msg)
33afe3
         if (op->state == st_query && all_topology_devices_found(op)) {
33afe3
             /* All the query results are in for the topology, start the fencing ops. */
33afe3
             crm_trace("All topology devices found");
33afe3
-            call_remote_stonith(op, peer, pcmk_ok);
33afe3
+            request_peer_fencing(op, peer, &result);
33afe3
 
33afe3
         } else if (have_all_replies) {
33afe3
             crm_info("All topology query replies have arrived, continuing (%d expected/%d received) ",
33afe3
                      replies_expected, op->replies);
33afe3
-            call_remote_stonith(op, NULL, pcmk_ok);
33afe3
+            request_peer_fencing(op, NULL, &result);
33afe3
         }
33afe3
 
33afe3
     } else if (op->state == st_query) {
33afe3
@@ -2062,12 +2107,12 @@ process_remote_stonith_query(xmlNode * msg)
33afe3
             /* we have a verified device living on a peer that is not the target */
33afe3
             crm_trace("Found %d verified device%s",
33afe3
                       nverified, pcmk__plural_s(nverified));
33afe3
-            call_remote_stonith(op, peer, pcmk_ok);
33afe3
+            request_peer_fencing(op, peer, &result);
33afe3
 
33afe3
         } else if (have_all_replies) {
33afe3
             crm_info("All query replies have arrived, continuing (%d expected/%d received) ",
33afe3
                      replies_expected, op->replies);
33afe3
-            call_remote_stonith(op, NULL, pcmk_ok);
33afe3
+            request_peer_fencing(op, NULL, &result);
33afe3
 
33afe3
         } else {
33afe3
             crm_trace("Waiting for more peer results before launching fencing operation");
33afe3
@@ -2226,7 +2271,7 @@ fenced_process_fencing_reply(xmlNode *msg)
33afe3
     crm_trace("Next for %s on behalf of %s@%s (result was: %s)",
33afe3
               op->target, op->originator, op->client_name,
33afe3
               pcmk_exec_status_str(result.execution_status));
33afe3
-    call_remote_stonith(op, NULL, pcmk_rc2legacy(stonith__result2rc(&result)));
33afe3
+    request_peer_fencing(op, NULL, &result);
33afe3
 }
33afe3
 
33afe3
 gboolean
33afe3
-- 
33afe3
2.27.0
33afe3
33afe3
33afe3
From 43e08ba7ee1635e47bfaf2a57636101c675b89ae Mon Sep 17 00:00:00 2001
33afe3
From: Ken Gaillot <kgaillot@redhat.com>
33afe3
Date: Fri, 19 Nov 2021 16:02:04 -0600
33afe3
Subject: [PATCH 10/23] Feature: fencer: set exit reason for timeouts waiting
33afe3
 for peer replies
33afe3
33afe3
---
33afe3
 daemons/fenced/fenced_remote.c | 4 +++-
33afe3
 1 file changed, 3 insertions(+), 1 deletion(-)
33afe3
33afe3
diff --git a/daemons/fenced/fenced_remote.c b/daemons/fenced/fenced_remote.c
33afe3
index 31d5ee6e93..415a7c1b98 100644
33afe3
--- a/daemons/fenced/fenced_remote.c
33afe3
+++ b/daemons/fenced/fenced_remote.c
33afe3
@@ -616,7 +616,9 @@ remote_op_timeout_one(gpointer userdata)
33afe3
 
33afe3
     crm_notice("Peer's '%s' action targeting %s for client %s timed out " CRM_XS
33afe3
                " id=%.8s", op->action, op->target, op->client_name, op->id);
33afe3
-    pcmk__set_result(&result, CRM_EX_ERROR, PCMK_EXEC_TIMEOUT, NULL);
33afe3
+    pcmk__set_result(&result, CRM_EX_ERROR, PCMK_EXEC_TIMEOUT,
33afe3
+                     "Peer did not send fence result within timeout");
33afe3
+
33afe3
 
33afe3
     // Try another device, if appropriate
33afe3
     request_peer_fencing(op, NULL, &result);
33afe3
-- 
33afe3
2.27.0
33afe3
33afe3
33afe3
From 34e5baebac78b7235825b31bebc44e3d65ae45cc Mon Sep 17 00:00:00 2001
33afe3
From: Ken Gaillot <kgaillot@redhat.com>
33afe3
Date: Fri, 19 Nov 2021 16:10:28 -0600
33afe3
Subject: [PATCH 11/23] Refactor: fencer: pass full result when handling
33afe3
 duplicate actions
33afe3
33afe3
Rename handle_duplicates() to finalize_op_duplicates() for readability, and
33afe3
make it take a full result rather than a legacy return code as an argument.
33afe3
---
33afe3
 daemons/fenced/fenced_remote.c | 29 +++++++++++++++++++++--------
33afe3
 1 file changed, 21 insertions(+), 8 deletions(-)
33afe3
33afe3
diff --git a/daemons/fenced/fenced_remote.c b/daemons/fenced/fenced_remote.c
33afe3
index 415a7c1b98..850bfb6eb3 100644
33afe3
--- a/daemons/fenced/fenced_remote.c
33afe3
+++ b/daemons/fenced/fenced_remote.c
33afe3
@@ -439,12 +439,19 @@ handle_local_reply_and_notify(remote_fencing_op_t * op, xmlNode * data, int rc)
33afe3
     free_xml(notify_data);
33afe3
 }
33afe3
 
33afe3
+/*!
33afe3
+ * \internal
33afe3
+ * \brief Finalize all duplicates of a given fencer operation
33afe3
+ *
33afe3
+ * \param[in] op         Fencer operation that completed
33afe3
+ * \param[in] data       Top-level XML to add notification to
33afe3
+ * \param[in] result     Full operation result
33afe3
+ */
33afe3
 static void
33afe3
-handle_duplicates(remote_fencing_op_t * op, xmlNode * data, int rc)
33afe3
+finalize_op_duplicates(remote_fencing_op_t *op, xmlNode *data,
33afe3
+                       pcmk__action_result_t *result)
33afe3
 {
33afe3
-    GList *iter = NULL;
33afe3
-
33afe3
-    for (iter = op->duplicates; iter != NULL; iter = iter->next) {
33afe3
+    for (GList *iter = op->duplicates; iter != NULL; iter = iter->next) {
33afe3
         remote_fencing_op_t *other = iter->data;
33afe3
 
33afe3
         if (other->state == st_duplicate) {
33afe3
@@ -452,8 +459,9 @@ handle_duplicates(remote_fencing_op_t * op, xmlNode * data, int rc)
33afe3
             crm_debug("Performing duplicate notification for %s@%s: %s "
33afe3
                       CRM_XS " id=%.8s",
33afe3
                       other->client_name, other->originator,
33afe3
-                      pcmk_strerror(rc), other->id);
33afe3
-            remote_op_done(other, data, rc, TRUE);
33afe3
+                      pcmk_exec_status_str(result->execution_status),
33afe3
+                      other->id);
33afe3
+            remote_op_done(other, data, pcmk_rc2legacy(stonith__result2rc(result)), TRUE);
33afe3
 
33afe3
         } else {
33afe3
             // Possible if (for example) it timed out already
33afe3
@@ -570,8 +578,13 @@ remote_op_done(remote_fencing_op_t * op, xmlNode * data, int rc, int dup)
33afe3
 
33afe3
     handle_local_reply_and_notify(op, data, rc);
33afe3
 
33afe3
-    if (dup == FALSE) {
33afe3
-        handle_duplicates(op, data, rc);
33afe3
+    if (!dup) {
33afe3
+        pcmk__action_result_t result = PCMK__UNKNOWN_RESULT;
33afe3
+
33afe3
+        pcmk__set_result(&result,
33afe3
+                         ((rc == pcmk_ok)? CRM_EX_OK : CRM_EX_ERROR),
33afe3
+                         stonith__legacy2status(rc), NULL);
33afe3
+        finalize_op_duplicates(op, data, &result);
33afe3
     }
33afe3
 
33afe3
     /* Free non-essential parts of the record
33afe3
-- 
33afe3
2.27.0
33afe3
33afe3
33afe3
From 939bd6f5f0f79b19d0cc4d869f3c8980fda2e461 Mon Sep 17 00:00:00 2001
33afe3
From: Ken Gaillot <kgaillot@redhat.com>
33afe3
Date: Fri, 19 Nov 2021 16:23:20 -0600
33afe3
Subject: [PATCH 12/23] Feature: fencer: set exit reasons for fencing timeouts
33afe3
33afe3
finalize_timed_out_op() now takes an exit reason as an argument.
33afe3
It is called for fencing timeouts, peer query reply timeouts,
33afe3
and all capable nodes failing to fence.
33afe3
33afe3
At this point, the exit reason is not used, but that is planned.
33afe3
---
33afe3
 daemons/fenced/fenced_remote.c | 25 +++++++++++++++----------
33afe3
 1 file changed, 15 insertions(+), 10 deletions(-)
33afe3
33afe3
diff --git a/daemons/fenced/fenced_remote.c b/daemons/fenced/fenced_remote.c
33afe3
index 850bfb6eb3..c10a32442e 100644
33afe3
--- a/daemons/fenced/fenced_remote.c
33afe3
+++ b/daemons/fenced/fenced_remote.c
33afe3
@@ -643,10 +643,12 @@ remote_op_timeout_one(gpointer userdata)
33afe3
  * \brief Finalize a remote fencer operation that timed out
33afe3
  *
33afe3
  * \param[in] op      Fencer operation that timed out
33afe3
+ * \param[in] reason  Readable description of what step timed out
33afe3
  */
33afe3
 static void
33afe3
-finalize_timed_out_op(remote_fencing_op_t *op)
33afe3
+finalize_timed_out_op(remote_fencing_op_t *op, const char *reason)
33afe3
 {
33afe3
+    pcmk__action_result_t result = PCMK__UNKNOWN_RESULT;
33afe3
 
33afe3
     op->op_timer_total = 0;
33afe3
 
33afe3
@@ -660,13 +662,13 @@ finalize_timed_out_op(remote_fencing_op_t *op)
33afe3
          * devices, and return success.
33afe3
          */
33afe3
         op->state = st_done;
33afe3
-        remote_op_done(op, NULL, pcmk_ok, FALSE);
33afe3
-        return;
33afe3
+        pcmk__set_result(&result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
33afe3
+    } else {
33afe3
+        op->state = st_failed;
33afe3
+        pcmk__set_result(&result, CRM_EX_ERROR, PCMK_EXEC_TIMEOUT, reason);
33afe3
     }
33afe3
-
33afe3
-    op->state = st_failed;
33afe3
-
33afe3
-    remote_op_done(op, NULL, -ETIME, FALSE);
33afe3
+    remote_op_done(op, NULL, pcmk_rc2legacy(stonith__result2rc(&result)), FALSE);
33afe3
+    pcmk__reset_result(&result);
33afe3
 }
33afe3
 
33afe3
 /*!
33afe3
@@ -687,7 +689,8 @@ remote_op_timeout(gpointer userdata)
33afe3
                   CRM_XS " id=%.8s",
33afe3
                   op->action, op->target, op->client_name, op->id);
33afe3
     } else {
33afe3
-        finalize_timed_out_op(userdata);
33afe3
+        finalize_timed_out_op(userdata, "Fencing could not be completed "
33afe3
+                                        "within overall timeout");
33afe3
     }
33afe3
     return G_SOURCE_REMOVE;
33afe3
 }
33afe3
@@ -719,7 +722,8 @@ remote_op_query_timeout(gpointer data)
33afe3
             g_source_remove(op->op_timer_total);
33afe3
             op->op_timer_total = 0;
33afe3
         }
33afe3
-        finalize_timed_out_op(op);
33afe3
+        finalize_timed_out_op(op, "No capable peers replied to device query "
33afe3
+                                  "within timeout");
33afe3
     }
33afe3
 
33afe3
     return FALSE;
33afe3
@@ -1767,7 +1771,8 @@ request_peer_fencing(remote_fencing_op_t *op, peer_device_info_t *peer,
33afe3
                  CRM_XS " state=%s", op->action, op->target, op->client_name,
33afe3
                  stonith_op_state_str(op->state));
33afe3
         CRM_CHECK(op->state < st_done, return);
33afe3
-        finalize_timed_out_op(op);
33afe3
+        finalize_timed_out_op(op, "All nodes failed, or are unable, to "
33afe3
+                                  "fence target");
33afe3
 
33afe3
     } else if(op->replies >= op->replies_expected || op->replies >= fencing_active_peers()) {
33afe3
         /* if the operation never left the query state,
33afe3
-- 
33afe3
2.27.0
33afe3
33afe3
33afe3
From b80b02799260feb98723a460f2f8e8ad5cdc467f Mon Sep 17 00:00:00 2001
33afe3
From: Ken Gaillot <kgaillot@redhat.com>
33afe3
Date: Fri, 19 Nov 2021 16:32:04 -0600
33afe3
Subject: [PATCH 13/23] Refactor: fencer: pass full result when finalizing peer
33afe3
 fencing actions
33afe3
33afe3
Rename remote_op_done() to finalize_op() for readability, and make it take a
33afe3
full result as an argument, rather than a legacy return code.
33afe3
33afe3
This does cause one change in behavior: when all topology levels fail,
33afe3
the legacy return code returned will be -pcmk_err_generic instead of EINVAL.
33afe3
---
33afe3
 daemons/fenced/fenced_history.c |   2 +-
33afe3
 daemons/fenced/fenced_remote.c  | 177 ++++++++++++++++++--------------
33afe3
 2 files changed, 103 insertions(+), 76 deletions(-)
33afe3
33afe3
diff --git a/daemons/fenced/fenced_history.c b/daemons/fenced/fenced_history.c
33afe3
index bc159383c2..9e38ff0a20 100644
33afe3
--- a/daemons/fenced/fenced_history.c
33afe3
+++ b/daemons/fenced/fenced_history.c
33afe3
@@ -374,7 +374,7 @@ stonith_local_history_diff_and_merge(GHashTable *remote_history,
33afe3
                 set_fencing_completed(op);
33afe3
                 /* use -EHOSTUNREACH to not introduce a new return-code that might
33afe3
                    trigger unexpected results at other places and to prevent
33afe3
-                   remote_op_done from setting the delegate if not present
33afe3
+                   finalize_op from setting the delegate if not present
33afe3
                 */
33afe3
                 stonith_bcast_result_to_peers(op, -EHOSTUNREACH, FALSE);
33afe3
             }
33afe3
diff --git a/daemons/fenced/fenced_remote.c b/daemons/fenced/fenced_remote.c
33afe3
index c10a32442e..aefc5f311c 100644
33afe3
--- a/daemons/fenced/fenced_remote.c
33afe3
+++ b/daemons/fenced/fenced_remote.c
33afe3
@@ -76,13 +76,14 @@ typedef struct {
33afe3
 
33afe3
 GHashTable *stonith_remote_op_list = NULL;
33afe3
 
33afe3
-static void remote_op_done(remote_fencing_op_t * op, xmlNode * data, int rc, int dup);
33afe3
 extern xmlNode *stonith_create_op(int call_id, const char *token, const char *op, xmlNode * data,
33afe3
                                   int call_options);
33afe3
 
33afe3
 static void request_peer_fencing(remote_fencing_op_t *op,
33afe3
                                 peer_device_info_t *peer,
33afe3
                                 pcmk__action_result_t *result);
33afe3
+static void finalize_op(remote_fencing_op_t *op, xmlNode *data,
33afe3
+                        pcmk__action_result_t *result, bool dup);
33afe3
 static void report_timeout_period(remote_fencing_op_t * op, int op_timeout);
33afe3
 static int get_op_total_timeout(const remote_fencing_op_t *op,
33afe3
                                 const peer_device_info_t *chosen_peer);
33afe3
@@ -461,7 +462,7 @@ finalize_op_duplicates(remote_fencing_op_t *op, xmlNode *data,
33afe3
                       other->client_name, other->originator,
33afe3
                       pcmk_exec_status_str(result->execution_status),
33afe3
                       other->id);
33afe3
-            remote_op_done(other, data, pcmk_rc2legacy(stonith__result2rc(result)), TRUE);
33afe3
+            finalize_op(other, data, result, true);
33afe3
 
33afe3
         } else {
33afe3
             // Possible if (for example) it timed out already
33afe3
@@ -487,104 +488,100 @@ delegate_from_xml(xmlNode *xml)
33afe3
 
33afe3
 /*!
33afe3
  * \internal
33afe3
- * \brief Finalize a remote operation.
33afe3
+ * \brief Finalize a peer fencing operation
33afe3
  *
33afe3
- * \description This function has two code paths.
33afe3
+ * Clean up after a fencing operation completes. This function has two code
33afe3
+ * paths: the executioner uses it to broadcast the result to CPG peers, and then
33afe3
+ * each peer (including the executioner) uses it to process that broadcast and
33afe3
+ * notify its IPC clients of the result.
33afe3
  *
33afe3
- * Path 1. This node is the owner of the operation and needs
33afe3
- *         to notify the cpg group via a broadcast as to the operation's
33afe3
- *         results.
33afe3
- *
33afe3
- * Path 2. The cpg broadcast is received. All nodes notify their local
33afe3
- *         stonith clients the operation results.
33afe3
- *
33afe3
- * So, The owner of the operation first notifies the cluster of the result,
33afe3
- * and once that cpg notify is received back it notifies all the local clients.
33afe3
- *
33afe3
- * Nodes that are passive watchers of the operation will receive the
33afe3
- * broadcast and only need to notify their local clients the operation finished.
33afe3
- *
33afe3
- * \param op, The fencing operation to finalize
33afe3
- * \param data, The xml msg reply (if present) of the last delegated fencing
33afe3
- *              operation.
33afe3
- * \param dup, Is this operation a duplicate, if so treat it a little differently
33afe3
- *             making sure the broadcast is not sent out.
33afe3
+ * \param[in] op      Fencer operation that completed
33afe3
+ * \param[in] data    If not NULL, XML reply of last delegated fencing operation
33afe3
+ * \param[in] result  Full operation result
33afe3
+ * \param[in] dup     Whether this operation is a duplicate of another
33afe3
+ *                    (in which case, do not broadcast the result)
33afe3
  */
33afe3
 static void
33afe3
-remote_op_done(remote_fencing_op_t * op, xmlNode * data, int rc, int dup)
33afe3
+finalize_op(remote_fencing_op_t *op, xmlNode *data,
33afe3
+            pcmk__action_result_t *result, bool dup)
33afe3
 {
33afe3
     int level = LOG_ERR;
33afe3
     const char *subt = NULL;
33afe3
     xmlNode *local_data = NULL;
33afe3
     gboolean op_merged = FALSE;
33afe3
 
33afe3
+    CRM_CHECK((op != NULL) && (result != NULL), return);
33afe3
+
33afe3
+    if (op->notify_sent) {
33afe3
+        // Most likely, this is a timed-out action that eventually completed
33afe3
+        crm_notice("Operation '%s'%s%s by %s for %s@%s%s: "
33afe3
+                   "Result arrived too late " CRM_XS " id=%.8s",
33afe3
+                   op->action, (op->target? " targeting " : ""),
33afe3
+                   (op->target? op->target : ""),
33afe3
+                   (op->delegate? op->delegate : "unknown node"),
33afe3
+                   op->client_name, op->originator,
33afe3
+                   (op_merged? " (merged)" : ""),
33afe3
+                   op->id);
33afe3
+        return;
33afe3
+    }
33afe3
+
33afe3
     set_fencing_completed(op);
33afe3
     clear_remote_op_timers(op);
33afe3
     undo_op_remap(op);
33afe3
 
33afe3
-    if (op->notify_sent == TRUE) {
33afe3
-        crm_err("Already sent notifications for '%s' targeting %s by %s for "
33afe3
-                "client %s@%s: %s " CRM_XS " rc=%d state=%s id=%.8s",
33afe3
-                op->action, op->target,
33afe3
-                (op->delegate? op->delegate : "unknown node"),
33afe3
-                op->client_name, op->originator, pcmk_strerror(rc),
33afe3
-                rc, stonith_op_state_str(op->state), op->id);
33afe3
-        goto remote_op_done_cleanup;
33afe3
-    }
33afe3
-
33afe3
     if (data == NULL) {
33afe3
         data = create_xml_node(NULL, "remote-op");
33afe3
         local_data = data;
33afe3
 
33afe3
     } else if (op->delegate == NULL) {
33afe3
-        switch (rc) {
33afe3
-            case -ENODEV:
33afe3
-            case -EHOSTUNREACH:
33afe3
+        switch (result->execution_status) {
33afe3
+            case PCMK_EXEC_NO_FENCE_DEVICE:
33afe3
                 break;
33afe3
+            case PCMK_EXEC_INVALID:
33afe3
+                if (result->exit_status == CRM_EX_EXPIRED) {
33afe3
+                    break;
33afe3
+                }
33afe3
+                // else fall through
33afe3
             default:
33afe3
                 op->delegate = delegate_from_xml(data);
33afe3
                 break;
33afe3
         }
33afe3
     }
33afe3
 
33afe3
-    if(dup) {
33afe3
-        op_merged = TRUE;
33afe3
-    } else if (crm_element_value(data, F_STONITH_MERGED)) {
33afe3
-        op_merged = TRUE;
33afe3
-    } 
33afe3
+    if (dup || (crm_element_value(data, F_STONITH_MERGED) != NULL)) {
33afe3
+        op_merged = true;
33afe3
+    }
33afe3
 
33afe3
     /* Tell everyone the operation is done, we will continue
33afe3
      * with doing the local notifications once we receive
33afe3
      * the broadcast back. */
33afe3
     subt = crm_element_value(data, F_SUBTYPE);
33afe3
-    if (dup == FALSE && !pcmk__str_eq(subt, "broadcast", pcmk__str_casei)) {
33afe3
+    if (!dup && !pcmk__str_eq(subt, "broadcast", pcmk__str_casei)) {
33afe3
         /* Defer notification until the bcast message arrives */
33afe3
-        stonith_bcast_result_to_peers(op, rc, (op_merged? TRUE: FALSE));
33afe3
-        goto remote_op_done_cleanup;
33afe3
+        stonith_bcast_result_to_peers(op, pcmk_rc2legacy(stonith__result2rc(result)), op_merged);
33afe3
+        free_xml(local_data);
33afe3
+        return;
33afe3
     }
33afe3
 
33afe3
-    if (rc == pcmk_ok || dup) {
33afe3
-        level = LOG_NOTICE;
33afe3
-    } else if (!pcmk__str_eq(op->originator, stonith_our_uname, pcmk__str_casei)) {
33afe3
+    if (pcmk__result_ok(result) || dup
33afe3
+        || !pcmk__str_eq(op->originator, stonith_our_uname, pcmk__str_casei)) {
33afe3
         level = LOG_NOTICE;
33afe3
     }
33afe3
-
33afe3
-    do_crm_log(level, "Operation '%s'%s%s by %s for %s@%s%s: %s "
33afe3
+    do_crm_log(level, "Operation '%s'%s%s by %s for %s@%s%s: %s (%s%s%s) "
33afe3
                CRM_XS " id=%.8s", op->action, (op->target? " targeting " : ""),
33afe3
                (op->target? op->target : ""),
33afe3
                (op->delegate? op->delegate : "unknown node"),
33afe3
                op->client_name, op->originator,
33afe3
-               (op_merged? " (merged)" : ""), pcmk_strerror(rc), op->id);
33afe3
+               (op_merged? " (merged)" : ""), crm_exit_str(result->exit_status),
33afe3
+               pcmk_exec_status_str(result->execution_status),
33afe3
+               ((result->exit_reason == NULL)? "" : ": "),
33afe3
+               ((result->exit_reason == NULL)? "" : result->exit_reason),
33afe3
+               op->id);
33afe3
 
33afe3
-    handle_local_reply_and_notify(op, data, rc);
33afe3
+    handle_local_reply_and_notify(op, data, pcmk_rc2legacy(stonith__result2rc(result)));
33afe3
 
33afe3
     if (!dup) {
33afe3
-        pcmk__action_result_t result = PCMK__UNKNOWN_RESULT;
33afe3
-
33afe3
-        pcmk__set_result(&result,
33afe3
-                         ((rc == pcmk_ok)? CRM_EX_OK : CRM_EX_ERROR),
33afe3
-                         stonith__legacy2status(rc), NULL);
33afe3
-        finalize_op_duplicates(op, data, &result);
33afe3
+        finalize_op_duplicates(op, data, result);
33afe3
     }
33afe3
 
33afe3
     /* Free non-essential parts of the record
33afe3
@@ -594,20 +591,27 @@ remote_op_done(remote_fencing_op_t * op, xmlNode * data, int rc, int dup)
33afe3
         g_list_free_full(op->query_results, free_remote_query);
33afe3
         op->query_results = NULL;
33afe3
     }
33afe3
-
33afe3
     if (op->request) {
33afe3
         free_xml(op->request);
33afe3
         op->request = NULL;
33afe3
     }
33afe3
 
33afe3
-  remote_op_done_cleanup:
33afe3
     free_xml(local_data);
33afe3
 }
33afe3
 
33afe3
+/*!
33afe3
+ * \internal
33afe3
+ * \brief Finalize a watchdog fencer op after the waiting time expires
33afe3
+ *
33afe3
+ * \param[in] userdata  Fencer operation that completed
33afe3
+ *
33afe3
+ * \return G_SOURCE_REMOVE (which tells glib not to restart timer)
33afe3
+ */
33afe3
 static gboolean
33afe3
 remote_op_watchdog_done(gpointer userdata)
33afe3
 {
33afe3
     remote_fencing_op_t *op = userdata;
33afe3
+    pcmk__action_result_t result = PCMK__UNKNOWN_RESULT;
33afe3
 
33afe3
     op->op_timer_one = 0;
33afe3
 
33afe3
@@ -615,8 +619,9 @@ remote_op_watchdog_done(gpointer userdata)
33afe3
                CRM_XS " id=%.8s",
33afe3
                op->action, op->target, op->client_name, op->id);
33afe3
     op->state = st_done;
33afe3
-    remote_op_done(op, NULL, pcmk_ok, FALSE);
33afe3
-    return FALSE;
33afe3
+    pcmk__set_result(&result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
33afe3
+    finalize_op(op, NULL, &result, false);
33afe3
+    return G_SOURCE_REMOVE;
33afe3
 }
33afe3
 
33afe3
 static gboolean
33afe3
@@ -667,7 +672,7 @@ finalize_timed_out_op(remote_fencing_op_t *op, const char *reason)
33afe3
         op->state = st_failed;
33afe3
         pcmk__set_result(&result, CRM_EX_ERROR, PCMK_EXEC_TIMEOUT, reason);
33afe3
     }
33afe3
-    remote_op_done(op, NULL, pcmk_rc2legacy(stonith__result2rc(&result)), FALSE);
33afe3
+    finalize_op(op, NULL, &result, false);
33afe3
     pcmk__reset_result(&result);
33afe3
 }
33afe3
 
33afe3
@@ -1064,9 +1069,13 @@ fenced_handle_manual_confirmation(pcmk__client_t *client, xmlNode *msg)
33afe3
     set_fencing_completed(op);
33afe3
     op->delegate = strdup("a human");
33afe3
 
33afe3
-    // For the fencer's purposes, the fencing operation is done
33afe3
+    {
33afe3
+        // For the fencer's purposes, the fencing operation is done
33afe3
+        pcmk__action_result_t result = PCMK__UNKNOWN_RESULT;
33afe3
 
33afe3
-    remote_op_done(op, msg, pcmk_ok, FALSE);
33afe3
+        pcmk__set_result(&result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
33afe3
+        finalize_op(op, msg, &result, false);
33afe3
+    }
33afe3
 
33afe3
     /* For the requester's purposes, the operation is still pending. The
33afe3
      * actual result will be sent asynchronously via the operation's done_cb().
33afe3
@@ -1200,6 +1209,16 @@ create_remote_stonith_op(const char *client, xmlNode * request, gboolean peer)
33afe3
     return op;
33afe3
 }
33afe3
 
33afe3
+/*!
33afe3
+ * \internal
33afe3
+ * \brief Create a peer fencing operation from a request, and initiate it
33afe3
+ *
33afe3
+ * \param[in] client     IPC client that made request (NULL to get from request)
33afe3
+ * \param[in] request    Request XML
33afe3
+ * \param[in] manual_ack Whether this is a manual action confirmation
33afe3
+ *
33afe3
+ * \return Newly created operation on success, otherwise NULL
33afe3
+ */
33afe3
 remote_fencing_op_t *
33afe3
 initiate_remote_stonith_op(pcmk__client_t *client, xmlNode *request,
33afe3
                            gboolean manual_ack)
33afe3
@@ -1234,9 +1253,17 @@ initiate_remote_stonith_op(pcmk__client_t *client, xmlNode *request,
33afe3
 
33afe3
     switch (op->state) {
33afe3
         case st_failed:
33afe3
-            crm_warn("Could not request peer fencing (%s) targeting %s "
33afe3
-                     CRM_XS " id=%.8s", op->action, op->target, op->id);
33afe3
-            remote_op_done(op, NULL, -EINVAL, FALSE);
33afe3
+            // advance_topology_level() exhausted levels
33afe3
+            {
33afe3
+                pcmk__action_result_t result = PCMK__UNKNOWN_RESULT;
33afe3
+
33afe3
+                pcmk__set_result(&result, CRM_EX_ERROR, PCMK_EXEC_ERROR,
33afe3
+                                 "All topology levels failed");
33afe3
+                crm_warn("Could not request peer fencing (%s) targeting %s "
33afe3
+                         CRM_XS " id=%.8s", op->action, op->target, op->id);
33afe3
+                finalize_op(op, NULL, &result, false);
33afe3
+                pcmk__reset_result(&result);
33afe3
+            }
33afe3
             return op;
33afe3
 
33afe3
         case st_duplicate:
33afe3
@@ -1607,7 +1634,7 @@ advance_topology_device_in_level(remote_fencing_op_t *op, const char *device,
33afe3
         crm_trace("Marking complex fencing op targeting %s as complete",
33afe3
                   op->target);
33afe3
         op->state = st_done;
33afe3
-        remote_op_done(op, msg, pcmk_ok, FALSE);
33afe3
+        finalize_op(op, msg, &result, false);
33afe3
     }
33afe3
 }
33afe3
 
33afe3
@@ -1805,7 +1832,7 @@ request_peer_fencing(remote_fencing_op_t *op, peer_device_info_t *peer,
33afe3
                                  PCMK_EXEC_NO_FENCE_DEVICE, NULL);
33afe3
             }
33afe3
             /* ... else use result provided by caller -- overwriting it with
33afe3
-               PCMK_EXEC_NO_FENCE_DEVICE would prevent remote_op_done() from
33afe3
+               PCMK_EXEC_NO_FENCE_DEVICE would prevent finalize_op() from
33afe3
                setting the correct delegate if needed.
33afe3
              */
33afe3
 
33afe3
@@ -1816,7 +1843,7 @@ request_peer_fencing(remote_fencing_op_t *op, peer_device_info_t *peer,
33afe3
         }
33afe3
 
33afe3
         op->state = st_failed;
33afe3
-        remote_op_done(op, NULL, pcmk_rc2legacy(stonith__result2rc(result)), FALSE);
33afe3
+        finalize_op(op, NULL, result, false);
33afe3
 
33afe3
     } else {
33afe3
         crm_info("Waiting for additional peers capable of fencing (%s) %s%s%s "
33afe3
@@ -2216,7 +2243,7 @@ fenced_process_fencing_reply(xmlNode *msg)
33afe3
         } else {
33afe3
             op->state = st_failed;
33afe3
         }
33afe3
-        remote_op_done(op, msg, pcmk_rc2legacy(stonith__result2rc(&result)), FALSE);
33afe3
+        finalize_op(op, msg, &result, false);
33afe3
         return;
33afe3
     } else if (!pcmk__str_eq(op->originator, stonith_our_uname, pcmk__str_casei)) {
33afe3
         /* If this isn't a remote level broadcast, and we are not the
33afe3
@@ -2241,7 +2268,7 @@ fenced_process_fencing_reply(xmlNode *msg)
33afe3
         /* We own the op, and it is complete. broadcast the result to all nodes
33afe3
          * and notify our local clients. */
33afe3
         if (op->state == st_done) {
33afe3
-            remote_op_done(op, msg, pcmk_rc2legacy(stonith__result2rc(&result)), FALSE);
33afe3
+            finalize_op(op, msg, &result, false);
33afe3
             return;
33afe3
         }
33afe3
 
33afe3
@@ -2268,20 +2295,20 @@ fenced_process_fencing_reply(xmlNode *msg)
33afe3
              * levels are available, mark this operation as failed and report results. */
33afe3
             if (advance_topology_level(op, false) != pcmk_rc_ok) {
33afe3
                 op->state = st_failed;
33afe3
-                remote_op_done(op, msg, pcmk_rc2legacy(stonith__result2rc(&result)), FALSE);
33afe3
+                finalize_op(op, msg, &result, false);
33afe3
                 return;
33afe3
             }
33afe3
         }
33afe3
     } else if (pcmk__result_ok(&result) && (op->devices == NULL)) {
33afe3
         crm_trace("All done for %s", op->target);
33afe3
         op->state = st_done;
33afe3
-        remote_op_done(op, msg, pcmk_rc2legacy(stonith__result2rc(&result)), FALSE);
33afe3
+        finalize_op(op, msg, &result, false);
33afe3
         return;
33afe3
     } else if ((result.execution_status == PCMK_EXEC_TIMEOUT)
33afe3
                && (op->devices == NULL)) {
33afe3
         /* If the operation timed out don't bother retrying other peers. */
33afe3
         op->state = st_failed;
33afe3
-        remote_op_done(op, msg, pcmk_rc2legacy(stonith__result2rc(&result)), FALSE);
33afe3
+        finalize_op(op, msg, &result, false);
33afe3
         return;
33afe3
     } else {
33afe3
         /* fall-through and attempt other fencing action using another peer */
33afe3
-- 
33afe3
2.27.0
33afe3
33afe3
33afe3
From 8f19c09f1b961ba9aa510b7dcd1875bbabcddcdc Mon Sep 17 00:00:00 2001
33afe3
From: Ken Gaillot <kgaillot@redhat.com>
33afe3
Date: Fri, 19 Nov 2021 16:39:23 -0600
33afe3
Subject: [PATCH 14/23] Refactor: fencer: pass full result when broadcasting
33afe3
 replies
33afe3
33afe3
Rename stonith_bcast_result_to_peers() to fenced_broadcast_op_result() for
33afe3
consistency, and make it take the full result as an argument instead of a
33afe3
legacy return code. The full result is not yet used, but that is planned.
33afe3
---
33afe3
 daemons/fenced/fenced_history.c   | 18 ++++++++++++------
33afe3
 daemons/fenced/fenced_remote.c    | 15 ++++++++++++---
33afe3
 daemons/fenced/pacemaker-fenced.h |  9 ++-------
33afe3
 3 files changed, 26 insertions(+), 16 deletions(-)
33afe3
33afe3
diff --git a/daemons/fenced/fenced_history.c b/daemons/fenced/fenced_history.c
33afe3
index 9e38ff0a20..1e07a9815a 100644
33afe3
--- a/daemons/fenced/fenced_history.c
33afe3
+++ b/daemons/fenced/fenced_history.c
33afe3
@@ -359,24 +359,29 @@ stonith_local_history_diff_and_merge(GHashTable *remote_history,
33afe3
     }
33afe3
 
33afe3
     if (remote_history) {
33afe3
+        pcmk__action_result_t result = PCMK__UNKNOWN_RESULT;
33afe3
+
33afe3
         init_stonith_remote_op_hash_table(&stonith_remote_op_list);
33afe3
 
33afe3
         updated |= g_hash_table_size(remote_history);
33afe3
 
33afe3
         g_hash_table_iter_init(&iter, remote_history);
33afe3
         while (g_hash_table_iter_next(&iter, NULL, (void **)&op)) {
33afe3
-
33afe3
             if (stonith__op_state_pending(op->state) &&
33afe3
                 pcmk__str_eq(op->originator, stonith_our_uname, pcmk__str_casei)) {
33afe3
+
33afe3
                 crm_warn("Failing pending operation %.8s originated by us but "
33afe3
                          "known only from peer history", op->id);
33afe3
                 op->state = st_failed;
33afe3
                 set_fencing_completed(op);
33afe3
-                /* use -EHOSTUNREACH to not introduce a new return-code that might
33afe3
-                   trigger unexpected results at other places and to prevent
33afe3
-                   finalize_op from setting the delegate if not present
33afe3
-                */
33afe3
-                stonith_bcast_result_to_peers(op, -EHOSTUNREACH, FALSE);
33afe3
+
33afe3
+                /* CRM_EX_EXPIRED + PCMK_EXEC_INVALID prevents finalize_op()
33afe3
+                 * from setting a delegate
33afe3
+                 */
33afe3
+                pcmk__set_result(&result, CRM_EX_EXPIRED, PCMK_EXEC_INVALID,
33afe3
+                                 "Initiated by earlier fencer "
33afe3
+                                 "process and presumed failed");
33afe3
+                fenced_broadcast_op_result(op, &result, false);
33afe3
             }
33afe3
 
33afe3
             g_hash_table_iter_steal(&iter);
33afe3
@@ -391,6 +396,7 @@ stonith_local_history_diff_and_merge(GHashTable *remote_history,
33afe3
              */
33afe3
         }
33afe3
 
33afe3
+        pcmk__reset_result(&result);
33afe3
         g_hash_table_destroy(remote_history); /* remove what is left */
33afe3
     }
33afe3
 
33afe3
diff --git a/daemons/fenced/fenced_remote.c b/daemons/fenced/fenced_remote.c
33afe3
index aefc5f311c..a0f026c790 100644
33afe3
--- a/daemons/fenced/fenced_remote.c
33afe3
+++ b/daemons/fenced/fenced_remote.c
33afe3
@@ -374,12 +374,21 @@ create_op_done_notify(remote_fencing_op_t * op, int rc)
33afe3
     return notify_data;
33afe3
 }
33afe3
 
33afe3
+/*!
33afe3
+ * \internal
33afe3
+ * \brief Broadcast a fence result notification to all CPG peers
33afe3
+ *
33afe3
+ * \param[in] op         Fencer operation that completed
33afe3
+ * \param[in] result     Full operation result
33afe3
+ * \param[in] op_merged  Whether this operation is a duplicate of another
33afe3
+ */
33afe3
 void
33afe3
-stonith_bcast_result_to_peers(remote_fencing_op_t * op, int rc, gboolean op_merged)
33afe3
+fenced_broadcast_op_result(remote_fencing_op_t *op,
33afe3
+                           pcmk__action_result_t *result, bool op_merged)
33afe3
 {
33afe3
     static int count = 0;
33afe3
     xmlNode *bcast = create_xml_node(NULL, T_STONITH_REPLY);
33afe3
-    xmlNode *notify_data = create_op_done_notify(op, rc);
33afe3
+    xmlNode *notify_data = create_op_done_notify(op, pcmk_rc2legacy(stonith__result2rc(result)));
33afe3
 
33afe3
     count++;
33afe3
     crm_trace("Broadcasting result to peers");
33afe3
@@ -558,7 +567,7 @@ finalize_op(remote_fencing_op_t *op, xmlNode *data,
33afe3
     subt = crm_element_value(data, F_SUBTYPE);
33afe3
     if (!dup && !pcmk__str_eq(subt, "broadcast", pcmk__str_casei)) {
33afe3
         /* Defer notification until the bcast message arrives */
33afe3
-        stonith_bcast_result_to_peers(op, pcmk_rc2legacy(stonith__result2rc(result)), op_merged);
33afe3
+        fenced_broadcast_op_result(op, result, op_merged);
33afe3
         free_xml(local_data);
33afe3
         return;
33afe3
     }
33afe3
diff --git a/daemons/fenced/pacemaker-fenced.h b/daemons/fenced/pacemaker-fenced.h
33afe3
index d5f4bc79fd..ed47ab046c 100644
33afe3
--- a/daemons/fenced/pacemaker-fenced.h
33afe3
+++ b/daemons/fenced/pacemaker-fenced.h
33afe3
@@ -153,13 +153,8 @@ typedef struct remote_fencing_op_s {
33afe3
 
33afe3
 } remote_fencing_op_t;
33afe3
 
33afe3
-/*!
33afe3
- * \internal
33afe3
- * \brief Broadcast the result of an operation to the peers.
33afe3
- * \param op, Operation whose result should be broadcast
33afe3
- * \param rc, Result of the operation
33afe3
- */
33afe3
-void stonith_bcast_result_to_peers(remote_fencing_op_t * op, int rc, gboolean op_merged);
33afe3
+void fenced_broadcast_op_result(remote_fencing_op_t *op,
33afe3
+                                pcmk__action_result_t *result, bool op_merged);
33afe3
 
33afe3
 // Fencer-specific client flags
33afe3
 enum st_client_flags {
33afe3
-- 
33afe3
2.27.0
33afe3
33afe3
33afe3
From 3396e66b4c9cca895c7412b66159fd2342de1911 Mon Sep 17 00:00:00 2001
33afe3
From: Ken Gaillot <kgaillot@redhat.com>
33afe3
Date: Fri, 19 Nov 2021 16:42:46 -0600
33afe3
Subject: [PATCH 15/23] Feature: fencer: add full result to local replies
33afe3
33afe3
handle_local_reply_and_notify() now takes the full result as an argument
33afe3
instead of a legacy return code, and adds it to the reply to the local
33afe3
requester. It does not add it to notifications yet, but that is planned.
33afe3
---
33afe3
 daemons/fenced/fenced_remote.c | 26 ++++++++++++++------------
33afe3
 1 file changed, 14 insertions(+), 12 deletions(-)
33afe3
33afe3
diff --git a/daemons/fenced/fenced_remote.c b/daemons/fenced/fenced_remote.c
33afe3
index a0f026c790..329e06c444 100644
33afe3
--- a/daemons/fenced/fenced_remote.c
33afe3
+++ b/daemons/fenced/fenced_remote.c
33afe3
@@ -409,8 +409,17 @@ fenced_broadcast_op_result(remote_fencing_op_t *op,
33afe3
     return;
33afe3
 }
33afe3
 
33afe3
+/*!
33afe3
+ * \internal
33afe3
+ * \brief Reply to a local request originator and notify all subscribed clients
33afe3
+ *
33afe3
+ * \param[in] op         Fencer operation that completed
33afe3
+ * \param[in] data       Top-level XML to add notification to
33afe3
+ * \param[in] result     Full operation result
33afe3
+ */
33afe3
 static void
33afe3
-handle_local_reply_and_notify(remote_fencing_op_t * op, xmlNode * data, int rc)
33afe3
+handle_local_reply_and_notify(remote_fencing_op_t *op, xmlNode *data,
33afe3
+                              pcmk__action_result_t *result)
33afe3
 {
33afe3
     xmlNode *notify_data = NULL;
33afe3
     xmlNode *reply = NULL;
33afe3
@@ -421,26 +430,19 @@ handle_local_reply_and_notify(remote_fencing_op_t * op, xmlNode * data, int rc)
33afe3
     }
33afe3
 
33afe3
     /* Do notification with a clean data object */
33afe3
-    notify_data = create_op_done_notify(op, rc);
33afe3
+    notify_data = create_op_done_notify(op, pcmk_rc2legacy(stonith__result2rc(result)));
33afe3
     crm_xml_add_int(data, "state", op->state);
33afe3
     crm_xml_add(data, F_STONITH_TARGET, op->target);
33afe3
     crm_xml_add(data, F_STONITH_OPERATION, op->action);
33afe3
 
33afe3
-    {
33afe3
-        pcmk__action_result_t result = PCMK__UNKNOWN_RESULT;
33afe3
-
33afe3
-        pcmk__set_result(&result,
33afe3
-                         ((rc == pcmk_ok)? CRM_EX_OK : CRM_EX_ERROR),
33afe3
-                         stonith__legacy2status(rc), NULL);
33afe3
-        reply = fenced_construct_reply(op->request, data, &result);
33afe3
-    }
33afe3
+    reply = fenced_construct_reply(op->request, data, result);
33afe3
     crm_xml_add(reply, F_STONITH_DELEGATE, op->delegate);
33afe3
 
33afe3
     /* Send fencing OP reply to local client that initiated fencing */
33afe3
     do_local_reply(reply, op->client_id, op->call_options & st_opt_sync_call, FALSE);
33afe3
 
33afe3
     /* bcast to all local clients that the fencing operation happend */
33afe3
-    do_stonith_notify(T_STONITH_NOTIFY_FENCE, rc, notify_data);
33afe3
+    do_stonith_notify(T_STONITH_NOTIFY_FENCE, pcmk_rc2legacy(stonith__result2rc(result)), notify_data);
33afe3
     do_stonith_notify(T_STONITH_NOTIFY_HISTORY, pcmk_ok, NULL);
33afe3
 
33afe3
     /* mark this op as having notify's already sent */
33afe3
@@ -587,7 +589,7 @@ finalize_op(remote_fencing_op_t *op, xmlNode *data,
33afe3
                ((result->exit_reason == NULL)? "" : result->exit_reason),
33afe3
                op->id);
33afe3
 
33afe3
-    handle_local_reply_and_notify(op, data, pcmk_rc2legacy(stonith__result2rc(result)));
33afe3
+    handle_local_reply_and_notify(op, data, result);
33afe3
 
33afe3
     if (!dup) {
33afe3
         finalize_op_duplicates(op, data, result);
33afe3
-- 
33afe3
2.27.0
33afe3
33afe3
33afe3
From 004583f3ef908cbd9dc6305597cb55d5ad22882c Mon Sep 17 00:00:00 2001
33afe3
From: Ken Gaillot <kgaillot@redhat.com>
33afe3
Date: Fri, 19 Nov 2021 16:47:13 -0600
33afe3
Subject: [PATCH 16/23] Refactor: fencer: pass full result when sending device
33afe3
 notifications
33afe3
33afe3
Rename do_stonith_notify_device() to fenced_send_device_notification() for
33afe3
consistency, and make it take the full result as an argument rather than a
33afe3
legacy return code. The full result is not used yet, but that is planned.
33afe3
---
33afe3
 daemons/fenced/fenced_commands.c  |  4 ++--
33afe3
 daemons/fenced/pacemaker-fenced.c | 15 +++++++++++++--
33afe3
 daemons/fenced/pacemaker-fenced.h |  4 +++-
33afe3
 3 files changed, 18 insertions(+), 5 deletions(-)
33afe3
33afe3
diff --git a/daemons/fenced/fenced_commands.c b/daemons/fenced/fenced_commands.c
33afe3
index 84f89e8daf..86a761dfab 100644
33afe3
--- a/daemons/fenced/fenced_commands.c
33afe3
+++ b/daemons/fenced/fenced_commands.c
33afe3
@@ -3190,7 +3190,7 @@ handle_request(pcmk__client_t *client, uint32_t id, uint32_t flags,
33afe3
                              PCMK_EXEC_INVALID,
33afe3
                              "Unprivileged users must register device via CIB");
33afe3
         }
33afe3
-        do_stonith_notify_device(op, pcmk_rc2legacy(stonith__result2rc(&result)), device_id);
33afe3
+        fenced_send_device_notification(op, &result, device_id);
33afe3
 
33afe3
     } else if (pcmk__str_eq(op, STONITH_OP_DEVICE_DEL, pcmk__str_none)) {
33afe3
         xmlNode *dev = get_xpath_object("//" F_STONITH_DEVICE, request, LOG_ERR);
33afe3
@@ -3204,7 +3204,7 @@ handle_request(pcmk__client_t *client, uint32_t id, uint32_t flags,
33afe3
                              PCMK_EXEC_INVALID,
33afe3
                              "Unprivileged users must delete device via CIB");
33afe3
         }
33afe3
-        do_stonith_notify_device(op, pcmk_rc2legacy(stonith__result2rc(&result)), device_id);
33afe3
+        fenced_send_device_notification(op, &result, device_id);
33afe3
 
33afe3
     } else if (pcmk__str_eq(op, STONITH_OP_LEVEL_ADD, pcmk__str_none)) {
33afe3
         char *device_id = NULL;
33afe3
diff --git a/daemons/fenced/pacemaker-fenced.c b/daemons/fenced/pacemaker-fenced.c
33afe3
index 56acc93f31..42e167ce78 100644
33afe3
--- a/daemons/fenced/pacemaker-fenced.c
33afe3
+++ b/daemons/fenced/pacemaker-fenced.c
33afe3
@@ -394,10 +394,21 @@ do_stonith_notify_config(const char *op, int rc,
33afe3
     free_xml(notify_data);
33afe3
 }
33afe3
 
33afe3
+/*!
33afe3
+ * \internal
33afe3
+ * \brief Send notifications for a device change to subscribed clients
33afe3
+ *
33afe3
+ * \param[in] op      Notification type (STONITH_OP_DEVICE_ADD or
33afe3
+ *                    STONITH_OP_DEVICE_DEL)
33afe3
+ * \param[in] result  Operation result
33afe3
+ * \param[in] desc    ID of device that changed
33afe3
+ */
33afe3
 void
33afe3
-do_stonith_notify_device(const char *op, int rc, const char *desc)
33afe3
+fenced_send_device_notification(const char *op,
33afe3
+                                const pcmk__action_result_t *result,
33afe3
+                                const char *desc)
33afe3
 {
33afe3
-    do_stonith_notify_config(op, rc, desc, g_hash_table_size(device_list));
33afe3
+    do_stonith_notify_config(op, pcmk_rc2legacy(stonith__result2rc(result)), desc, g_hash_table_size(device_list));
33afe3
 }
33afe3
 
33afe3
 void
33afe3
diff --git a/daemons/fenced/pacemaker-fenced.h b/daemons/fenced/pacemaker-fenced.h
33afe3
index ed47ab046c..0b63680171 100644
33afe3
--- a/daemons/fenced/pacemaker-fenced.h
33afe3
+++ b/daemons/fenced/pacemaker-fenced.h
33afe3
@@ -230,7 +230,9 @@ void
33afe3
  do_stonith_async_timeout_update(const char *client, const char *call_id, int timeout);
33afe3
 
33afe3
 void do_stonith_notify(const char *type, int result, xmlNode *data);
33afe3
-void do_stonith_notify_device(const char *op, int rc, const char *desc);
33afe3
+void fenced_send_device_notification(const char *op,
33afe3
+                                     const pcmk__action_result_t *result,
33afe3
+                                     const char *desc);
33afe3
 void do_stonith_notify_level(const char *op, int rc, const char *desc);
33afe3
 
33afe3
 remote_fencing_op_t *initiate_remote_stonith_op(pcmk__client_t *client,
33afe3
-- 
33afe3
2.27.0
33afe3
33afe3
33afe3
From ee0777d5ca99d8d2d7805d4a73241ab696c68751 Mon Sep 17 00:00:00 2001
33afe3
From: Ken Gaillot <kgaillot@redhat.com>
33afe3
Date: Fri, 19 Nov 2021 16:51:55 -0600
33afe3
Subject: [PATCH 17/23] Refactor: fencer: pass full result when sending
33afe3
 topology notifications
33afe3
33afe3
Rename do_stonith_notify_level() to fenced_send_level_notification() for
33afe3
consistency, and make it take the full result as an argument rather than a
33afe3
legacy return code. The full result is not used yet, but that is planned.
33afe3
---
33afe3
 daemons/fenced/fenced_commands.c  |  4 ++--
33afe3
 daemons/fenced/pacemaker-fenced.c | 21 +++++++++++++++------
33afe3
 daemons/fenced/pacemaker-fenced.h |  4 +++-
33afe3
 3 files changed, 20 insertions(+), 9 deletions(-)
33afe3
33afe3
diff --git a/daemons/fenced/fenced_commands.c b/daemons/fenced/fenced_commands.c
33afe3
index 86a761dfab..2f3dbb035a 100644
33afe3
--- a/daemons/fenced/fenced_commands.c
33afe3
+++ b/daemons/fenced/fenced_commands.c
33afe3
@@ -3216,7 +3216,7 @@ handle_request(pcmk__client_t *client, uint32_t id, uint32_t flags,
33afe3
                              PCMK_EXEC_INVALID,
33afe3
                              "Unprivileged users must add level via CIB");
33afe3
         }
33afe3
-        do_stonith_notify_level(op, pcmk_rc2legacy(stonith__result2rc(&result)), device_id);
33afe3
+        fenced_send_level_notification(op, &result, device_id);
33afe3
         free(device_id);
33afe3
 
33afe3
     } else if (pcmk__str_eq(op, STONITH_OP_LEVEL_DEL, pcmk__str_none)) {
33afe3
@@ -3229,7 +3229,7 @@ handle_request(pcmk__client_t *client, uint32_t id, uint32_t flags,
33afe3
                              PCMK_EXEC_INVALID,
33afe3
                              "Unprivileged users must delete level via CIB");
33afe3
         }
33afe3
-        do_stonith_notify_level(op, pcmk_rc2legacy(stonith__result2rc(&result)), device_id);
33afe3
+        fenced_send_level_notification(op, &result, device_id);
33afe3
 
33afe3
     } else if(pcmk__str_eq(op, CRM_OP_RM_NODE_CACHE, pcmk__str_casei)) {
33afe3
         int node_id = 0;
33afe3
diff --git a/daemons/fenced/pacemaker-fenced.c b/daemons/fenced/pacemaker-fenced.c
33afe3
index 42e167ce78..773cf57f6b 100644
33afe3
--- a/daemons/fenced/pacemaker-fenced.c
33afe3
+++ b/daemons/fenced/pacemaker-fenced.c
33afe3
@@ -411,10 +411,21 @@ fenced_send_device_notification(const char *op,
33afe3
     do_stonith_notify_config(op, pcmk_rc2legacy(stonith__result2rc(result)), desc, g_hash_table_size(device_list));
33afe3
 }
33afe3
 
33afe3
+/*!
33afe3
+ * \internal
33afe3
+ * \brief Send notifications for a topology level change to subscribed clients
33afe3
+ *
33afe3
+ * \param[in] op      Notification type (STONITH_OP_LEVEL_ADD or
33afe3
+ *                    STONITH_OP_LEVEL_DEL)
33afe3
+ * \param[in] result  Operation result
33afe3
+ * \param[in] desc    String representation of level (<target>[<level_index>])
33afe3
+ */
33afe3
 void
33afe3
-do_stonith_notify_level(const char *op, int rc, const char *desc)
33afe3
+fenced_send_level_notification(const char *op,
33afe3
+                               const pcmk__action_result_t *result,
33afe3
+                               const char *desc)
33afe3
 {
33afe3
-    do_stonith_notify_config(op, rc, desc, g_hash_table_size(topology));
33afe3
+    do_stonith_notify_config(op, pcmk_rc2legacy(stonith__result2rc(result)), desc, g_hash_table_size(topology));
33afe3
 }
33afe3
 
33afe3
 static void
33afe3
@@ -429,8 +440,7 @@ topology_remove_helper(const char *node, int level)
33afe3
     crm_xml_add(data, XML_ATTR_STONITH_TARGET, node);
33afe3
 
33afe3
     fenced_unregister_level(data, &desc, &result);
33afe3
-    do_stonith_notify_level(STONITH_OP_LEVEL_DEL,
33afe3
-                            pcmk_rc2legacy(stonith__result2rc(&result)), desc);
33afe3
+    fenced_send_level_notification(STONITH_OP_LEVEL_DEL, &result, desc);
33afe3
     pcmk__reset_result(&result);
33afe3
     free_xml(data);
33afe3
     free(desc);
33afe3
@@ -480,8 +490,7 @@ handle_topology_change(xmlNode *match, bool remove)
33afe3
     }
33afe3
 
33afe3
     fenced_register_level(match, &desc, &result);
33afe3
-    do_stonith_notify_level(STONITH_OP_LEVEL_ADD,
33afe3
-                            pcmk_rc2legacy(stonith__result2rc(&result)), desc);
33afe3
+    fenced_send_level_notification(STONITH_OP_LEVEL_ADD, &result, desc);
33afe3
     pcmk__reset_result(&result);
33afe3
     free(desc);
33afe3
 }
33afe3
diff --git a/daemons/fenced/pacemaker-fenced.h b/daemons/fenced/pacemaker-fenced.h
33afe3
index 0b63680171..8503e813bf 100644
33afe3
--- a/daemons/fenced/pacemaker-fenced.h
33afe3
+++ b/daemons/fenced/pacemaker-fenced.h
33afe3
@@ -233,7 +233,9 @@ void do_stonith_notify(const char *type, int result, xmlNode *data);
33afe3
 void fenced_send_device_notification(const char *op,
33afe3
                                      const pcmk__action_result_t *result,
33afe3
                                      const char *desc);
33afe3
-void do_stonith_notify_level(const char *op, int rc, const char *desc);
33afe3
+void fenced_send_level_notification(const char *op,
33afe3
+                                    const pcmk__action_result_t *result,
33afe3
+                                    const char *desc);
33afe3
 
33afe3
 remote_fencing_op_t *initiate_remote_stonith_op(pcmk__client_t *client,
33afe3
                                                 xmlNode *request,
33afe3
-- 
33afe3
2.27.0
33afe3
33afe3
33afe3
From deec1ea9bcd7e0062755aa8b74358bfd12e4b9f0 Mon Sep 17 00:00:00 2001
33afe3
From: Ken Gaillot <kgaillot@redhat.com>
33afe3
Date: Fri, 19 Nov 2021 16:53:26 -0600
33afe3
Subject: [PATCH 18/23] Refactor: fencer: pass full result when sending
33afe3
 configuration notifications
33afe3
33afe3
Rename do_stonith_notify_config() to send_config_notification() for
33afe3
consistency, and make it take the full result as an argument rather than a
33afe3
legacy return code. The full result is not used yet, but that is planned.
33afe3
---
33afe3
 daemons/fenced/pacemaker-fenced.c | 19 +++++++++++++++----
33afe3
 1 file changed, 15 insertions(+), 4 deletions(-)
33afe3
33afe3
diff --git a/daemons/fenced/pacemaker-fenced.c b/daemons/fenced/pacemaker-fenced.c
33afe3
index 773cf57f6b..d64358e07f 100644
33afe3
--- a/daemons/fenced/pacemaker-fenced.c
33afe3
+++ b/daemons/fenced/pacemaker-fenced.c
33afe3
@@ -379,8 +379,19 @@ do_stonith_notify(const char *type, int result, xmlNode *data)
33afe3
     crm_trace("Notify complete");
33afe3
 }
33afe3
 
33afe3
+/*!
33afe3
+ * \internal
33afe3
+ * \brief Send notifications for a configuration change to subscribed clients
33afe3
+ *
33afe3
+ * \param[in] op      Notification type (STONITH_OP_DEVICE_ADD,
33afe3
+ *                    STONITH_OP_DEVICE_DEL, STONITH_OP_LEVEL_ADD, or
33afe3
+ *                    STONITH_OP_LEVEL_DEL)
33afe3
+ * \param[in] result  Operation result
33afe3
+ * \param[in] desc    Description of what changed
33afe3
+ * \param[in] active  Current number of devices or topologies in use
33afe3
+ */
33afe3
 static void
33afe3
-do_stonith_notify_config(const char *op, int rc,
33afe3
+send_config_notification(const char *op, const pcmk__action_result_t *result,
33afe3
                          const char *desc, int active)
33afe3
 {
33afe3
     xmlNode *notify_data = create_xml_node(NULL, op);
33afe3
@@ -390,7 +401,7 @@ do_stonith_notify_config(const char *op, int rc,
33afe3
     crm_xml_add(notify_data, F_STONITH_DEVICE, desc);
33afe3
     crm_xml_add_int(notify_data, F_STONITH_ACTIVE, active);
33afe3
 
33afe3
-    do_stonith_notify(op, rc, notify_data);
33afe3
+    do_stonith_notify(op, pcmk_rc2legacy(stonith__result2rc(result)), notify_data);
33afe3
     free_xml(notify_data);
33afe3
 }
33afe3
 
33afe3
@@ -408,7 +419,7 @@ fenced_send_device_notification(const char *op,
33afe3
                                 const pcmk__action_result_t *result,
33afe3
                                 const char *desc)
33afe3
 {
33afe3
-    do_stonith_notify_config(op, pcmk_rc2legacy(stonith__result2rc(result)), desc, g_hash_table_size(device_list));
33afe3
+    send_config_notification(op, result, desc, g_hash_table_size(device_list));
33afe3
 }
33afe3
 
33afe3
 /*!
33afe3
@@ -425,7 +436,7 @@ fenced_send_level_notification(const char *op,
33afe3
                                const pcmk__action_result_t *result,
33afe3
                                const char *desc)
33afe3
 {
33afe3
-    do_stonith_notify_config(op, pcmk_rc2legacy(stonith__result2rc(result)), desc, g_hash_table_size(topology));
33afe3
+    send_config_notification(op, result, desc, g_hash_table_size(topology));
33afe3
 }
33afe3
 
33afe3
 static void
33afe3
-- 
33afe3
2.27.0
33afe3
33afe3
33afe3
From 432e4445b630fb158482a5f6de1e0e41697a381f Mon Sep 17 00:00:00 2001
33afe3
From: Ken Gaillot <kgaillot@redhat.com>
33afe3
Date: Fri, 19 Nov 2021 16:56:12 -0600
33afe3
Subject: [PATCH 19/23] Feature: fencer: pass full result when sending
33afe3
 notifications
33afe3
33afe3
Rename do_stonith_notify() to fenced_send_notification() for consistency, and
33afe3
make it take the full result as an argument rather than a legacy return code,
33afe3
and add the full result to the notifications.
33afe3
---
33afe3
 daemons/fenced/fenced_commands.c  |  4 ++--
33afe3
 daemons/fenced/fenced_history.c   |  6 +++---
33afe3
 daemons/fenced/fenced_remote.c    |  6 +++---
33afe3
 daemons/fenced/pacemaker-fenced.c | 15 ++++++++++++---
33afe3
 daemons/fenced/pacemaker-fenced.h |  4 +++-
33afe3
 5 files changed, 23 insertions(+), 12 deletions(-)
33afe3
33afe3
diff --git a/daemons/fenced/fenced_commands.c b/daemons/fenced/fenced_commands.c
33afe3
index 2f3dbb035a..54ebc12947 100644
33afe3
--- a/daemons/fenced/fenced_commands.c
33afe3
+++ b/daemons/fenced/fenced_commands.c
33afe3
@@ -2489,8 +2489,8 @@ send_async_reply(async_command_t *cmd, const pcmk__action_result_t *result,
33afe3
         crm_xml_add(notify_data, F_STONITH_REMOTE_OP_ID, cmd->remote_op_id);
33afe3
         crm_xml_add(notify_data, F_STONITH_ORIGIN, cmd->client);
33afe3
 
33afe3
-        do_stonith_notify(T_STONITH_NOTIFY_FENCE, pcmk_rc2legacy(stonith__result2rc(result)), notify_data);
33afe3
-        do_stonith_notify(T_STONITH_NOTIFY_HISTORY, pcmk_ok, NULL);
33afe3
+        fenced_send_notification(T_STONITH_NOTIFY_FENCE, result, notify_data);
33afe3
+        fenced_send_notification(T_STONITH_NOTIFY_HISTORY, NULL, NULL);
33afe3
     }
33afe3
 }
33afe3
 
33afe3
diff --git a/daemons/fenced/fenced_history.c b/daemons/fenced/fenced_history.c
33afe3
index 1e07a9815a..44310ed77b 100644
33afe3
--- a/daemons/fenced/fenced_history.c
33afe3
+++ b/daemons/fenced/fenced_history.c
33afe3
@@ -100,7 +100,7 @@ stonith_fence_history_cleanup(const char *target,
33afe3
         g_hash_table_foreach_remove(stonith_remote_op_list,
33afe3
                              stonith_remove_history_entry,
33afe3
                              (gpointer) target);
33afe3
-        do_stonith_notify(T_STONITH_NOTIFY_HISTORY, pcmk_ok, NULL);
33afe3
+        fenced_send_notification(T_STONITH_NOTIFY_HISTORY, NULL, NULL);
33afe3
     }
33afe3
 }
33afe3
 
33afe3
@@ -402,7 +402,7 @@ stonith_local_history_diff_and_merge(GHashTable *remote_history,
33afe3
 
33afe3
     if (updated) {
33afe3
         stonith_fence_history_trim();
33afe3
-        do_stonith_notify(T_STONITH_NOTIFY_HISTORY, pcmk_ok, NULL);
33afe3
+        fenced_send_notification(T_STONITH_NOTIFY_HISTORY, NULL, NULL);
33afe3
     }
33afe3
 
33afe3
     if (cnt == 0) {
33afe3
@@ -473,7 +473,7 @@ stonith_fence_history(xmlNode *msg, xmlNode **output,
33afe3
            is done so send a notification for anything
33afe3
            that smells like history-sync
33afe3
          */
33afe3
-        do_stonith_notify(T_STONITH_NOTIFY_HISTORY_SYNCED, pcmk_ok, NULL);
33afe3
+        fenced_send_notification(T_STONITH_NOTIFY_HISTORY_SYNCED, NULL, NULL);
33afe3
         if (crm_element_value(msg, F_STONITH_CALLID)) {
33afe3
             /* this is coming from the stonith-API
33afe3
             *
33afe3
diff --git a/daemons/fenced/fenced_remote.c b/daemons/fenced/fenced_remote.c
33afe3
index 329e06c444..16c181b4b0 100644
33afe3
--- a/daemons/fenced/fenced_remote.c
33afe3
+++ b/daemons/fenced/fenced_remote.c
33afe3
@@ -442,8 +442,8 @@ handle_local_reply_and_notify(remote_fencing_op_t *op, xmlNode *data,
33afe3
     do_local_reply(reply, op->client_id, op->call_options & st_opt_sync_call, FALSE);
33afe3
 
33afe3
     /* bcast to all local clients that the fencing operation happend */
33afe3
-    do_stonith_notify(T_STONITH_NOTIFY_FENCE, pcmk_rc2legacy(stonith__result2rc(result)), notify_data);
33afe3
-    do_stonith_notify(T_STONITH_NOTIFY_HISTORY, pcmk_ok, NULL);
33afe3
+    fenced_send_notification(T_STONITH_NOTIFY_FENCE, result, notify_data);
33afe3
+    fenced_send_notification(T_STONITH_NOTIFY_HISTORY, NULL, NULL);
33afe3
 
33afe3
     /* mark this op as having notify's already sent */
33afe3
     op->notify_sent = TRUE;
33afe3
@@ -1211,7 +1211,7 @@ create_remote_stonith_op(const char *client, xmlNode * request, gboolean peer)
33afe3
 
33afe3
     if (op->state != st_duplicate) {
33afe3
         /* kick history readers */
33afe3
-        do_stonith_notify(T_STONITH_NOTIFY_HISTORY, pcmk_ok, NULL);
33afe3
+        fenced_send_notification(T_STONITH_NOTIFY_HISTORY, NULL, NULL);
33afe3
     }
33afe3
 
33afe3
     /* safe to trim as long as that doesn't touch pending ops */
33afe3
diff --git a/daemons/fenced/pacemaker-fenced.c b/daemons/fenced/pacemaker-fenced.c
33afe3
index d64358e07f..6b31b814a3 100644
33afe3
--- a/daemons/fenced/pacemaker-fenced.c
33afe3
+++ b/daemons/fenced/pacemaker-fenced.c
33afe3
@@ -356,8 +356,17 @@ do_stonith_async_timeout_update(const char *client_id, const char *call_id, int
33afe3
     free_xml(notify_data);
33afe3
 }
33afe3
 
33afe3
+/*!
33afe3
+ * \internal
33afe3
+ * \brief Notify relevant IPC clients of a fencing operation result
33afe3
+ *
33afe3
+ * \param[in] type     Notification type
33afe3
+ * \param[in] result   Result of fencing operation (assume success if NULL)
33afe3
+ * \param[in] data     If not NULL, add to notification as call data
33afe3
+ */
33afe3
 void
33afe3
-do_stonith_notify(const char *type, int result, xmlNode *data)
33afe3
+fenced_send_notification(const char *type, const pcmk__action_result_t *result,
33afe3
+                         xmlNode *data)
33afe3
 {
33afe3
     /* TODO: Standardize the contents of data */
33afe3
     xmlNode *update_msg = create_xml_node(NULL, "notify");
33afe3
@@ -367,7 +376,7 @@ do_stonith_notify(const char *type, int result, xmlNode *data)
33afe3
     crm_xml_add(update_msg, F_TYPE, T_STONITH_NOTIFY);
33afe3
     crm_xml_add(update_msg, F_SUBTYPE, type);
33afe3
     crm_xml_add(update_msg, F_STONITH_OPERATION, type);
33afe3
-    crm_xml_add_int(update_msg, F_STONITH_RC, result);
33afe3
+    stonith__xe_set_result(update_msg, result);
33afe3
 
33afe3
     if (data != NULL) {
33afe3
         add_message_xml(update_msg, F_STONITH_CALLDATA, data);
33afe3
@@ -401,7 +410,7 @@ send_config_notification(const char *op, const pcmk__action_result_t *result,
33afe3
     crm_xml_add(notify_data, F_STONITH_DEVICE, desc);
33afe3
     crm_xml_add_int(notify_data, F_STONITH_ACTIVE, active);
33afe3
 
33afe3
-    do_stonith_notify(op, pcmk_rc2legacy(stonith__result2rc(result)), notify_data);
33afe3
+    fenced_send_notification(op, result, notify_data);
33afe3
     free_xml(notify_data);
33afe3
 }
33afe3
 
33afe3
diff --git a/daemons/fenced/pacemaker-fenced.h b/daemons/fenced/pacemaker-fenced.h
33afe3
index 8503e813bf..502fcc9a29 100644
33afe3
--- a/daemons/fenced/pacemaker-fenced.h
33afe3
+++ b/daemons/fenced/pacemaker-fenced.h
33afe3
@@ -229,7 +229,9 @@ xmlNode *fenced_construct_reply(xmlNode *request, xmlNode *data,
33afe3
 void
33afe3
  do_stonith_async_timeout_update(const char *client, const char *call_id, int timeout);
33afe3
 
33afe3
-void do_stonith_notify(const char *type, int result, xmlNode *data);
33afe3
+void fenced_send_notification(const char *type,
33afe3
+                              const pcmk__action_result_t *result,
33afe3
+                              xmlNode *data);
33afe3
 void fenced_send_device_notification(const char *op,
33afe3
                                      const pcmk__action_result_t *result,
33afe3
                                      const char *desc);
33afe3
-- 
33afe3
2.27.0
33afe3
33afe3
33afe3
From 86deababe506c2bb8259538e5380b6a78dc4b770 Mon Sep 17 00:00:00 2001
33afe3
From: Ken Gaillot <kgaillot@redhat.com>
33afe3
Date: Fri, 19 Nov 2021 16:58:03 -0600
33afe3
Subject: [PATCH 20/23] Feature: fencer: pass full result when sending
33afe3
 notifications
33afe3
33afe3
Rename create_op_done_notify() to fencing_result2xml() for readability,
33afe3
make it take the full result as an argument rather than a legacy return code,
33afe3
and add the full result to broadcasts and notifications.
33afe3
---
33afe3
 daemons/fenced/fenced_remote.c | 20 +++++++++++++++-----
33afe3
 1 file changed, 15 insertions(+), 5 deletions(-)
33afe3
33afe3
diff --git a/daemons/fenced/fenced_remote.c b/daemons/fenced/fenced_remote.c
33afe3
index 16c181b4b0..4cf723e6df 100644
33afe3
--- a/daemons/fenced/fenced_remote.c
33afe3
+++ b/daemons/fenced/fenced_remote.c
33afe3
@@ -356,13 +356,22 @@ undo_op_remap(remote_fencing_op_t *op)
33afe3
     }
33afe3
 }
33afe3
 
33afe3
+/*!
33afe3
+ * \internal
33afe3
+ * \brief Create notification data XML for a fencing operation result
33afe3
+ *
33afe3
+ * \param[in] op      Fencer operation that completed
33afe3
+ * \param[in] result  Full operation result
33afe3
+ *
33afe3
+ * \return Newly created XML to add as notification data
33afe3
+ * \note The caller is responsible for freeing the result.
33afe3
+ */
33afe3
 static xmlNode *
33afe3
-create_op_done_notify(remote_fencing_op_t * op, int rc)
33afe3
+fencing_result2xml(remote_fencing_op_t *op, pcmk__action_result_t *result)
33afe3
 {
33afe3
     xmlNode *notify_data = create_xml_node(NULL, T_STONITH_NOTIFY_FENCE);
33afe3
 
33afe3
     crm_xml_add_int(notify_data, "state", op->state);
33afe3
-    crm_xml_add_int(notify_data, F_STONITH_RC, rc);
33afe3
     crm_xml_add(notify_data, F_STONITH_TARGET, op->target);
33afe3
     crm_xml_add(notify_data, F_STONITH_ACTION, op->action);
33afe3
     crm_xml_add(notify_data, F_STONITH_DELEGATE, op->delegate);
33afe3
@@ -371,6 +380,7 @@ create_op_done_notify(remote_fencing_op_t * op, int rc)
33afe3
     crm_xml_add(notify_data, F_STONITH_CLIENTID, op->client_id);
33afe3
     crm_xml_add(notify_data, F_STONITH_CLIENTNAME, op->client_name);
33afe3
 
33afe3
+    stonith__xe_set_result(notify_data, result);
33afe3
     return notify_data;
33afe3
 }
33afe3
 
33afe3
@@ -388,7 +398,7 @@ fenced_broadcast_op_result(remote_fencing_op_t *op,
33afe3
 {
33afe3
     static int count = 0;
33afe3
     xmlNode *bcast = create_xml_node(NULL, T_STONITH_REPLY);
33afe3
-    xmlNode *notify_data = create_op_done_notify(op, pcmk_rc2legacy(stonith__result2rc(result)));
33afe3
+    xmlNode *notify_data = fencing_result2xml(op, result);
33afe3
 
33afe3
     count++;
33afe3
     crm_trace("Broadcasting result to peers");
33afe3
@@ -430,7 +440,6 @@ handle_local_reply_and_notify(remote_fencing_op_t *op, xmlNode *data,
33afe3
     }
33afe3
 
33afe3
     /* Do notification with a clean data object */
33afe3
-    notify_data = create_op_done_notify(op, pcmk_rc2legacy(stonith__result2rc(result)));
33afe3
     crm_xml_add_int(data, "state", op->state);
33afe3
     crm_xml_add(data, F_STONITH_TARGET, op->target);
33afe3
     crm_xml_add(data, F_STONITH_OPERATION, op->action);
33afe3
@@ -442,13 +451,14 @@ handle_local_reply_and_notify(remote_fencing_op_t *op, xmlNode *data,
33afe3
     do_local_reply(reply, op->client_id, op->call_options & st_opt_sync_call, FALSE);
33afe3
 
33afe3
     /* bcast to all local clients that the fencing operation happend */
33afe3
+    notify_data = fencing_result2xml(op, result);
33afe3
     fenced_send_notification(T_STONITH_NOTIFY_FENCE, result, notify_data);
33afe3
+    free_xml(notify_data);
33afe3
     fenced_send_notification(T_STONITH_NOTIFY_HISTORY, NULL, NULL);
33afe3
 
33afe3
     /* mark this op as having notify's already sent */
33afe3
     op->notify_sent = TRUE;
33afe3
     free_xml(reply);
33afe3
-    free_xml(notify_data);
33afe3
 }
33afe3
 
33afe3
 /*!
33afe3
-- 
33afe3
2.27.0
33afe3
33afe3
33afe3
From 2814cde97520b63ca5f9baf3df37d73507e89d34 Mon Sep 17 00:00:00 2001
33afe3
From: Ken Gaillot <kgaillot@redhat.com>
33afe3
Date: Wed, 15 Dec 2021 17:40:52 -0600
33afe3
Subject: [PATCH 21/23] Low: fencer: restore check for invalid topology level
33afe3
 target
33afe3
33afe3
... per review. b7c7676c mistakenly dropped it
33afe3
---
33afe3
 daemons/fenced/fenced_commands.c | 12 +++++++++++-
33afe3
 1 file changed, 11 insertions(+), 1 deletion(-)
33afe3
33afe3
diff --git a/daemons/fenced/fenced_commands.c b/daemons/fenced/fenced_commands.c
33afe3
index 54ebc12947..1a4a791385 100644
33afe3
--- a/daemons/fenced/fenced_commands.c
33afe3
+++ b/daemons/fenced/fenced_commands.c
33afe3
@@ -1636,6 +1636,16 @@ fenced_register_level(xmlNode *msg, char **desc, pcmk__action_result_t *result)
33afe3
         *desc = crm_strdup_printf("%s[%d]", target, id);
33afe3
     }
33afe3
 
33afe3
+    // Ensure a valid target was specified
33afe3
+    if ((mode < 0) || (mode > 2)) {
33afe3
+        crm_warn("Ignoring topology level registration without valid target");
33afe3
+        free(target);
33afe3
+        crm_log_xml_warn(level, "Bad level");
33afe3
+        pcmk__set_result(result, CRM_EX_INVALID_PARAM, PCMK_EXEC_INVALID,
33afe3
+                         "Invalid topology level target");
33afe3
+        return;
33afe3
+    }
33afe3
+
33afe3
     // Ensure level ID is in allowed range
33afe3
     if ((id <= 0) || (id >= ST_LEVEL_MAX)) {
33afe3
         crm_warn("Ignoring topology registration for %s with invalid level %d",
33afe3
@@ -1643,7 +1653,7 @@ fenced_register_level(xmlNode *msg, char **desc, pcmk__action_result_t *result)
33afe3
         free(target);
33afe3
         crm_log_xml_warn(level, "Bad level");
33afe3
         pcmk__set_result(result, CRM_EX_INVALID_PARAM, PCMK_EXEC_INVALID,
33afe3
-                         "Invalid topology level");
33afe3
+                         "Invalid topology level number");
33afe3
         return;
33afe3
     }
33afe3
 
33afe3
-- 
33afe3
2.27.0
33afe3
33afe3
33afe3
From c82806f9e16abcea00025fd3a290477aef2d8d83 Mon Sep 17 00:00:00 2001
33afe3
From: Ken Gaillot <kgaillot@redhat.com>
33afe3
Date: Tue, 21 Dec 2021 16:23:29 -0600
33afe3
Subject: [PATCH 22/23] Low: fencer: free result memory when processing fencing
33afe3
 replies
33afe3
33afe3
found in review
33afe3
---
33afe3
 daemons/fenced/fenced_remote.c | 24 +++++++++++++++---------
33afe3
 1 file changed, 15 insertions(+), 9 deletions(-)
33afe3
33afe3
diff --git a/daemons/fenced/fenced_remote.c b/daemons/fenced/fenced_remote.c
33afe3
index 4cf723e6df..9fda9ef060 100644
33afe3
--- a/daemons/fenced/fenced_remote.c
33afe3
+++ b/daemons/fenced/fenced_remote.c
33afe3
@@ -2241,14 +2241,14 @@ fenced_process_fencing_reply(xmlNode *msg)
33afe3
         /* Could be for an event that began before we started */
33afe3
         /* TODO: Record the op for later querying */
33afe3
         crm_info("Received peer result of unknown or expired operation %s", id);
33afe3
-        return;
33afe3
+        goto done;
33afe3
     }
33afe3
 
33afe3
     if (op->devices && device && !pcmk__str_eq(op->devices->data, device, pcmk__str_casei)) {
33afe3
         crm_err("Received outdated reply for device %s (instead of %s) to "
33afe3
                 "fence (%s) %s. Operation already timed out at peer level.",
33afe3
                 device, (const char *) op->devices->data, op->action, op->target);
33afe3
-        return;
33afe3
+        goto done;
33afe3
     }
33afe3
 
33afe3
     if (pcmk__str_eq(crm_element_value(msg, F_SUBTYPE), "broadcast", pcmk__str_casei)) {
33afe3
@@ -2265,14 +2265,15 @@ fenced_process_fencing_reply(xmlNode *msg)
33afe3
             op->state = st_failed;
33afe3
         }
33afe3
         finalize_op(op, msg, &result, false);
33afe3
-        return;
33afe3
+        goto done;
33afe3
+
33afe3
     } else if (!pcmk__str_eq(op->originator, stonith_our_uname, pcmk__str_casei)) {
33afe3
         /* If this isn't a remote level broadcast, and we are not the
33afe3
          * originator of the operation, we should not be receiving this msg. */
33afe3
         crm_err("Received non-broadcast fencing result for operation %.8s "
33afe3
                 "we do not own (device %s targeting %s)",
33afe3
                 op->id, device, op->target);
33afe3
-        return;
33afe3
+        goto done;
33afe3
     }
33afe3
 
33afe3
     if (pcmk_is_set(op->call_options, st_opt_topology)) {
33afe3
@@ -2290,7 +2291,7 @@ fenced_process_fencing_reply(xmlNode *msg)
33afe3
          * and notify our local clients. */
33afe3
         if (op->state == st_done) {
33afe3
             finalize_op(op, msg, &result, false);
33afe3
-            return;
33afe3
+            goto done;
33afe3
         }
33afe3
 
33afe3
         if ((op->phase == 2) && !pcmk__result_ok(&result)) {
33afe3
@@ -2310,27 +2311,30 @@ fenced_process_fencing_reply(xmlNode *msg)
33afe3
             /* An operation completed successfully. Try another device if
33afe3
              * necessary, otherwise mark the operation as done. */
33afe3
             advance_topology_device_in_level(op, device, msg);
33afe3
-            return;
33afe3
+            goto done;
33afe3
         } else {
33afe3
             /* This device failed, time to try another topology level. If no other
33afe3
              * levels are available, mark this operation as failed and report results. */
33afe3
             if (advance_topology_level(op, false) != pcmk_rc_ok) {
33afe3
                 op->state = st_failed;
33afe3
                 finalize_op(op, msg, &result, false);
33afe3
-                return;
33afe3
+                goto done;
33afe3
             }
33afe3
         }
33afe3
+
33afe3
     } else if (pcmk__result_ok(&result) && (op->devices == NULL)) {
33afe3
         crm_trace("All done for %s", op->target);
33afe3
         op->state = st_done;
33afe3
         finalize_op(op, msg, &result, false);
33afe3
-        return;
33afe3
+        goto done;
33afe3
+
33afe3
     } else if ((result.execution_status == PCMK_EXEC_TIMEOUT)
33afe3
                && (op->devices == NULL)) {
33afe3
         /* If the operation timed out don't bother retrying other peers. */
33afe3
         op->state = st_failed;
33afe3
         finalize_op(op, msg, &result, false);
33afe3
-        return;
33afe3
+        goto done;
33afe3
+
33afe3
     } else {
33afe3
         /* fall-through and attempt other fencing action using another peer */
33afe3
     }
33afe3
@@ -2340,6 +2344,8 @@ fenced_process_fencing_reply(xmlNode *msg)
33afe3
               op->target, op->originator, op->client_name,
33afe3
               pcmk_exec_status_str(result.execution_status));
33afe3
     request_peer_fencing(op, NULL, &result);
33afe3
+done:
33afe3
+    pcmk__reset_result(&result);
33afe3
 }
33afe3
 
33afe3
 gboolean
33afe3
-- 
33afe3
2.27.0
33afe3
33afe3
33afe3
From 137bf97fdb39043eebb02a0d3ebbe47ee8c7044c Mon Sep 17 00:00:00 2001
33afe3
From: Ken Gaillot <kgaillot@redhat.com>
33afe3
Date: Tue, 21 Dec 2021 16:26:22 -0600
33afe3
Subject: [PATCH 23/23] Log: fencer: clarify timeout message
33afe3
33afe3
... as suggested by review
33afe3
---
33afe3
 daemons/fenced/fenced_remote.c | 2 +-
33afe3
 1 file changed, 1 insertion(+), 1 deletion(-)
33afe3
33afe3
diff --git a/daemons/fenced/fenced_remote.c b/daemons/fenced/fenced_remote.c
33afe3
index 9fda9ef060..1e237150c5 100644
33afe3
--- a/daemons/fenced/fenced_remote.c
33afe3
+++ b/daemons/fenced/fenced_remote.c
33afe3
@@ -656,7 +656,7 @@ remote_op_timeout_one(gpointer userdata)
33afe3
     crm_notice("Peer's '%s' action targeting %s for client %s timed out " CRM_XS
33afe3
                " id=%.8s", op->action, op->target, op->client_name, op->id);
33afe3
     pcmk__set_result(&result, CRM_EX_ERROR, PCMK_EXEC_TIMEOUT,
33afe3
-                     "Peer did not send fence result within timeout");
33afe3
+                     "Peer did not return fence result within timeout");
33afe3
 
33afe3
 
33afe3
     // Try another device, if appropriate
33afe3
-- 
33afe3
2.27.0
33afe3