diff --git a/SOURCES/009-fencing-reasons.patch b/SOURCES/009-fencing-reasons.patch
new file mode 100644
index 0000000..3fb5bc7
--- /dev/null
+++ b/SOURCES/009-fencing-reasons.patch
@@ -0,0 +1,2985 @@
+From fcd42a5926e9a63d425586552ecc7b543838d352 Mon Sep 17 00:00:00 2001
+From: Ken Gaillot <kgaillot@redhat.com>
+Date: Thu, 11 Nov 2021 16:57:03 -0600
+Subject: [PATCH 01/23] Feature: fencer: pass full result in async command
+ replies
+
+The services library callbacks for async commands, which call
+send_async_reply() -> construct_async_reply() to create the reply, now add
+fields for exit status, operation status, and exit reason, in addition to the
+existing action standard output and legacy return code.
+
+Nothing uses the new fields yet.
+---
+ daemons/fenced/fenced_commands.c | 10 ++++------
+ 1 file changed, 4 insertions(+), 6 deletions(-)
+
+diff --git a/daemons/fenced/fenced_commands.c b/daemons/fenced/fenced_commands.c
+index f34cb4f136..3497428c18 100644
+--- a/daemons/fenced/fenced_commands.c
++++ b/daemons/fenced/fenced_commands.c
+@@ -2415,9 +2415,8 @@ send_async_reply(async_command_t *cmd, const pcmk__action_result_t *result,
+     if (stand_alone) {
+         /* Do notification with a clean data object */
+         xmlNode *notify_data = create_xml_node(NULL, T_STONITH_NOTIFY_FENCE);
+-        int rc = pcmk_rc2legacy(stonith__result2rc(result));
+ 
+-        crm_xml_add_int(notify_data, F_STONITH_RC, rc);
++        stonith__xe_set_result(notify_data, result);
+         crm_xml_add(notify_data, F_STONITH_TARGET, cmd->victim);
+         crm_xml_add(notify_data, F_STONITH_OPERATION, cmd->op);
+         crm_xml_add(notify_data, F_STONITH_DELEGATE, "localhost");
+@@ -2425,7 +2424,7 @@ send_async_reply(async_command_t *cmd, const pcmk__action_result_t *result,
+         crm_xml_add(notify_data, F_STONITH_REMOTE_OP_ID, cmd->remote_op_id);
+         crm_xml_add(notify_data, F_STONITH_ORIGIN, cmd->client);
+ 
+-        do_stonith_notify(T_STONITH_NOTIFY_FENCE, rc, notify_data);
++        do_stonith_notify(T_STONITH_NOTIFY_FENCE, pcmk_rc2legacy(stonith__result2rc(result)), notify_data);
+         do_stonith_notify(T_STONITH_NOTIFY_HISTORY, pcmk_ok, NULL);
+     }
+ }
+@@ -2728,9 +2727,8 @@ construct_async_reply(async_command_t *cmd, const pcmk__action_result_t *result)
+     crm_xml_add(reply, F_STONITH_ORIGIN, cmd->origin);
+     crm_xml_add_int(reply, F_STONITH_CALLID, cmd->id);
+     crm_xml_add_int(reply, F_STONITH_CALLOPTS, cmd->options);
+-    crm_xml_add_int(reply, F_STONITH_RC,
+-                    pcmk_rc2legacy(stonith__result2rc(result)));
+-    crm_xml_add(reply, F_STONITH_OUTPUT, result->action_stdout);
++
++    stonith__xe_set_result(reply, result);
+     return reply;
+ }
+ 
+-- 
+2.27.0
+
+
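+The pcmk__action_result_t threaded through these replies bundles an exit
+status, an execution status, an optional exit reason, and the action's
+standard output. A minimal sketch of filling one in and flattening it both
+ways, as send_async_reply() now does (the function and header paths here are
+illustrative assumptions, not part of the series):
+
+    #include <crm/crm.h>
+    #include <crm/common/results_internal.h>  // pcmk__action_result_t (assumed)
+    #include <crm/fencing/internal.h>         // stonith__* helpers (assumed)
+
+    static xmlNode *
+    result_reply_sketch(const pcmk__action_result_t *result)
+    {
+        xmlNode *reply = create_xml_node(NULL, "st-reply-sketch");
+
+        // New-style fields: the full result (exit status, execution status,
+        // exit reason, and output), as now added to async replies
+        stonith__xe_set_result(reply, result);
+
+        // Legacy field: single negative errno-style code for older peers
+        crm_xml_add_int(reply, F_STONITH_RC,
+                        pcmk_rc2legacy(stonith__result2rc(result)));
+        return reply;
+    }
+
+A caller would start from PCMK__UNKNOWN_RESULT, fill the struct with
+pcmk__set_result(), and release any dynamic members with pcmk__reset_result().
+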
+From 4bac2e9811872f92571e4f5a47d8c5032cfc3016 Mon Sep 17 00:00:00 2001
+From: Ken Gaillot <kgaillot@redhat.com>
+Date: Thu, 18 Nov 2021 12:41:29 -0600
+Subject: [PATCH 02/23] Refactor: fencer: track full result for direct agent
+ actions
+
+This renames stonith_device_action() to execute_agent_action() for readability,
+and has it set a full result rather than return a legacy return code.
+
+As of this commit, handle_request() just maps the result back to a legacy code,
+but it will make better use of it with planned changes.
+---
+ daemons/fenced/fenced_commands.c | 95 +++++++++++++++++++-------------
+ 1 file changed, 56 insertions(+), 39 deletions(-)
+
+diff --git a/daemons/fenced/fenced_commands.c b/daemons/fenced/fenced_commands.c
+index 3497428c18..2f59ef84b7 100644
+--- a/daemons/fenced/fenced_commands.c
++++ b/daemons/fenced/fenced_commands.c
+@@ -1729,23 +1729,6 @@ stonith_level_remove(xmlNode *msg, char **desc)
+     return pcmk_ok;
+ }
+ 
+-/*!
+- * \internal
+- * \brief Schedule an (asynchronous) action directly on a stonith device
+- *
+- * Handle a STONITH_OP_EXEC API message by scheduling a requested agent action
+- * directly on a specified device. Only list, monitor, and status actions are
+- * expected to use this call, though it should work with any agent command.
+- *
+- * \param[in]  msg     API message XML with desired action
+- * \param[out] output  Unused
+- *
+- * \return -EINPROGRESS on success, -errno otherwise
+- * \note If the action is monitor, the device must be registered via the API
+- *       (CIB registration is not sufficient), because monitor should not be
+- *       possible unless the device is "started" (API registered).
+- */
+-
+ static char *
+ list_to_string(GList *list, const char *delim, gboolean terminate_with_delim)
+ {
+@@ -1778,8 +1761,23 @@ list_to_string(GList *list, const char *delim, gboolean terminate_with_delim)
+     return rv;
+ }
+ 
+-static int
+-stonith_device_action(xmlNode * msg, char **output)
++/*!
++ * \internal
++ * \brief Execute a fence agent action directly (and asynchronously)
++ *
++ * Handle a STONITH_OP_EXEC API message by scheduling a requested agent action
++ * directly on a specified device. Only list, monitor, and status actions are
++ * expected to use this call, though it should work with any agent command.
++ *
++ * \param[in]  msg     Request XML specifying action
++ * \param[out] result  Where to store result of action
++ *
++ * \note If the action is monitor, the device must be registered via the API
++ *       (CIB registration is not sufficient), because monitor should not be
++ *       possible unless the device is "started" (API registered).
++ */
++static void
++execute_agent_action(xmlNode *msg, pcmk__action_result_t *result)
+ {
+     xmlNode *dev = get_xpath_object("//" F_STONITH_DEVICE, msg, LOG_ERR);
+     xmlNode *op = get_xpath_object("//@" F_STONITH_ACTION, msg, LOG_ERR);
+@@ -1792,39 +1790,56 @@ stonith_device_action(xmlNode * msg, char **output)
+         crm_info("Malformed API action request: device %s, action %s",
+                  (id? id : "not specified"),
+                  (action? action : "not specified"));
+-        return -EPROTO;
++        fenced_set_protocol_error(result);
++        return;
+     }
+ 
+     if (pcmk__str_eq(id, STONITH_WATCHDOG_ID, pcmk__str_none)) {
++        // Watchdog agent actions are implemented internally
+         if (stonith_watchdog_timeout_ms <= 0) {
+-            return -ENODEV;
+-        } else {
+-            if (pcmk__str_eq(action, "list", pcmk__str_casei)) {
+-                *output = list_to_string(stonith_watchdog_targets, "\n", TRUE);
+-                return pcmk_ok;
+-            } else if (pcmk__str_eq(action, "monitor", pcmk__str_casei)) {
+-                return pcmk_ok;
+-            }
++            pcmk__set_result(result, CRM_EX_ERROR, PCMK_EXEC_NO_FENCE_DEVICE,
++                             "Watchdog fence device not configured");
++            return;
++
++        } else if (pcmk__str_eq(action, "list", pcmk__str_casei)) {
++            pcmk__set_result(result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
++            pcmk__set_result_output(result,
++                                    list_to_string(stonith_watchdog_targets,
++                                                   "\n", TRUE),
++                                    NULL);
++            return;
++
++        } else if (pcmk__str_eq(action, "monitor", pcmk__str_casei)) {
++            pcmk__set_result(result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
++            return;
+         }
+     }
+ 
+     device = g_hash_table_lookup(device_list, id);
+-    if ((device == NULL)
+-        || (!device->api_registered && !strcmp(action, "monitor"))) {
++    if (device == NULL) {
++        crm_info("Ignoring API '%s' action request because device %s not found",
++                 action, id);
++        pcmk__set_result(result, CRM_EX_ERROR, PCMK_EXEC_NO_FENCE_DEVICE,
++                         NULL);
++        return;
+ 
++    } else if (!device->api_registered && !strcmp(action, "monitor")) {
+         // Monitors may run only on "started" (API-registered) devices
+-        crm_info("Ignoring API '%s' action request because device %s not found",
++        crm_info("Ignoring API '%s' action request because device %s not active",
+                  action, id);
+-        return -ENODEV;
++        pcmk__set_result(result, CRM_EX_ERROR, PCMK_EXEC_NO_FENCE_DEVICE,
++                         "Fence device not active");
++        return;
+     }
+ 
+     cmd = create_async_command(msg);
+     if (cmd == NULL) {
+-        return -EPROTO;
++        fenced_set_protocol_error(result);
++        return;
+     }
+ 
+     schedule_stonith_command(cmd, device);
+-    return -EINPROGRESS;
++    pcmk__set_result(result, CRM_EX_OK, PCMK_EXEC_PENDING, NULL);
+ }
+ 
+ static void
+@@ -2911,8 +2926,8 @@ handle_request(pcmk__client_t *client, uint32_t id, uint32_t flags,
+ 
+     xmlNode *data = NULL;
+     bool need_reply = true;
++    pcmk__action_result_t result = PCMK__UNKNOWN_RESULT;
+ 
+-    char *output = NULL;
+     const char *op = crm_element_value(request, F_STONITH_OPERATION);
+     const char *client_id = crm_element_value(request, F_STONITH_CLIENTID);
+ 
+@@ -2935,8 +2950,9 @@ handle_request(pcmk__client_t *client, uint32_t id, uint32_t flags,
+         need_reply = false;
+ 
+     } else if (pcmk__str_eq(op, STONITH_OP_EXEC, pcmk__str_none)) {
+-        rc = stonith_device_action(request, &output);
+-        need_reply = (rc != -EINPROGRESS);
++        execute_agent_action(request, &result);
++        need_reply = (result.execution_status != PCMK_EXEC_PENDING);
++        rc = pcmk_rc2legacy(stonith__result2rc(&result));
+ 
+     } else if (pcmk__str_eq(op, STONITH_OP_TIMEOUT_UPDATE, pcmk__str_none)) {
+         const char *call_id = crm_element_value(request, F_STONITH_CALLID);
+@@ -3150,19 +3166,20 @@ handle_request(pcmk__client_t *client, uint32_t id, uint32_t flags,
+ done:
+     // Reply if result is known
+     if (need_reply) {
+-        xmlNode *reply = stonith_construct_reply(request, output, data, rc);
++        xmlNode *reply = stonith_construct_reply(request, result.action_stdout, data, rc);
+ 
+         stonith_send_reply(reply, call_options, remote_peer, client_id);
+         free_xml(reply);
+     }
+ 
+-    free(output);
+     free_xml(data);
+ 
+     crm_debug("Processed %s request from %s %s: %s (rc=%d)",
+               op, ((client == NULL)? "peer" : "client"),
+               ((client == NULL)? remote_peer : pcmk__client_name(client)),
+               ((rc > 0)? "" : pcmk_strerror(rc)), rc);
++
++    pcmk__reset_result(&result);
+ }
+ 
+ static void
+-- 
+2.27.0
+
+
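+The conversion pattern introduced here recurs throughout the series: the
+worker fills a caller-owned result instead of returning an rc, and the call
+site derives whatever legacy code it still needs. A reduced sketch of that
+call-site shape (do_work() is a hypothetical stand-in for the refactored
+functions):
+
+    pcmk__action_result_t result = PCMK__UNKNOWN_RESULT;
+    int rc = pcmk_ok;
+
+    do_work(request, &result);  // worker calls pcmk__set_result() once
+
+    // Async actions report PCMK_EXEC_PENDING; only completed ones get replies
+    bool need_reply = (result.execution_status != PCMK_EXEC_PENDING);
+
+    // Legacy consumers still see the old negative code
+    rc = pcmk_rc2legacy(stonith__result2rc(&result));
+
+    pcmk__reset_result(&result);  // frees exit reason/output if set
+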
+From 9601b2aff1ea6a4eef0bb2701c22c1e971a657eb Mon Sep 17 00:00:00 2001
+From: Ken Gaillot <kgaillot@redhat.com>
+Date: Thu, 18 Nov 2021 17:31:20 -0600
+Subject: [PATCH 03/23] Refactor: fencer: track full result for local fencing
+
+This renames stonith_fence() to fence_locally() for readability, and has it set
+a full result rather than return a legacy return code.
+
+As of this commit, handle_request() just maps the result back to a legacy code,
+but it will make better use of it with planned changes.
+---
+ daemons/fenced/fenced_commands.c | 38 +++++++++++++++++++++-----------
+ 1 file changed, 25 insertions(+), 13 deletions(-)
+
+diff --git a/daemons/fenced/fenced_commands.c b/daemons/fenced/fenced_commands.c
+index 2f59ef84b7..bfb0d71e5f 100644
+--- a/daemons/fenced/fenced_commands.c
++++ b/daemons/fenced/fenced_commands.c
+@@ -2626,37 +2626,49 @@ stonith_fence_get_devices_cb(GList * devices, void *user_data)
+     }
+ }
+ 
+-static int
+-stonith_fence(xmlNode * msg)
++/*!
++ * \internal
++ * \brief Execute a fence action via the local node
++ *
++ * \param[in]  msg     Fencing request
++ * \param[out] result  Where to store result of fence action
++ */
++static void
++fence_locally(xmlNode *msg, pcmk__action_result_t *result)
+ {
+     const char *device_id = NULL;
+     stonith_device_t *device = NULL;
+     async_command_t *cmd = create_async_command(msg);
+     xmlNode *dev = get_xpath_object("//@" F_STONITH_TARGET, msg, LOG_ERR);
+ 
++    CRM_CHECK(result != NULL, return);
++
+     if (cmd == NULL) {
+-        return -EPROTO;
++        fenced_set_protocol_error(result);
++        return;
+     }
+ 
+     device_id = crm_element_value(dev, F_STONITH_DEVICE);
+-    if (device_id) {
++    if (device_id != NULL) {
+         device = g_hash_table_lookup(device_list, device_id);
+         if (device == NULL) {
+             crm_err("Requested device '%s' is not available", device_id);
+-            return -ENODEV;
++            pcmk__set_result(result, CRM_EX_ERROR, PCMK_EXEC_NO_FENCE_DEVICE,
++                             "Requested fence device not found");
++            return;
+         }
+         schedule_stonith_command(cmd, device);
+ 
+     } else {
+         const char *host = crm_element_value(dev, F_STONITH_TARGET);
+ 
+-        if (cmd->options & st_opt_cs_nodeid) {
+-            int nodeid;
+-            crm_node_t *node;
++        if (pcmk_is_set(cmd->options, st_opt_cs_nodeid)) {
++            int nodeid = 0;
++            crm_node_t *node = NULL;
+ 
+             pcmk__scan_min_int(host, &nodeid, 0);
+             node = pcmk__search_known_node_cache(nodeid, NULL, CRM_GET_PEER_ANY);
+-            if (node) {
++            if (node != NULL) {
+                 host = node->uname;
+             }
+         }
+@@ -2666,7 +2678,7 @@ stonith_fence(xmlNode * msg)
+                             TRUE, cmd, stonith_fence_get_devices_cb);
+     }
+ 
+-    return -EINPROGRESS;
++    pcmk__set_result(result, CRM_EX_OK, PCMK_EXEC_PENDING, NULL);
+ }
+ 
+ xmlNode *
+@@ -3016,9 +3028,9 @@ handle_request(pcmk__client_t *client, uint32_t id, uint32_t flags,
+         }
+ 
+     } else if (pcmk__str_eq(op, STONITH_OP_FENCE, pcmk__str_none)) {
+-
+-        if (remote_peer || stand_alone) {
+-            rc = stonith_fence(request);
++        if ((remote_peer != NULL) || stand_alone) {
++            fence_locally(request, &result);
++            rc = pcmk_rc2legacy(stonith__result2rc(&result));
+ 
+         } else if (pcmk_is_set(call_options, st_opt_manual_ack)) {
+             switch (fenced_handle_manual_confirmation(client, request)) {
+-- 
+2.27.0
+
+
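+fence_locally() can now leave three distinct results, each of which maps back
+to the legacy code the old implementation returned. A sketch enumerating them
+(each pcmk__set_result() call shown in isolation, with the assumed legacy
+mapping in the comments):
+
+    pcmk__action_result_t r = PCMK__UNKNOWN_RESULT;
+
+    // Malformed request: CRM_EX_PROTOCOL / PCMK_EXEC_INVALID, legacy -EPROTO
+    fenced_set_protocol_error(&r);
+
+    // Unknown device: PCMK_EXEC_NO_FENCE_DEVICE, legacy -ENODEV
+    pcmk__set_result(&r, CRM_EX_ERROR, PCMK_EXEC_NO_FENCE_DEVICE,
+                     "Requested fence device not found");
+
+    // Successfully scheduled: PCMK_EXEC_PENDING, legacy -EINPROGRESS
+    pcmk__set_result(&r, CRM_EX_OK, PCMK_EXEC_PENDING, NULL);
+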
+From b7c7676cfd36fd72d3b29e86a23db97081e19b03 Mon Sep 17 00:00:00 2001
+From: Ken Gaillot <kgaillot@redhat.com>
+Date: Mon, 29 Nov 2021 17:06:52 -0600
+Subject: [PATCH 04/23] Low: fencer: handle topology level registration errors
+ better
+
+Rename stonith_level_register() to fenced_register_level() for consistency, and
+refactor it to return a full result rather than a legacy return code.
+
+Return a protocol error for missing information in the request XML, and log
+invalid level numbers at warning level. Use a new combination of
+PCMK_EXEC_INVALID with CRM_EX_INVALID_PARAM for invalid levels, so it gets
+mapped back to the legacy code -EINVAL (which was returned before).
+---
+ daemons/fenced/fenced_commands.c  | 52 +++++++++++++++++++++----------
+ daemons/fenced/pacemaker-fenced.c |  9 +++---
+ daemons/fenced/pacemaker-fenced.h |  3 +-
+ lib/fencing/st_actions.c          |  1 +
+ 4 files changed, 44 insertions(+), 21 deletions(-)
+
+diff --git a/daemons/fenced/fenced_commands.c b/daemons/fenced/fenced_commands.c
+index bfb0d71e5f..975f8633a4 100644
+--- a/daemons/fenced/fenced_commands.c
++++ b/daemons/fenced/fenced_commands.c
+@@ -1583,20 +1583,19 @@ parse_device_list(const char *devices)
+ 
+ /*!
+  * \internal
+- * \brief Register a STONITH level for a target
++ * \brief Register a fencing topology level for a target
+  *
+  * Given an XML request specifying the target name, level index, and device IDs
+  * for the level, this will create an entry for the target in the global topology
+  * table if one does not already exist, then append the specified device IDs to
+  * the entry's device list for the specified level.
+  *
+- * \param[in]  msg   XML request for STONITH level registration
+- * \param[out] desc  If not NULL, will be set to string representation ("TARGET[LEVEL]")
+- *
+- * \return pcmk_ok on success, -EINVAL if XML does not specify valid level index
++ * \param[in]  msg     XML request for STONITH level registration
++ * \param[out] desc    If not NULL, set to string representation "TARGET[LEVEL]"
++ * \param[out] result  Where to set result of registration
+  */
+-int
+-stonith_level_register(xmlNode *msg, char **desc)
++void
++fenced_register_level(xmlNode *msg, char **desc, pcmk__action_result_t *result)
+ {
+     int id = 0;
+     xmlNode *level;
+@@ -1607,6 +1606,13 @@ stonith_level_register(xmlNode *msg, char **desc)
+     stonith_key_value_t *dIter = NULL;
+     stonith_key_value_t *devices = NULL;
+ 
++    CRM_CHECK(result != NULL, return);
++
++    if (msg == NULL) {
++        fenced_set_protocol_error(result);
++        return;
++    }
++
+     /* Allow the XML here to point to the level tag directly, or wrapped in
+      * another tag. If directly, don't search by xpath, because it might give
+      * multiple hits (e.g. if the XML is the CIB).
+@@ -1614,11 +1620,15 @@ stonith_level_register(xmlNode *msg, char **desc)
+     if (pcmk__str_eq(TYPE(msg), XML_TAG_FENCING_LEVEL, pcmk__str_casei)) {
+         level = msg;
+     } else {
+-        level = get_xpath_object("//" XML_TAG_FENCING_LEVEL, msg, LOG_ERR);
++        level = get_xpath_object("//" XML_TAG_FENCING_LEVEL, msg, LOG_WARNING);
++    }
++    if (level == NULL) {
++        fenced_set_protocol_error(result);
++        return;
+     }
+-    CRM_CHECK(level != NULL, return -EINVAL);
+ 
+     mode = stonith_level_kind(level);
++
+     target = stonith_level_key(level, mode);
+     crm_element_value_int(level, XML_ATTR_STONITH_INDEX, &id);
+ 
+@@ -1626,18 +1636,26 @@ stonith_level_register(xmlNode *msg, char **desc)
+         *desc = crm_strdup_printf("%s[%d]", target, id);
+     }
+ 
+-    /* Sanity-check arguments */
+-    if (mode >= 3 || (id <= 0) || (id >= ST_LEVEL_MAX)) {
+-        crm_trace("Could not add %s[%d] (%d) to the topology (%d active entries)", target, id, mode, g_hash_table_size(topology));
++    // Ensure level ID is in allowed range
++    if ((id <= 0) || (id >= ST_LEVEL_MAX)) {
++        crm_warn("Ignoring topology registration for %s with invalid level %d",
++                  target, id);
+         free(target);
+-        crm_log_xml_err(level, "Bad topology");
+-        return -EINVAL;
++        crm_log_xml_warn(level, "Bad level");
++        pcmk__set_result(result, CRM_EX_INVALID_PARAM, PCMK_EXEC_INVALID,
++                         "Invalid topology level");
++        return;
+     }
+ 
+     /* Find or create topology table entry */
+     tp = g_hash_table_lookup(topology, target);
+     if (tp == NULL) {
+         tp = calloc(1, sizeof(stonith_topology_t));
++        if (tp == NULL) {
++            pcmk__set_result(result, CRM_EX_ERROR, PCMK_EXEC_ERROR,
++                             strerror(ENOMEM));
++            return;
++        }
+         tp->kind = mode;
+         tp->target = target;
+         tp->target_value = crm_element_value_copy(level, XML_ATTR_STONITH_TARGET_VALUE);
+@@ -1671,7 +1689,8 @@ stonith_level_register(xmlNode *msg, char **desc)
+         crm_info("Target %s has %d active fencing level%s",
+                  tp->target, nlevels, pcmk__plural_s(nlevels));
+     }
+-    return pcmk_ok;
++
++    pcmk__set_result(result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
+ }
+ 
+ int
+@@ -3142,7 +3161,8 @@ handle_request(pcmk__client_t *client, uint32_t id, uint32_t flags,
+         char *device_id = NULL;
+ 
+         if (is_privileged(client, op)) {
+-            rc = stonith_level_register(request, &device_id);
++            fenced_register_level(request, &device_id, &result);
++            rc = pcmk_rc2legacy(stonith__result2rc(&result));
+         } else {
+             rc = -EACCES;
+         }
+diff --git a/daemons/fenced/pacemaker-fenced.c b/daemons/fenced/pacemaker-fenced.c
+index 0a8b3bf6f2..469304f67c 100644
+--- a/daemons/fenced/pacemaker-fenced.c
++++ b/daemons/fenced/pacemaker-fenced.c
+@@ -452,8 +452,8 @@ remove_cib_device(xmlXPathObjectPtr xpathObj)
+ static void
+ handle_topology_change(xmlNode *match, bool remove) 
+ {
+-    int rc;
+     char *desc = NULL;
++    pcmk__action_result_t result = PCMK__UNKNOWN_RESULT;
+ 
+     CRM_CHECK(match != NULL, return);
+     crm_trace("Updating %s", ID(match));
+@@ -467,9 +467,10 @@ handle_topology_change(xmlNode *match, bool remove)
+         free(key);
+     }
+ 
+-    rc = stonith_level_register(match, &desc);
+-    do_stonith_notify_level(STONITH_OP_LEVEL_ADD, rc, desc);
+-
++    fenced_register_level(match, &desc, &result);
++    do_stonith_notify_level(STONITH_OP_LEVEL_ADD,
++                            pcmk_rc2legacy(stonith__result2rc(&result)), desc);
++    pcmk__reset_result(&result);
+     free(desc);
+ }
+ 
+diff --git a/daemons/fenced/pacemaker-fenced.h b/daemons/fenced/pacemaker-fenced.h
+index 5162ada75d..cf114fb979 100644
+--- a/daemons/fenced/pacemaker-fenced.h
++++ b/daemons/fenced/pacemaker-fenced.h
+@@ -218,7 +218,8 @@ void stonith_device_remove(const char *id, bool from_cib);
+ 
+ char *stonith_level_key(xmlNode * msg, int mode);
+ int stonith_level_kind(xmlNode * msg);
+-int stonith_level_register(xmlNode * msg, char **desc);
++void fenced_register_level(xmlNode *msg, char **desc,
++                           pcmk__action_result_t *result);
+ 
+ int stonith_level_remove(xmlNode * msg, char **desc);
+ 
+diff --git a/lib/fencing/st_actions.c b/lib/fencing/st_actions.c
+index 7eaa8b0f2b..37fa849847 100644
+--- a/lib/fencing/st_actions.c
++++ b/lib/fencing/st_actions.c
+@@ -325,6 +325,7 @@ stonith__result2rc(const pcmk__action_result_t *result)
+          */
+         case PCMK_EXEC_INVALID:
+             switch (result->exit_status) {
++                case CRM_EX_INVALID_PARAM:      return EINVAL;
+                 case CRM_EX_INSUFFICIENT_PRIV:  return EACCES;
+                 case CRM_EX_PROTOCOL:           return EPROTO;
+ 
+-- 
+2.27.0
+
+
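+The -EINVAL round trip described above works because stonith__result2rc()
+special-cases PCMK_EXEC_INVALID on the exit status, roughly as follows
+(condensed from the st_actions.c hunk; the surrounding cases are elided):
+
+    switch (result->execution_status) {
+        case PCMK_EXEC_INVALID:
+            switch (result->exit_status) {
+                case CRM_EX_INVALID_PARAM:      return EINVAL;  // added here
+                case CRM_EX_INSUFFICIENT_PRIV:  return EACCES;
+                case CRM_EX_PROTOCOL:           return EPROTO;
+                default:                        break;
+            }
+            break;
+        default:
+            break;
+    }
+
+pcmk_rc2legacy(EINVAL) then yields the historical -EINVAL that callers of
+the old stonith_level_register() expected.
+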
+From 27cedca4070328ecac1761f81c2890059af19dcf Mon Sep 17 00:00:00 2001
+From: Ken Gaillot <kgaillot@redhat.com>
+Date: Mon, 29 Nov 2021 17:29:38 -0600
+Subject: [PATCH 05/23] Low: fencer: handle topology level unregistration
+ errors better
+
+Rename stonith_level_remove() to fenced_unregister_level() for consistency, and
+refactor it to return a full result rather than a legacy return code.
+
+Return a protocol error for missing information in the request XML, and log
+invalid level numbers at warning level. Use PCMK_EXEC_INVALID with
+CRM_EX_INVALID_PARAM for invalid levels, so it gets mapped back to the legacy
+code -EINVAL (which reverses the recent change in ec60f014b, both for backward
+compatibility and because it makes sense -- a missing parameter is a protocol
+error, while an invalid parameter is an invalid parameter error).
+---
+ daemons/fenced/fenced_commands.c  | 52 ++++++++++++++++++++++++-------
+ daemons/fenced/pacemaker-fenced.c |  9 +++---
+ daemons/fenced/pacemaker-fenced.h |  4 +--
+ 3 files changed, 48 insertions(+), 17 deletions(-)
+
+diff --git a/daemons/fenced/fenced_commands.c b/daemons/fenced/fenced_commands.c
+index 975f8633a4..ef41dc0e52 100644
+--- a/daemons/fenced/fenced_commands.c
++++ b/daemons/fenced/fenced_commands.c
+@@ -1693,25 +1693,54 @@ fenced_register_level(xmlNode *msg, char **desc, pcmk__action_result_t *result)
+     pcmk__set_result(result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
+ }
+ 
+-int
+-stonith_level_remove(xmlNode *msg, char **desc)
++/*!
++ * \internal
++ * \brief Unregister a fencing topology level for a target
++ *
++ * Given an XML request specifying the target name and level index (or 0 for all
++ * levels), this will remove any corresponding entry for the target from the
++ * global topology table.
++ *
++ * \param[in]  msg     XML request for STONITH level unregistration
++ * \param[out] desc    If not NULL, set to string representation "TARGET[LEVEL]"
++ * \param[out] result  Where to set result of unregistration
++ */
++void
++fenced_unregister_level(xmlNode *msg, char **desc,
++                        pcmk__action_result_t *result)
+ {
+     int id = -1;
+     stonith_topology_t *tp;
+     char *target;
++    xmlNode *level = NULL;
++
++    CRM_CHECK(result != NULL, return);
+ 
+-    /* Unlike additions, removal requests should always have one level tag */
+-    xmlNode *level = get_xpath_object("//" XML_TAG_FENCING_LEVEL, msg, LOG_ERR);
++    if (msg == NULL) {
++        fenced_set_protocol_error(result);
++        return;
++    }
+ 
+-    CRM_CHECK(level != NULL, return -EPROTO);
++    // Unlike additions, removal requests should always have one level tag
++    level = get_xpath_object("//" XML_TAG_FENCING_LEVEL, msg, LOG_WARNING);
++    if (level == NULL) {
++        fenced_set_protocol_error(result);
++        return;
++    }
+ 
+     target = stonith_level_key(level, -1);
+     crm_element_value_int(level, XML_ATTR_STONITH_INDEX, &id);
+ 
+-    CRM_CHECK((id >= 0) && (id < ST_LEVEL_MAX),
+-              crm_log_xml_warn(msg, "invalid level");
+-              free(target);
+-              return -EPROTO);
++    // Ensure level ID is in allowed range
++    if ((id < 0) || (id >= ST_LEVEL_MAX)) {
++        crm_warn("Ignoring topology unregistration for %s with invalid level %d",
++                  target, id);
++        free(target);
++        crm_log_xml_warn(level, "Bad level");
++        pcmk__set_result(result, CRM_EX_INVALID_PARAM, PCMK_EXEC_INVALID,
++                         "Invalid topology level");
++        return;
++    }
+ 
+     if (desc) {
+         *desc = crm_strdup_printf("%s[%d]", target, id);
+@@ -1745,7 +1774,7 @@ stonith_level_remove(xmlNode *msg, char **desc)
+     }
+ 
+     free(target);
+-    return pcmk_ok;
++    pcmk__set_result(result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
+ }
+ 
+ static char *
+@@ -3173,7 +3202,8 @@ handle_request(pcmk__client_t *client, uint32_t id, uint32_t flags,
+         char *device_id = NULL;
+ 
+         if (is_privileged(client, op)) {
+-            rc = stonith_level_remove(request, &device_id);
++            fenced_unregister_level(request, &device_id, &result);
++            rc = pcmk_rc2legacy(stonith__result2rc(&result));
+         } else {
+             rc = -EACCES;
+         }
+diff --git a/daemons/fenced/pacemaker-fenced.c b/daemons/fenced/pacemaker-fenced.c
+index 469304f67c..56acc93f31 100644
+--- a/daemons/fenced/pacemaker-fenced.c
++++ b/daemons/fenced/pacemaker-fenced.c
+@@ -409,17 +409,18 @@ do_stonith_notify_level(const char *op, int rc, const char *desc)
+ static void
+ topology_remove_helper(const char *node, int level)
+ {
+-    int rc;
+     char *desc = NULL;
++    pcmk__action_result_t result = PCMK__UNKNOWN_RESULT;
+     xmlNode *data = create_xml_node(NULL, XML_TAG_FENCING_LEVEL);
+ 
+     crm_xml_add(data, F_STONITH_ORIGIN, __func__);
+     crm_xml_add_int(data, XML_ATTR_STONITH_INDEX, level);
+     crm_xml_add(data, XML_ATTR_STONITH_TARGET, node);
+ 
+-    rc = stonith_level_remove(data, &desc);
+-    do_stonith_notify_level(STONITH_OP_LEVEL_DEL, rc, desc);
+-
++    fenced_unregister_level(data, &desc, &result);
++    do_stonith_notify_level(STONITH_OP_LEVEL_DEL,
++                            pcmk_rc2legacy(stonith__result2rc(&result)), desc);
++    pcmk__reset_result(&result);
+     free_xml(data);
+     free(desc);
+ }
+diff --git a/daemons/fenced/pacemaker-fenced.h b/daemons/fenced/pacemaker-fenced.h
+index cf114fb979..0006e02e7d 100644
+--- a/daemons/fenced/pacemaker-fenced.h
++++ b/daemons/fenced/pacemaker-fenced.h
+@@ -220,8 +220,8 @@ char *stonith_level_key(xmlNode * msg, int mode);
+ int stonith_level_kind(xmlNode * msg);
+ void fenced_register_level(xmlNode *msg, char **desc,
+                            pcmk__action_result_t *result);
+-
+-int stonith_level_remove(xmlNode * msg, char **desc);
++void fenced_unregister_level(xmlNode *msg, char **desc,
++                             pcmk__action_result_t *result);
+ 
+ stonith_topology_t *find_topology_for_host(const char *host);
+ 
+-- 
+2.27.0
+
+
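+Note the deliberate asymmetry between the two range checks: registration
+rejects level 0, while unregistration accepts it to mean "remove every level
+for this target". Condensed from the two hunks:
+
+    // fenced_register_level(): the level must be a real index
+    if ((id <= 0) || (id >= ST_LEVEL_MAX)) {
+        // CRM_EX_INVALID_PARAM / PCMK_EXEC_INVALID, legacy -EINVAL
+    }
+
+    // fenced_unregister_level(): 0 is valid and removes all levels
+    if ((id < 0) || (id >= ST_LEVEL_MAX)) {
+        // CRM_EX_INVALID_PARAM / PCMK_EXEC_INVALID, legacy -EINVAL
+    }
+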
+From 3f603defca78eb2bdd46c51a80ed04a4c773442b Mon Sep 17 00:00:00 2001
+From: Ken Gaillot <kgaillot@redhat.com>
+Date: Fri, 19 Nov 2021 12:22:33 -0600
+Subject: [PATCH 06/23] Log: fencer: track and log full result when handling
+ requests
+
+handle_request() now tracks and logs a full result rather than just a
+legacy return code.
+---
+ daemons/fenced/fenced_commands.c | 95 ++++++++++++++++++--------------
+ 1 file changed, 53 insertions(+), 42 deletions(-)
+
+diff --git a/daemons/fenced/fenced_commands.c b/daemons/fenced/fenced_commands.c
+index ef41dc0e52..996c18faaa 100644
+--- a/daemons/fenced/fenced_commands.c
++++ b/daemons/fenced/fenced_commands.c
+@@ -2981,9 +2981,7 @@ static void
+ handle_request(pcmk__client_t *client, uint32_t id, uint32_t flags,
+                xmlNode *request, const char *remote_peer)
+ {
+-    int call_options = 0;
+-    int rc = -EOPNOTSUPP;
+-
++    int call_options = st_opt_none;
+     xmlNode *data = NULL;
+     bool need_reply = true;
+     pcmk__action_result_t result = PCMK__UNKNOWN_RESULT;
+@@ -3006,13 +3004,12 @@ handle_request(pcmk__client_t *client, uint32_t id, uint32_t flags,
+         pcmk__ipc_send_xml(client, id, reply, flags);
+         client->request_id = 0;
+         free_xml(reply);
+-        rc = pcmk_ok;
++        pcmk__set_result(&result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
+         need_reply = false;
+ 
+     } else if (pcmk__str_eq(op, STONITH_OP_EXEC, pcmk__str_none)) {
+         execute_agent_action(request, &result);
+         need_reply = (result.execution_status != PCMK_EXEC_PENDING);
+-        rc = pcmk_rc2legacy(stonith__result2rc(&result));
+ 
+     } else if (pcmk__str_eq(op, STONITH_OP_TIMEOUT_UPDATE, pcmk__str_none)) {
+         const char *call_id = crm_element_value(request, F_STONITH_CALLID);
+@@ -3021,7 +3018,7 @@ handle_request(pcmk__client_t *client, uint32_t id, uint32_t flags,
+ 
+         crm_element_value_int(request, F_STONITH_TIMEOUT, &op_timeout);
+         do_stonith_async_timeout_update(client_id, call_id, op_timeout);
+-        rc = pcmk_ok;
++        pcmk__set_result(&result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
+         need_reply = false;
+ 
+     } else if (pcmk__str_eq(op, STONITH_OP_QUERY, pcmk__str_none)) {
+@@ -3033,7 +3030,7 @@ handle_request(pcmk__client_t *client, uint32_t id, uint32_t flags,
+         remove_relay_op(request);
+ 
+         stonith_query(request, remote_peer, client_id, call_options);
+-        rc = pcmk_ok;
++        pcmk__set_result(&result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
+         need_reply = false;
+ 
+     } else if (pcmk__str_eq(op, T_STONITH_NOTIFY, pcmk__str_none)) {
+@@ -3055,7 +3052,7 @@ handle_request(pcmk__client_t *client, uint32_t id, uint32_t flags,
+         }
+ 
+         pcmk__ipc_send_ack(client, id, flags, "ack", CRM_EX_OK);
+-        rc = pcmk_ok;
++        pcmk__set_result(&result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
+         need_reply = false;
+ 
+     } else if (pcmk__str_eq(op, STONITH_OP_RELAY, pcmk__str_none)) {
+@@ -3069,27 +3066,27 @@ handle_request(pcmk__client_t *client, uint32_t id, uint32_t flags,
+                    crm_element_value(dev, F_STONITH_TARGET));
+ 
+         if (initiate_remote_stonith_op(NULL, request, FALSE) == NULL) {
+-            rc = -EPROTO;
++            fenced_set_protocol_error(&result);
+         } else {
+-            rc = -EINPROGRESS;
++            pcmk__set_result(&result, CRM_EX_OK, PCMK_EXEC_PENDING, NULL);
+             need_reply = false;
+         }
+ 
+     } else if (pcmk__str_eq(op, STONITH_OP_FENCE, pcmk__str_none)) {
+         if ((remote_peer != NULL) || stand_alone) {
+             fence_locally(request, &result);
+-            rc = pcmk_rc2legacy(stonith__result2rc(&result));
+ 
+         } else if (pcmk_is_set(call_options, st_opt_manual_ack)) {
+             switch (fenced_handle_manual_confirmation(client, request)) {
+                 case pcmk_rc_ok:
+-                    rc = pcmk_ok;
++                    pcmk__set_result(&result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
+                     break;
+                 case EINPROGRESS:
+-                    rc = -EINPROGRESS;
++                    pcmk__set_result(&result, CRM_EX_OK, PCMK_EXEC_PENDING,
++                                     NULL);
+                     break;
+                 default:
+-                    rc = -EPROTO;
++                    fenced_set_protocol_error(&result);
+                     break;
+             }
+ 
+@@ -3100,17 +3097,15 @@ handle_request(pcmk__client_t *client, uint32_t id, uint32_t flags,
+             const char *action = crm_element_value(dev, F_STONITH_ACTION);
+             const char *device = crm_element_value(dev, F_STONITH_DEVICE);
+ 
+-            if (client) {
++            if (client != NULL) {
+                 int tolerance = 0;
+ 
+                 crm_notice("Client %s wants to fence (%s) %s using %s",
+                            pcmk__client_name(client), action,
+                            target, (device? device : "any device"));
+-
+                 crm_element_value_int(dev, F_STONITH_TOLERANCE, &tolerance);
+-
+                 if (stonith_check_fence_tolerance(tolerance, target, action)) {
+-                    rc = pcmk_ok;
++                    pcmk__set_result(&result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
+                     goto done;
+                 }
+ 
+@@ -3143,24 +3138,24 @@ handle_request(pcmk__client_t *client, uint32_t id, uint32_t flags,
+                 crm_xml_add(request, F_STONITH_REMOTE_OP_ID, op->id);
+                 send_cluster_message(crm_get_peer(0, alternate_host), crm_msg_stonith_ng, request,
+                                      FALSE);
+-                rc = -EINPROGRESS;
++                pcmk__set_result(&result, CRM_EX_OK, PCMK_EXEC_PENDING, NULL);
+ 
+             } else if (initiate_remote_stonith_op(client, request, FALSE) == NULL) {
+-                rc = -EPROTO;
++                fenced_set_protocol_error(&result);
++
+             } else {
+-                rc = -EINPROGRESS;
++                pcmk__set_result(&result, CRM_EX_OK, PCMK_EXEC_PENDING, NULL);
+             }
+         }
+-        need_reply = (rc != -EINPROGRESS);
++        need_reply = (result.execution_status != PCMK_EXEC_PENDING);
+ 
+     } else if (pcmk__str_eq(op, STONITH_OP_FENCE_HISTORY, pcmk__str_none)) {
+         stonith_fence_history(request, &data, remote_peer, call_options);
+-        rc = pcmk_ok;
++        pcmk__set_result(&result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
+         if (pcmk_is_set(call_options, st_opt_discard_reply)) {
+             /* we don't expect answers to the broadcast
+              * we might have sent out
+              */
+-            rc = pcmk_ok;
+             need_reply = false;
+         }
+ 
+@@ -3168,11 +3163,18 @@ handle_request(pcmk__client_t *client, uint32_t id, uint32_t flags,
+         const char *device_id = NULL;
+ 
+         if (is_privileged(client, op)) {
+-            rc = stonith_device_register(request, &device_id, FALSE);
++            int rc = stonith_device_register(request, &device_id, FALSE);
++
++            pcmk__set_result(&result,
++                             ((rc == pcmk_ok)? CRM_EX_OK : CRM_EX_ERROR),
++                             stonith__legacy2status(rc),
++                             ((rc == pcmk_ok)? NULL : pcmk_strerror(rc)));
+         } else {
+-            rc = -EACCES;
++            pcmk__set_result(&result, CRM_EX_INSUFFICIENT_PRIV,
++                             PCMK_EXEC_INVALID,
++                             "Unprivileged users must register device via CIB");
+         }
+-        do_stonith_notify_device(op, rc, device_id);
++        do_stonith_notify_device(op, pcmk_rc2legacy(stonith__result2rc(&result)), device_id);
+ 
+     } else if (pcmk__str_eq(op, STONITH_OP_DEVICE_DEL, pcmk__str_none)) {
+         xmlNode *dev = get_xpath_object("//" F_STONITH_DEVICE, request, LOG_ERR);
+@@ -3180,22 +3182,25 @@ handle_request(pcmk__client_t *client, uint32_t id, uint32_t flags,
+ 
+         if (is_privileged(client, op)) {
+             stonith_device_remove(device_id, false);
+-            rc = pcmk_ok;
++            pcmk__set_result(&result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
+         } else {
+-            rc = -EACCES;
++            pcmk__set_result(&result, CRM_EX_INSUFFICIENT_PRIV,
++                             PCMK_EXEC_INVALID,
++                             "Unprivileged users must delete device via CIB");
+         }
+-        do_stonith_notify_device(op, rc, device_id);
++        do_stonith_notify_device(op, pcmk_rc2legacy(stonith__result2rc(&result)), device_id);
+ 
+     } else if (pcmk__str_eq(op, STONITH_OP_LEVEL_ADD, pcmk__str_none)) {
+         char *device_id = NULL;
+ 
+         if (is_privileged(client, op)) {
+             fenced_register_level(request, &device_id, &result);
+-            rc = pcmk_rc2legacy(stonith__result2rc(&result));
+         } else {
+-            rc = -EACCES;
++            pcmk__set_result(&result, CRM_EX_INSUFFICIENT_PRIV,
++                             PCMK_EXEC_INVALID,
++                             "Unprivileged users must add level via CIB");
+         }
+-        do_stonith_notify_level(op, rc, device_id);
++        do_stonith_notify_level(op, pcmk_rc2legacy(stonith__result2rc(&result)), device_id);
+         free(device_id);
+ 
+     } else if (pcmk__str_eq(op, STONITH_OP_LEVEL_DEL, pcmk__str_none)) {
+@@ -3203,11 +3208,12 @@ handle_request(pcmk__client_t *client, uint32_t id, uint32_t flags,
+ 
+         if (is_privileged(client, op)) {
+             fenced_unregister_level(request, &device_id, &result);
+-            rc = pcmk_rc2legacy(stonith__result2rc(&result));
+         } else {
+-            rc = -EACCES;
++            pcmk__set_result(&result, CRM_EX_INSUFFICIENT_PRIV,
++                             PCMK_EXEC_INVALID,
++                             "Unprivileged users must delete level via CIB");
+         }
+-        do_stonith_notify_level(op, rc, device_id);
++        do_stonith_notify_level(op, pcmk_rc2legacy(stonith__result2rc(&result)), device_id);
+ 
+     } else if(pcmk__str_eq(op, CRM_OP_RM_NODE_CACHE, pcmk__str_casei)) {
+         int node_id = 0;
+@@ -3216,31 +3222,36 @@ handle_request(pcmk__client_t *client, uint32_t id, uint32_t flags,
+         crm_element_value_int(request, XML_ATTR_ID, &node_id);
+         name = crm_element_value(request, XML_ATTR_UNAME);
+         reap_crm_member(node_id, name);
+-        rc = pcmk_ok;
++        pcmk__set_result(&result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
+         need_reply = false;
+ 
+     } else {
+         crm_err("Unknown IPC request %s from %s %s", op,
+                 ((client == NULL)? "peer" : "client"),
+                 ((client == NULL)? remote_peer : pcmk__client_name(client)));
++        pcmk__set_result(&result, CRM_EX_PROTOCOL, PCMK_EXEC_INVALID,
++                         "Unknown IPC request type (bug?)");
+     }
+ 
+ done:
+     // Reply if result is known
+     if (need_reply) {
+-        xmlNode *reply = stonith_construct_reply(request, result.action_stdout, data, rc);
++        xmlNode *reply = stonith_construct_reply(request, result.action_stdout, data,
++                                                 pcmk_rc2legacy(stonith__result2rc(&result)));
+ 
+         stonith_send_reply(reply, call_options, remote_peer, client_id);
+         free_xml(reply);
+     }
+ 
+-    free_xml(data);
+-
+-    crm_debug("Processed %s request from %s %s: %s (rc=%d)",
++    crm_debug("Processed %s request from %s %s: %s%s%s%s",
+               op, ((client == NULL)? "peer" : "client"),
+               ((client == NULL)? remote_peer : pcmk__client_name(client)),
+-              ((rc > 0)? "" : pcmk_strerror(rc)), rc);
++              pcmk_exec_status_str(result.execution_status),
++              (result.exit_reason == NULL)? "" : " (",
++              (result.exit_reason == NULL)? "" : result.exit_reason,
++              (result.exit_reason == NULL)? "" : ")");
+ 
++    free_xml(data);
+     pcmk__reset_result(&result);
+ }
+ 
+-- 
+2.27.0
+
+
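+The three-ternary idiom in the new debug message recurs in the logging
+changes below, so it is worth isolating. A standalone sketch (the function
+name is hypothetical):
+
+    // Logs "status" with no exit reason, or "status (reason)" with one
+    static void
+    log_result_sketch(const pcmk__action_result_t *result)
+    {
+        crm_debug("Result: %s%s%s%s",
+                  pcmk_exec_status_str(result->execution_status),
+                  (result->exit_reason == NULL)? "" : " (",
+                  (result->exit_reason == NULL)? "" : result->exit_reason,
+                  (result->exit_reason == NULL)? "" : ")");
+    }
+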
+From 5e13199699a4e9279520b3668c072e3db49c9782 Mon Sep 17 00:00:00 2001
+From: Ken Gaillot <kgaillot@redhat.com>
+Date: Fri, 19 Nov 2021 15:10:36 -0600
+Subject: [PATCH 07/23] Feature: fencer: pass full result in replies to
+ requests
+
+Rename stonith_construct_reply() to fenced_construct_reply() for consistency,
+make it take a full result as an argument rather than separate arguments for
+legacy return code and output, and add the full result to the reply (along with
+the legacy return code, for backward compatibility).
+
+This is used for peer query replies and some request replies (including replies
+to local clients who requested fencing). Other replies, such as those built by
+construct_async_reply(), are not affected by this commit.
+---
+ daemons/fenced/fenced_commands.c  | 33 ++++++++++++++++++++++---------
+ daemons/fenced/fenced_remote.c    |  9 ++++++++-
+ daemons/fenced/pacemaker-fenced.h |  4 ++--
+ 3 files changed, 34 insertions(+), 12 deletions(-)
+
+diff --git a/daemons/fenced/fenced_commands.c b/daemons/fenced/fenced_commands.c
+index 996c18faaa..84f89e8daf 100644
+--- a/daemons/fenced/fenced_commands.c
++++ b/daemons/fenced/fenced_commands.c
+@@ -2322,6 +2322,7 @@ stonith_query(xmlNode * msg, const char *remote_peer, const char *client_id, int
+     const char *target = NULL;
+     int timeout = 0;
+     xmlNode *dev = get_xpath_object("//@" F_STONITH_ACTION, msg, LOG_NEVER);
++    pcmk__action_result_t result = PCMK__UNKNOWN_RESULT;
+ 
+     crm_element_value_int(msg, F_STONITH_TIMEOUT, &timeout);
+     if (dev) {
+@@ -2338,7 +2339,8 @@ stonith_query(xmlNode * msg, const char *remote_peer, const char *client_id, int
+     crm_log_xml_debug(msg, "Query");
+     query = calloc(1, sizeof(struct st_query_data));
+ 
+-    query->reply = stonith_construct_reply(msg, NULL, NULL, pcmk_ok);
++    pcmk__set_result(&result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
++    query->reply = fenced_construct_reply(msg, NULL, &result);
+     query->remote_peer = remote_peer ? strdup(remote_peer) : NULL;
+     query->client_id = client_id ? strdup(client_id) : NULL;
+     query->target = target ? strdup(target) : NULL;
+@@ -2729,8 +2731,23 @@ fence_locally(xmlNode *msg, pcmk__action_result_t *result)
+     pcmk__set_result(result, CRM_EX_OK, PCMK_EXEC_PENDING, NULL);
+ }
+ 
++/*!
++ * \internal
++ * \brief Build an XML reply for a fencing operation
++ *
++ * \param[in] request  Request that reply is for
++ * \param[in] data     If not NULL, add to reply as call data
++ * \param[in] result   Full result of fencing operation
++ *
++ * \return Newly created XML reply
++ * \note The caller is responsible for freeing the returned reply with free_xml().
++ * \note This has some overlap with construct_async_reply(), but that copies
++ *       values from an async_command_t, whereas this one copies them from the
++ *       request.
++ */
+ xmlNode *
+-stonith_construct_reply(xmlNode * request, const char *output, xmlNode * data, int rc)
++fenced_construct_reply(xmlNode *request, xmlNode *data,
++                       pcmk__action_result_t *result)
+ {
+     xmlNode *reply = NULL;
+ 
+@@ -2738,8 +2755,7 @@ stonith_construct_reply(xmlNode * request, const char *output, xmlNode * data, i
+ 
+     crm_xml_add(reply, "st_origin", __func__);
+     crm_xml_add(reply, F_TYPE, T_STONITH_NG);
+-    crm_xml_add(reply, F_STONITH_OUTPUT, output);
+-    crm_xml_add_int(reply, F_STONITH_RC, rc);
++    stonith__xe_set_result(reply, result);
+ 
+     if (request == NULL) {
+         /* Most likely, this is the result of a stonith operation that was
+@@ -2749,12 +2765,14 @@ stonith_construct_reply(xmlNode * request, const char *output, xmlNode * data, i
+          * @TODO Maybe synchronize this information at start-up?
+          */
+         crm_warn("Missing request information for client notifications for "
+-                 "operation with result %d (initiated before we came up?)", rc);
++                 "operation with result '%s' (initiated before we came up?)",
++                 pcmk_exec_status_str(result->execution_status));
+ 
+     } else {
+         const char *name = NULL;
+         const char *value = NULL;
+ 
++        // Attributes to copy from request to reply
+         const char *names[] = {
+             F_STONITH_OPERATION,
+             F_STONITH_CALLID,
+@@ -2764,8 +2782,6 @@ stonith_construct_reply(xmlNode * request, const char *output, xmlNode * data, i
+             F_STONITH_CALLOPTS
+         };
+ 
+-        crm_trace("Creating a result reply with%s reply output (rc=%d)",
+-                  (data? "" : "out"), rc);
+         for (int lpc = 0; lpc < PCMK__NELEM(names); lpc++) {
+             name = names[lpc];
+             value = crm_element_value(request, name);
+@@ -3236,8 +3252,7 @@ handle_request(pcmk__client_t *client, uint32_t id, uint32_t flags,
+ done:
+     // Reply if result is known
+     if (need_reply) {
+-        xmlNode *reply = stonith_construct_reply(request, result.action_stdout, data,
+-                                                 pcmk_rc2legacy(stonith__result2rc(&result)));
++        xmlNode *reply = fenced_construct_reply(request, data, &result);
+ 
+         stonith_send_reply(reply, call_options, remote_peer, client_id);
+         free_xml(reply);
+diff --git a/daemons/fenced/fenced_remote.c b/daemons/fenced/fenced_remote.c
+index 8feb401477..baa07d9e78 100644
+--- a/daemons/fenced/fenced_remote.c
++++ b/daemons/fenced/fenced_remote.c
+@@ -415,7 +415,14 @@ handle_local_reply_and_notify(remote_fencing_op_t * op, xmlNode * data, int rc)
+     crm_xml_add(data, F_STONITH_TARGET, op->target);
+     crm_xml_add(data, F_STONITH_OPERATION, op->action);
+ 
+-    reply = stonith_construct_reply(op->request, NULL, data, rc);
++    {
++        pcmk__action_result_t result = PCMK__UNKNOWN_RESULT;
++
++        pcmk__set_result(&result,
++                         ((rc == pcmk_ok)? CRM_EX_OK : CRM_EX_ERROR),
++                         stonith__legacy2status(rc), NULL);
++        reply = fenced_construct_reply(op->request, data, &result);
++    }
+     crm_xml_add(reply, F_STONITH_DELEGATE, op->delegate);
+ 
+     /* Send fencing OP reply to local client that initiated fencing */
+diff --git a/daemons/fenced/pacemaker-fenced.h b/daemons/fenced/pacemaker-fenced.h
+index 0006e02e7d..d5f4bc79fd 100644
+--- a/daemons/fenced/pacemaker-fenced.h
++++ b/daemons/fenced/pacemaker-fenced.h
+@@ -228,8 +228,8 @@ stonith_topology_t *find_topology_for_host(const char *host);
+ void do_local_reply(xmlNode * notify_src, const char *client_id, gboolean sync_reply,
+                            gboolean from_peer);
+ 
+-xmlNode *stonith_construct_reply(xmlNode * request, const char *output, xmlNode * data,
+-                                        int rc);
++xmlNode *fenced_construct_reply(xmlNode *request, xmlNode *data,
++                                pcmk__action_result_t *result);
+ 
+ void
+  do_stonith_async_timeout_update(const char *client, const char *call_id, int timeout);
+-- 
+2.27.0
+
+
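+Putting the new reply API together, a synchronously completed request is now
+answered roughly like this at the call site (a sketch stitched from the
+handle_request() and stonith_query() hunks above):
+
+    pcmk__action_result_t result = PCMK__UNKNOWN_RESULT;
+    xmlNode *reply = NULL;
+
+    pcmk__set_result(&result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
+    reply = fenced_construct_reply(request, NULL /* no call data */, &result);
+    stonith_send_reply(reply, call_options, remote_peer, client_id);
+
+    free_xml(reply);
+    pcmk__reset_result(&result);
+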
+From b32aa252b321ff40c834d153cb23f8b3be471611 Mon Sep 17 00:00:00 2001
+From: Ken Gaillot <kgaillot@redhat.com>
+Date: Fri, 19 Nov 2021 15:43:20 -0600
+Subject: [PATCH 08/23] Log: fencer: grab and log full result when processing
+ peer fencing replies
+
+fenced_process_fencing_reply() now checks for the full result, instead of only
+a legacy return code, in peer replies, and uses it in log messages.
+---
+ daemons/fenced/fenced_remote.c | 63 ++++++++++++++++++++--------------
+ 1 file changed, 37 insertions(+), 26 deletions(-)
+
+diff --git a/daemons/fenced/fenced_remote.c b/daemons/fenced/fenced_remote.c
+index baa07d9e78..c6369f0051 100644
+--- a/daemons/fenced/fenced_remote.c
++++ b/daemons/fenced/fenced_remote.c
+@@ -2095,21 +2095,21 @@ process_remote_stonith_query(xmlNode * msg)
+ void
+ fenced_process_fencing_reply(xmlNode *msg)
+ {
+-    int rc = 0;
+     const char *id = NULL;
+     const char *device = NULL;
+     remote_fencing_op_t *op = NULL;
+     xmlNode *dev = get_xpath_object("//@" F_STONITH_REMOTE_OP_ID, msg, LOG_ERR);
++    pcmk__action_result_t result = PCMK__UNKNOWN_RESULT;
+ 
+     CRM_CHECK(dev != NULL, return);
+ 
+     id = crm_element_value(dev, F_STONITH_REMOTE_OP_ID);
+     CRM_CHECK(id != NULL, return);
+ 
+-    dev = get_xpath_object("//@" F_STONITH_RC, msg, LOG_ERR);
++    dev = stonith__find_xe_with_result(msg);
+     CRM_CHECK(dev != NULL, return);
+ 
+-    crm_element_value_int(dev, F_STONITH_RC, &rc);
++    stonith__xe_get_result(dev, &result);
+ 
+     device = crm_element_value(dev, F_STONITH_DEVICE);
+ 
+@@ -2117,7 +2117,7 @@ fenced_process_fencing_reply(xmlNode *msg)
+         op = g_hash_table_lookup(stonith_remote_op_list, id);
+     }
+ 
+-    if (op == NULL && rc == pcmk_ok) {
++    if ((op == NULL) && pcmk__result_ok(&result)) {
+         /* Record successful fencing operations */
+         const char *client_id = crm_element_value(dev, F_STONITH_CLIENTID);
+ 
+@@ -2139,16 +2139,19 @@ fenced_process_fencing_reply(xmlNode *msg)
+     }
+ 
+     if (pcmk__str_eq(crm_element_value(msg, F_SUBTYPE), "broadcast", pcmk__str_casei)) {
+-        crm_debug("Finalizing action '%s' targeting %s on behalf of %s@%s: %s "
++        crm_debug("Finalizing action '%s' targeting %s on behalf of %s@%s: %s%s%s%s "
+                   CRM_XS " id=%.8s",
+                   op->action, op->target, op->client_name, op->originator,
+-                  pcmk_strerror(rc), op->id);
+-        if (rc == pcmk_ok) {
++                  pcmk_exec_status_str(result.execution_status),
++                  (result.exit_reason == NULL)? "" : " (",
++                  (result.exit_reason == NULL)? "" : result.exit_reason,
++                  (result.exit_reason == NULL)? "" : ")", op->id);
++        if (pcmk__result_ok(&result)) {
+             op->state = st_done;
+         } else {
+             op->state = st_failed;
+         }
+-        remote_op_done(op, msg, rc, FALSE);
++        remote_op_done(op, msg, pcmk_rc2legacy(stonith__result2rc(&result)), FALSE);
+         return;
+     } else if (!pcmk__str_eq(op->originator, stonith_our_uname, pcmk__str_casei)) {
+         /* If this isn't a remote level broadcast, and we are not the
+@@ -2162,28 +2165,35 @@ fenced_process_fencing_reply(xmlNode *msg)
+     if (pcmk_is_set(op->call_options, st_opt_topology)) {
+         const char *device = crm_element_value(msg, F_STONITH_DEVICE);
+ 
+-        crm_notice("Action '%s' targeting %s using %s on behalf of %s@%s: %s "
+-                   CRM_XS " rc=%d",
++        crm_notice("Action '%s' targeting %s using %s on behalf of %s@%s: %s%s%s%s",
+                    op->action, op->target, device, op->client_name,
+-                   op->originator, pcmk_strerror(rc), rc);
++                   op->originator,
++                   pcmk_exec_status_str(result.execution_status),
++                  (result.exit_reason == NULL)? "" : " (",
++                  (result.exit_reason == NULL)? "" : result.exit_reason,
++                  (result.exit_reason == NULL)? "" : ")");
+ 
+         /* We own the op, and it is complete. broadcast the result to all nodes
+          * and notify our local clients. */
+         if (op->state == st_done) {
+-            remote_op_done(op, msg, rc, FALSE);
++            remote_op_done(op, msg, pcmk_rc2legacy(stonith__result2rc(&result)), FALSE);
+             return;
+         }
+ 
+-        if ((op->phase == 2) && (rc != pcmk_ok)) {
++        if ((op->phase == 2) && !pcmk__result_ok(&result)) {
+             /* A remapped "on" failed, but the node was already turned off
+              * successfully, so ignore the error and continue.
+              */
+-            crm_warn("Ignoring %s 'on' failure (exit code %d) targeting %s "
+-                     "after successful 'off'", device, rc, op->target);
+-            rc = pcmk_ok;
++            crm_warn("Ignoring %s 'on' failure (%s%s%s) targeting %s "
++                     "after successful 'off'",
++                     device, pcmk_exec_status_str(result.execution_status),
++                     (result.exit_reason == NULL)? "" : ": ",
++                     (result.exit_reason == NULL)? "" : result.exit_reason,
++                     op->target);
++            pcmk__set_result(&result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
+         }
+ 
+-        if (rc == pcmk_ok) {
++        if (pcmk__result_ok(&result)) {
+             /* An operation completed successfully. Try another device if
+              * necessary, otherwise mark the operation as done. */
+             advance_topology_device_in_level(op, device, msg);
+@@ -2193,29 +2203,30 @@ fenced_process_fencing_reply(xmlNode *msg)
+              * levels are available, mark this operation as failed and report results. */
+             if (advance_topology_level(op, false) != pcmk_rc_ok) {
+                 op->state = st_failed;
+-                remote_op_done(op, msg, rc, FALSE);
++                remote_op_done(op, msg, pcmk_rc2legacy(stonith__result2rc(&result)), FALSE);
+                 return;
+             }
+         }
+-    } else if (rc == pcmk_ok && op->devices == NULL) {
++    } else if (pcmk__result_ok(&result) && (op->devices == NULL)) {
+         crm_trace("All done for %s", op->target);
+-
+         op->state = st_done;
+-        remote_op_done(op, msg, rc, FALSE);
++        remote_op_done(op, msg, pcmk_rc2legacy(stonith__result2rc(&result)), FALSE);
+         return;
+-    } else if (rc == -ETIME && op->devices == NULL) {
++    } else if ((result.execution_status == PCMK_EXEC_TIMEOUT)
++               && (op->devices == NULL)) {
+         /* If the operation timed out don't bother retrying other peers. */
+         op->state = st_failed;
+-        remote_op_done(op, msg, rc, FALSE);
++        remote_op_done(op, msg, pcmk_rc2legacy(stonith__result2rc(&result)), FALSE);
+         return;
+     } else {
+         /* fall-through and attempt other fencing action using another peer */
+     }
+ 
+     /* Retry on failure */
+-    crm_trace("Next for %s on behalf of %s@%s (rc was %d)", op->target, op->originator,
+-              op->client_name, rc);
+-    call_remote_stonith(op, NULL, rc);
++    crm_trace("Next for %s on behalf of %s@%s (result was: %s)",
++              op->target, op->originator, op->client_name,
++              pcmk_exec_status_str(result.execution_status));
++    call_remote_stonith(op, NULL, pcmk_rc2legacy(stonith__result2rc(&result)));
+ }
+ 
+ gboolean
+-- 
+2.27.0
+
+
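+On the receiving side, the counterpart helpers recover the structured result
+from the reply XML. A sketch of the consumer path used above (assuming that
+stonith__xe_get_result() falls back to the legacy st_rc attribute when a
+peer is too old to send the new fields):
+
+    pcmk__action_result_t result = PCMK__UNKNOWN_RESULT;
+    xmlNode *dev = stonith__find_xe_with_result(msg);  // element with result
+
+    CRM_CHECK(dev != NULL, return);
+    stonith__xe_get_result(dev, &result);
+
+    if (pcmk__result_ok(&result)) {
+        // CRM_EX_OK with a successful execution status
+    } else if (result.execution_status == PCMK_EXEC_TIMEOUT) {
+        // timed out: don't bother retrying other peers
+    }
+    pcmk__reset_result(&result);
+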
+From afb5706ac606a8ea883aa1597ee63d9891cc2e13 Mon Sep 17 00:00:00 2001
+From: Ken Gaillot <kgaillot@redhat.com>
+Date: Fri, 19 Nov 2021 15:56:30 -0600
+Subject: [PATCH 09/23] Refactor: fencer: pass full result of previous failed
+ action when initiating peer fencing
+
+Rename call_remote_stonith() to request_peer_fencing() for readability, and
+make it take the full result of the previous failed action, rather than just
+its legacy return code, as an argument.
+
+This does cause one change in behavior: if topology is in use, a previous
+attempt failed, and no more peers have the appropriate device, then the
+legacy return code will be -ENODEV rather than -EHOSTUNREACH.
+These are treated similarly internally, and hopefully that will not cause
+problems for external code.
+---
+ daemons/fenced/fenced_remote.c | 89 +++++++++++++++++++++++++---------
+ 1 file changed, 67 insertions(+), 22 deletions(-)
+
+diff --git a/daemons/fenced/fenced_remote.c b/daemons/fenced/fenced_remote.c
+index c6369f0051..31d5ee6e93 100644
+--- a/daemons/fenced/fenced_remote.c
++++ b/daemons/fenced/fenced_remote.c
+@@ -76,12 +76,13 @@ typedef struct {
+ 
+ GHashTable *stonith_remote_op_list = NULL;
+ 
+-void call_remote_stonith(remote_fencing_op_t *op, peer_device_info_t *peer,
+-                         int rc);
+ static void remote_op_done(remote_fencing_op_t * op, xmlNode * data, int rc, int dup);
+ extern xmlNode *stonith_create_op(int call_id, const char *token, const char *op, xmlNode * data,
+                                   int call_options);
+ 
++static void request_peer_fencing(remote_fencing_op_t *op,
++                                 peer_device_info_t *peer,
++                                 pcmk__action_result_t *result);
+ static void report_timeout_period(remote_fencing_op_t * op, int op_timeout);
+ static int get_op_total_timeout(const remote_fencing_op_t *op,
+                                 const peer_device_info_t *chosen_peer);
+@@ -609,12 +610,16 @@ static gboolean
+ remote_op_timeout_one(gpointer userdata)
+ {
+     remote_fencing_op_t *op = userdata;
++    pcmk__action_result_t result = PCMK__UNKNOWN_RESULT;
+ 
+     op->op_timer_one = 0;
+ 
+     crm_notice("Peer's '%s' action targeting %s for client %s timed out " CRM_XS
+                " id=%.8s", op->action, op->target, op->client_name, op->id);
+-    call_remote_stonith(op, NULL, -ETIME);
++    pcmk__set_result(&result, CRM_EX_ERROR, PCMK_EXEC_TIMEOUT, NULL);
++
++    // Try another device, if appropriate
++    request_peer_fencing(op, NULL, &result);
+     return FALSE;
+ }
+ 
+@@ -685,9 +690,13 @@ remote_op_query_timeout(gpointer data)
+         crm_debug("Operation %.8s targeting %s already in progress",
+                   op->id, op->target);
+     } else if (op->query_results) {
++        // Result won't be used in this case, but we need to pass something
++        pcmk__action_result_t result = PCMK__UNKNOWN_RESULT;
++
++        // Query succeeded, so attempt the actual fencing
+         crm_debug("Query %.8s targeting %s complete (state=%s)",
+                   op->id, op->target, stonith_op_state_str(op->state));
+-        call_remote_stonith(op, NULL, pcmk_ok);
++        request_peer_fencing(op, NULL, &result);
+     } else {
+         crm_debug("Query %.8s targeting %s timed out (state=%s)",
+                   op->id, op->target, stonith_op_state_str(op->state));
+@@ -1533,6 +1542,10 @@ static void
+ advance_topology_device_in_level(remote_fencing_op_t *op, const char *device,
+                                  xmlNode *msg)
+ {
++    pcmk__action_result_t result = PCMK__UNKNOWN_RESULT;
++
++    pcmk__set_result(&result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
++
+     /* Advance to the next device at this topology level, if any */
+     if (op->devices) {
+         op->devices = op->devices->next;
+@@ -1569,7 +1582,7 @@ advance_topology_device_in_level(remote_fencing_op_t *op, const char *device,
+             op->delay = 0;
+         }
+ 
+-        call_remote_stonith(op, NULL, pcmk_ok);
++        request_peer_fencing(op, NULL, &result);
+     } else {
+         /* We're done with all devices and phases, so finalize operation */
+         crm_trace("Marking complex fencing op targeting %s as complete",
+@@ -1598,15 +1611,30 @@ check_watchdog_fencing_and_wait(remote_fencing_op_t * op)
+     return FALSE;
+ }
+ 
+-void
+-call_remote_stonith(remote_fencing_op_t *op, peer_device_info_t *peer, int rc)
++/*!
++ * \internal
++ * \brief Ask a peer to execute a fencing operation
++ *
++ * \param[in] op      Fencing operation to be executed
++ * \param[in] peer    If NULL or topology is in use, choose best peer to execute
++ *                    the fencing, otherwise use this peer
++ * \param[in] result  Full result of previous failed attempt, if any (used as
++ *                    final result only if a previous attempt failed, topology
++ *                    is not in use, and no devices remain to be attempted)
++ */
++static void
++request_peer_fencing(remote_fencing_op_t *op, peer_device_info_t *peer,
++                    pcmk__action_result_t *result)
+ {
+     const char *device = NULL;
+-    int timeout = op->base_timeout;
++    int timeout;
++
++    CRM_CHECK(op != NULL, return);
+ 
+     crm_trace("Action %.8s targeting %s for %s is %s",
+               op->id, op->target, op->client_name,
+               stonith_op_state_str(op->state));
++    timeout = op->base_timeout;
+     if ((peer == NULL) && !pcmk_is_set(op->call_options, st_opt_topology)) {
+         peer = stonith_choose_peer(op);
+     }
+@@ -1623,9 +1651,14 @@ call_remote_stonith(remote_fencing_op_t *op, peer_device_info_t *peer, int rc)
+     }
+ 
+     if (pcmk_is_set(op->call_options, st_opt_topology) && op->devices) {
+-        /* Ignore any peer preference, they might not have the device we need */
+-        /* When using topology, stonith_choose_peer() removes the device from
+-         * further consideration, so be sure to calculate timeout beforehand */
++        /* Ignore the caller's peer preference if topology is in use, because
++         * that peer might not have access to the required device. With
++         * topology, stonith_choose_peer() removes the device from further
++         * consideration, so the timeout must be calculated beforehand.
++         *
++         * @TODO Basing the total timeout on the caller's preferred peer (above)
++         *       is less than ideal.
++         */
+         peer = stonith_choose_peer(op);
+ 
+         device = op->devices->data;
+@@ -1722,8 +1755,6 @@ call_remote_stonith(remote_fencing_op_t *op, peer_device_info_t *peer, int rc)
+         finalize_timed_out_op(op);
+ 
+     } else if(op->replies >= op->replies_expected || op->replies >= fencing_active_peers()) {
+-//        int rc = -EHOSTUNREACH;
+-
+         /* if the operation never left the query state,
+          * but we have all the expected replies, then no devices
+          * are available to execute the fencing operation. */
+@@ -1735,17 +1766,28 @@ call_remote_stonith(remote_fencing_op_t *op, peer_device_info_t *peer, int rc)
+             }
+         }
+ 
++        // This is the only case in which result will be used
++        CRM_CHECK(result != NULL, return);
++
+         if (op->state == st_query) {
+             crm_info("No peers (out of %d) have devices capable of fencing "
+                      "(%s) %s for client %s " CRM_XS " state=%s",
+                      op->replies, op->action, op->target, op->client_name,
+                      stonith_op_state_str(op->state));
+ 
+-            rc = -ENODEV;
++            pcmk__reset_result(result);
++            pcmk__set_result(result, CRM_EX_ERROR, PCMK_EXEC_NO_FENCE_DEVICE,
++                             NULL);
+         } else {
+             if (pcmk_is_set(op->call_options, st_opt_topology)) {
+-                rc = -EHOSTUNREACH;
+-            } 
++                pcmk__reset_result(result);
++                pcmk__set_result(result, CRM_EX_ERROR,
++                                 PCMK_EXEC_NO_FENCE_DEVICE, NULL);
++            }
++            /* ... else use result provided by caller -- overwriting it with
++               PCMK_EXEC_NO_FENCE_DEVICE would prevent remote_op_done() from
++               setting the correct delegate if needed.
++             */
+ 
+             crm_info("No peers (out of %d) are capable of fencing (%s) %s "
+                      "for client %s " CRM_XS " state=%s",
+@@ -1754,7 +1796,7 @@ call_remote_stonith(remote_fencing_op_t *op, peer_device_info_t *peer, int rc)
+         }
+ 
+         op->state = st_failed;
+-        remote_op_done(op, NULL, rc, FALSE);
++        remote_op_done(op, NULL, pcmk_rc2legacy(stonith__result2rc(result)), FALSE);
+ 
+     } else {
+         crm_info("Waiting for additional peers capable of fencing (%s) %s%s%s "
+@@ -2004,6 +2046,7 @@ process_remote_stonith_query(xmlNode * msg)
+     peer_device_info_t *peer = NULL;
+     uint32_t replies_expected;
+     xmlNode *dev = get_xpath_object("//@" F_STONITH_REMOTE_OP_ID, msg, LOG_ERR);
++    pcmk__action_result_t result = PCMK__UNKNOWN_RESULT;
+ 
+     CRM_CHECK(dev != NULL, return -EPROTO);
+ 
+@@ -2038,6 +2081,8 @@ process_remote_stonith_query(xmlNode * msg)
+         peer = add_result(op, host, ndevices, dev);
+     }
+ 
++    pcmk__set_result(&result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
++
+     if (pcmk_is_set(op->call_options, st_opt_topology)) {
+         /* If we start the fencing before all the topology results are in,
+          * it is possible fencing levels will be skipped because of the missing
+@@ -2045,12 +2090,12 @@ process_remote_stonith_query(xmlNode * msg)
+         if (op->state == st_query && all_topology_devices_found(op)) {
+             /* All the query results are in for the topology, start the fencing ops. */
+             crm_trace("All topology devices found");
+-            call_remote_stonith(op, peer, pcmk_ok);
++            request_peer_fencing(op, peer, &result);
+ 
+         } else if (have_all_replies) {
+             crm_info("All topology query replies have arrived, continuing (%d expected/%d received) ",
+                      replies_expected, op->replies);
+-            call_remote_stonith(op, NULL, pcmk_ok);
++            request_peer_fencing(op, NULL, &result);
+         }
+ 
+     } else if (op->state == st_query) {
+@@ -2062,12 +2107,12 @@ process_remote_stonith_query(xmlNode * msg)
+             /* we have a verified device living on a peer that is not the target */
+             crm_trace("Found %d verified device%s",
+                       nverified, pcmk__plural_s(nverified));
+-            call_remote_stonith(op, peer, pcmk_ok);
++            request_peer_fencing(op, peer, &result);
+ 
+         } else if (have_all_replies) {
+             crm_info("All query replies have arrived, continuing (%d expected/%d received) ",
+                      replies_expected, op->replies);
+-            call_remote_stonith(op, NULL, pcmk_ok);
++            request_peer_fencing(op, NULL, &result);
+ 
+         } else {
+             crm_trace("Waiting for more peer results before launching fencing operation");
+@@ -2226,7 +2271,7 @@ fenced_process_fencing_reply(xmlNode *msg)
+     crm_trace("Next for %s on behalf of %s@%s (result was: %s)",
+               op->target, op->originator, op->client_name,
+               pcmk_exec_status_str(result.execution_status));
+-    call_remote_stonith(op, NULL, pcmk_rc2legacy(stonith__result2rc(&result)));
++    request_peer_fencing(op, NULL, &result);
+ }
+ 
+ gboolean
+-- 
+2.27.0
+
+
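+With this patch, retry call sites describe the previous failure in full
+rather than as a bare rc; request_peer_fencing() consults that result
+only when no peers or devices remain. A sketch of the retry pattern (op
+stands for an existing remote_fencing_op_t, assumed for illustration):
+
+    pcmk__action_result_t last = PCMK__UNKNOWN_RESULT;
+
+    // Describe the attempt that just failed ...
+    pcmk__set_result(&last, CRM_EX_ERROR, PCMK_EXEC_TIMEOUT, NULL);
+
+    // ... and let the fencer try another peer or device if one is left.
+    // Only if nothing remains does this become the final result.
+    request_peer_fencing(op, NULL, &last);
+
+This is also where the behavior change noted above comes from: with
+topology in use and no devices left, the result is overwritten with
+PCMK_EXEC_NO_FENCE_DEVICE, which legacy-converts to -ENODEV rather than
+the old -EHOSTUNREACH.
+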
+From 43e08ba7ee1635e47bfaf2a57636101c675b89ae Mon Sep 17 00:00:00 2001
+From: Ken Gaillot <kgaillot@redhat.com>
+Date: Fri, 19 Nov 2021 16:02:04 -0600
+Subject: [PATCH 10/23] Feature: fencer: set exit reason for timeouts waiting
+ for peer replies
+
+---
+ daemons/fenced/fenced_remote.c | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+diff --git a/daemons/fenced/fenced_remote.c b/daemons/fenced/fenced_remote.c
+index 31d5ee6e93..415a7c1b98 100644
+--- a/daemons/fenced/fenced_remote.c
++++ b/daemons/fenced/fenced_remote.c
+@@ -616,7 +616,9 @@ remote_op_timeout_one(gpointer userdata)
+ 
+     crm_notice("Peer's '%s' action targeting %s for client %s timed out " CRM_XS
+                " id=%.8s", op->action, op->target, op->client_name, op->id);
+-    pcmk__set_result(&result, CRM_EX_ERROR, PCMK_EXEC_TIMEOUT, NULL);
++    pcmk__set_result(&result, CRM_EX_ERROR, PCMK_EXEC_TIMEOUT,
++                     "Peer did not send fence result within timeout");
++
+ 
+     // Try another device, if appropriate
+     request_peer_fencing(op, NULL, &result);
+-- 
+2.27.0
+
+
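+One ownership detail makes reasons like the one above safe to pass as
+literals or stack buffers: pcmk__set_result() appears to copy the reason
+string, since later patches in this series pair it with
+pcmk__reset_result(), which releases it. A sketch under that assumption
+(the buffer contents and values are illustrative):
+
+    char why[64];
+    pcmk__action_result_t result = PCMK__UNKNOWN_RESULT;
+
+    snprintf(why, sizeof(why), "Peer %s did not reply within %ds",
+             "node1", 30);
+    pcmk__set_result(&result, CRM_EX_ERROR, PCMK_EXEC_TIMEOUT, why);
+
+    /* ... hand &result off, e.g. to request_peer_fencing() ... */
+
+    pcmk__reset_result(&result);    // frees the duplicated reason
+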
+From 34e5baebac78b7235825b31bebc44e3d65ae45cc Mon Sep 17 00:00:00 2001
+From: Ken Gaillot <kgaillot@redhat.com>
+Date: Fri, 19 Nov 2021 16:10:28 -0600
+Subject: [PATCH 11/23] Refactor: fencer: pass full result when handling
+ duplicate actions
+
+Rename handle_duplicates() to finalize_op_duplicates() for readability, and
+make it take a full result rather than a legacy return code as an argument.
+---
+ daemons/fenced/fenced_remote.c | 29 +++++++++++++++++++++--------
+ 1 file changed, 21 insertions(+), 8 deletions(-)
+
+diff --git a/daemons/fenced/fenced_remote.c b/daemons/fenced/fenced_remote.c
+index 415a7c1b98..850bfb6eb3 100644
+--- a/daemons/fenced/fenced_remote.c
++++ b/daemons/fenced/fenced_remote.c
+@@ -439,12 +439,19 @@ handle_local_reply_and_notify(remote_fencing_op_t * op, xmlNode * data, int rc)
+     free_xml(notify_data);
+ }
+ 
++/*!
++ * \internal
++ * \brief Finalize all duplicates of a given fencer operation
++ *
++ * \param[in] op         Fencer operation that completed
++ * \param[in] data       Top-level XML to add notification to
++ * \param[in] result     Full operation result
++ */
+ static void
+-handle_duplicates(remote_fencing_op_t * op, xmlNode * data, int rc)
++finalize_op_duplicates(remote_fencing_op_t *op, xmlNode *data,
++                       pcmk__action_result_t *result)
+ {
+-    GList *iter = NULL;
+-
+-    for (iter = op->duplicates; iter != NULL; iter = iter->next) {
++    for (GList *iter = op->duplicates; iter != NULL; iter = iter->next) {
+         remote_fencing_op_t *other = iter->data;
+ 
+         if (other->state == st_duplicate) {
+@@ -452,8 +459,9 @@ handle_duplicates(remote_fencing_op_t * op, xmlNode * data, int rc)
+             crm_debug("Performing duplicate notification for %s@%s: %s "
+                       CRM_XS " id=%.8s",
+                       other->client_name, other->originator,
+-                      pcmk_strerror(rc), other->id);
+-            remote_op_done(other, data, rc, TRUE);
++                      pcmk_exec_status_str(result->execution_status),
++                      other->id);
++            remote_op_done(other, data, pcmk_rc2legacy(stonith__result2rc(result)), TRUE);
+ 
+         } else {
+             // Possible if (for example) it timed out already
+@@ -570,8 +578,13 @@ remote_op_done(remote_fencing_op_t * op, xmlNode * data, int rc, int dup)
+ 
+     handle_local_reply_and_notify(op, data, rc);
+ 
+-    if (dup == FALSE) {
+-        handle_duplicates(op, data, rc);
++    if (!dup) {
++        pcmk__action_result_t result = PCMK__UNKNOWN_RESULT;
++
++        pcmk__set_result(&result,
++                         ((rc == pcmk_ok)? CRM_EX_OK : CRM_EX_ERROR),
++                         stonith__legacy2status(rc), NULL);
++        finalize_op_duplicates(op, data, &result);
+     }
+ 
+     /* Free non-essential parts of the record
+-- 
+2.27.0
+
+
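+Until its own callers are converted, remote_op_done() bridges in the
+other direction, synthesizing a structured result from its legacy rc
+before finalizing duplicates. Condensed from the hunk above, with
+comments on the intent:
+
+    if (!dup) {
+        pcmk__action_result_t result = PCMK__UNKNOWN_RESULT;
+
+        // Interim shim: legacy rc -> structured result. pcmk_ok maps to
+        // CRM_EX_OK; anything else becomes a generic error, with the
+        // execution status recovered via stonith__legacy2status().
+        pcmk__set_result(&result,
+                         ((rc == pcmk_ok)? CRM_EX_OK : CRM_EX_ERROR),
+                         stonith__legacy2status(rc), NULL);
+        finalize_op_duplicates(op, data, &result);
+    }
+
+Each merged operation is finalized with the same result but flagged as a
+duplicate, so its client is notified without re-broadcasting to peers.
+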
+From 939bd6f5f0f79b19d0cc4d869f3c8980fda2e461 Mon Sep 17 00:00:00 2001
+From: Ken Gaillot <kgaillot@redhat.com>
+Date: Fri, 19 Nov 2021 16:23:20 -0600
+Subject: [PATCH 12/23] Feature: fencer: set exit reasons for fencing timeouts
+
+finalize_timed_out_op() now takes an exit reason as an argument.
+It is called for fencing timeouts, peer query reply timeouts,
+and all capable nodes failing to fence.
+
+At this point, the exit reason is not used, but that is planned.
+---
+ daemons/fenced/fenced_remote.c | 25 +++++++++++++++----------
+ 1 file changed, 15 insertions(+), 10 deletions(-)
+
+diff --git a/daemons/fenced/fenced_remote.c b/daemons/fenced/fenced_remote.c
+index 850bfb6eb3..c10a32442e 100644
+--- a/daemons/fenced/fenced_remote.c
++++ b/daemons/fenced/fenced_remote.c
+@@ -643,10 +643,12 @@ remote_op_timeout_one(gpointer userdata)
+  * \brief Finalize a remote fencer operation that timed out
+  *
+  * \param[in] op      Fencer operation that timed out
++ * \param[in] reason  Readable description of what step timed out
+  */
+ static void
+-finalize_timed_out_op(remote_fencing_op_t *op)
++finalize_timed_out_op(remote_fencing_op_t *op, const char *reason)
+ {
++    pcmk__action_result_t result = PCMK__UNKNOWN_RESULT;
+ 
+     op->op_timer_total = 0;
+ 
+@@ -660,13 +662,13 @@ finalize_timed_out_op(remote_fencing_op_t *op)
+          * devices, and return success.
+          */
+         op->state = st_done;
+-        remote_op_done(op, NULL, pcmk_ok, FALSE);
+-        return;
++        pcmk__set_result(&result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
++    } else {
++        op->state = st_failed;
++        pcmk__set_result(&result, CRM_EX_ERROR, PCMK_EXEC_TIMEOUT, reason);
+     }
+-
+-    op->state = st_failed;
+-
+-    remote_op_done(op, NULL, -ETIME, FALSE);
++    remote_op_done(op, NULL, pcmk_rc2legacy(stonith__result2rc(&result)), FALSE);
++    pcmk__reset_result(&result);
+ }
+ 
+ /*!
+@@ -687,7 +689,8 @@ remote_op_timeout(gpointer userdata)
+                   CRM_XS " id=%.8s",
+                   op->action, op->target, op->client_name, op->id);
+     } else {
+-        finalize_timed_out_op(userdata);
++        finalize_timed_out_op(userdata, "Fencing could not be completed "
++                                        "within overall timeout");
+     }
+     return G_SOURCE_REMOVE;
+ }
+@@ -719,7 +722,8 @@ remote_op_query_timeout(gpointer data)
+             g_source_remove(op->op_timer_total);
+             op->op_timer_total = 0;
+         }
+-        finalize_timed_out_op(op);
++        finalize_timed_out_op(op, "No capable peers replied to device query "
++                                  "within timeout");
+     }
+ 
+     return FALSE;
+@@ -1767,7 +1771,8 @@ request_peer_fencing(remote_fencing_op_t *op, peer_device_info_t *peer,
+                  CRM_XS " state=%s", op->action, op->target, op->client_name,
+                  stonith_op_state_str(op->state));
+         CRM_CHECK(op->state < st_done, return);
+-        finalize_timed_out_op(op);
++        finalize_timed_out_op(op, "All nodes failed, or are unable, to "
++                                  "fence target");
+ 
+     } else if(op->replies >= op->replies_expected || op->replies >= fencing_active_peers()) {
+         /* if the operation never left the query state,
+-- 
+2.27.0
+
+
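+Each timeout path now states what specifically ran out of time, and that
+string rides along as the exit reason. The call shape is uniform; the
+second reason below is hypothetical, purely to show the pattern:
+
+    // From the query-timeout path above
+    finalize_timed_out_op(op, "No capable peers replied to device query "
+                              "within timeout");
+
+    // Hypothetical additional caller, same shape
+    finalize_timed_out_op(op, "Delegated fencer did not confirm within "
+                              "timeout");
+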
+From b80b02799260feb98723a460f2f8e8ad5cdc467f Mon Sep 17 00:00:00 2001
+From: Ken Gaillot <kgaillot@redhat.com>
+Date: Fri, 19 Nov 2021 16:32:04 -0600
+Subject: [PATCH 13/23] Refactor: fencer: pass full result when finalizing peer
+ fencing actions
+
+Rename remote_op_done() to finalize_op() for readability, and make it take a
+full result as an argument, rather than a legacy return code.
+
+This does cause one change in behavior: when all topology levels fail,
+the legacy return code returned will be -pcmk_err_generic instead of -EINVAL.
+---
+ daemons/fenced/fenced_history.c |   2 +-
+ daemons/fenced/fenced_remote.c  | 177 ++++++++++++++++++--------------
+ 2 files changed, 103 insertions(+), 76 deletions(-)
+
+diff --git a/daemons/fenced/fenced_history.c b/daemons/fenced/fenced_history.c
+index bc159383c2..9e38ff0a20 100644
+--- a/daemons/fenced/fenced_history.c
++++ b/daemons/fenced/fenced_history.c
+@@ -374,7 +374,7 @@ stonith_local_history_diff_and_merge(GHashTable *remote_history,
+                 set_fencing_completed(op);
+                 /* use -EHOSTUNREACH to not introduce a new return-code that might
+                    trigger unexpected results at other places and to prevent
+-                   remote_op_done from setting the delegate if not present
++                   finalize_op from setting the delegate if not present
+                 */
+                 stonith_bcast_result_to_peers(op, -EHOSTUNREACH, FALSE);
+             }
+diff --git a/daemons/fenced/fenced_remote.c b/daemons/fenced/fenced_remote.c
+index c10a32442e..aefc5f311c 100644
+--- a/daemons/fenced/fenced_remote.c
++++ b/daemons/fenced/fenced_remote.c
+@@ -76,13 +76,14 @@ typedef struct {
+ 
+ GHashTable *stonith_remote_op_list = NULL;
+ 
+-static void remote_op_done(remote_fencing_op_t * op, xmlNode * data, int rc, int dup);
+ extern xmlNode *stonith_create_op(int call_id, const char *token, const char *op, xmlNode * data,
+                                   int call_options);
+ 
+ static void request_peer_fencing(remote_fencing_op_t *op,
+                                 peer_device_info_t *peer,
+                                 pcmk__action_result_t *result);
++static void finalize_op(remote_fencing_op_t *op, xmlNode *data,
++                        pcmk__action_result_t *result, bool dup);
+ static void report_timeout_period(remote_fencing_op_t * op, int op_timeout);
+ static int get_op_total_timeout(const remote_fencing_op_t *op,
+                                 const peer_device_info_t *chosen_peer);
+@@ -461,7 +462,7 @@ finalize_op_duplicates(remote_fencing_op_t *op, xmlNode *data,
+                       other->client_name, other->originator,
+                       pcmk_exec_status_str(result->execution_status),
+                       other->id);
+-            remote_op_done(other, data, pcmk_rc2legacy(stonith__result2rc(result)), TRUE);
++            finalize_op(other, data, result, true);
+ 
+         } else {
+             // Possible if (for example) it timed out already
+@@ -487,104 +488,100 @@ delegate_from_xml(xmlNode *xml)
+ 
+ /*!
+  * \internal
+- * \brief Finalize a remote operation.
++ * \brief Finalize a peer fencing operation
+  *
+- * \description This function has two code paths.
++ * Clean up after a fencing operation completes. This function has two code
++ * paths: the executioner uses it to broadcast the result to CPG peers, and then
++ * each peer (including the executioner) uses it to process that broadcast and
++ * notify its IPC clients of the result.
+  *
+- * Path 1. This node is the owner of the operation and needs
+- *         to notify the cpg group via a broadcast as to the operation's
+- *         results.
+- *
+- * Path 2. The cpg broadcast is received. All nodes notify their local
+- *         stonith clients the operation results.
+- *
+- * So, The owner of the operation first notifies the cluster of the result,
+- * and once that cpg notify is received back it notifies all the local clients.
+- *
+- * Nodes that are passive watchers of the operation will receive the
+- * broadcast and only need to notify their local clients the operation finished.
+- *
+- * \param op, The fencing operation to finalize
+- * \param data, The xml msg reply (if present) of the last delegated fencing
+- *              operation.
+- * \param dup, Is this operation a duplicate, if so treat it a little differently
+- *             making sure the broadcast is not sent out.
++ * \param[in] op      Fencer operation that completed
++ * \param[in] data    If not NULL, XML reply of last delegated fencing operation
++ * \param[in] result  Full operation result
++ * \param[in] dup     Whether this operation is a duplicate of another
++ *                    (in which case, do not broadcast the result)
+  */
+ static void
+-remote_op_done(remote_fencing_op_t * op, xmlNode * data, int rc, int dup)
++finalize_op(remote_fencing_op_t *op, xmlNode *data,
++            pcmk__action_result_t *result, bool dup)
+ {
+     int level = LOG_ERR;
+     const char *subt = NULL;
+     xmlNode *local_data = NULL;
+     gboolean op_merged = FALSE;
+ 
++    CRM_CHECK((op != NULL) && (result != NULL), return);
++
++    if (op->notify_sent) {
++        // Most likely, this is a timed-out action that eventually completed
++        crm_notice("Operation '%s'%s%s by %s for %s@%s%s: "
++                   "Result arrived too late " CRM_XS " id=%.8s",
++                   op->action, (op->target? " targeting " : ""),
++                   (op->target? op->target : ""),
++                   (op->delegate? op->delegate : "unknown node"),
++                   op->client_name, op->originator,
++                   (op_merged? " (merged)" : ""),
++                   op->id);
++        return;
++    }
++
+     set_fencing_completed(op);
+     clear_remote_op_timers(op);
+     undo_op_remap(op);
+ 
+-    if (op->notify_sent == TRUE) {
+-        crm_err("Already sent notifications for '%s' targeting %s by %s for "
+-                "client %s@%s: %s " CRM_XS " rc=%d state=%s id=%.8s",
+-                op->action, op->target,
+-                (op->delegate? op->delegate : "unknown node"),
+-                op->client_name, op->originator, pcmk_strerror(rc),
+-                rc, stonith_op_state_str(op->state), op->id);
+-        goto remote_op_done_cleanup;
+-    }
+-
+     if (data == NULL) {
+         data = create_xml_node(NULL, "remote-op");
+         local_data = data;
+ 
+     } else if (op->delegate == NULL) {
+-        switch (rc) {
+-            case -ENODEV:
+-            case -EHOSTUNREACH:
++        switch (result->execution_status) {
++            case PCMK_EXEC_NO_FENCE_DEVICE:
+                 break;
++            case PCMK_EXEC_INVALID:
++                if (result->exit_status == CRM_EX_EXPIRED) {
++                    break;
++                }
++                // else fall through
+             default:
+                 op->delegate = delegate_from_xml(data);
+                 break;
+         }
+     }
+ 
+-    if(dup) {
+-        op_merged = TRUE;
+-    } else if (crm_element_value(data, F_STONITH_MERGED)) {
+-        op_merged = TRUE;
+-    } 
++    if (dup || (crm_element_value(data, F_STONITH_MERGED) != NULL)) {
++        op_merged = true;
++    }
+ 
+     /* Tell everyone the operation is done, we will continue
+      * with doing the local notifications once we receive
+      * the broadcast back. */
+     subt = crm_element_value(data, F_SUBTYPE);
+-    if (dup == FALSE && !pcmk__str_eq(subt, "broadcast", pcmk__str_casei)) {
++    if (!dup && !pcmk__str_eq(subt, "broadcast", pcmk__str_casei)) {
+         /* Defer notification until the bcast message arrives */
+-        stonith_bcast_result_to_peers(op, rc, (op_merged? TRUE: FALSE));
+-        goto remote_op_done_cleanup;
++        stonith_bcast_result_to_peers(op, pcmk_rc2legacy(stonith__result2rc(result)), op_merged);
++        free_xml(local_data);
++        return;
+     }
+ 
+-    if (rc == pcmk_ok || dup) {
+-        level = LOG_NOTICE;
+-    } else if (!pcmk__str_eq(op->originator, stonith_our_uname, pcmk__str_casei)) {
++    if (pcmk__result_ok(result) || dup
++        || !pcmk__str_eq(op->originator, stonith_our_uname, pcmk__str_casei)) {
+         level = LOG_NOTICE;
+     }
+-
+-    do_crm_log(level, "Operation '%s'%s%s by %s for %s@%s%s: %s "
++    do_crm_log(level, "Operation '%s'%s%s by %s for %s@%s%s: %s (%s%s%s) "
+                CRM_XS " id=%.8s", op->action, (op->target? " targeting " : ""),
+                (op->target? op->target : ""),
+                (op->delegate? op->delegate : "unknown node"),
+                op->client_name, op->originator,
+-               (op_merged? " (merged)" : ""), pcmk_strerror(rc), op->id);
++               (op_merged? " (merged)" : ""), crm_exit_str(result->exit_status),
++               pcmk_exec_status_str(result->execution_status),
++               ((result->exit_reason == NULL)? "" : ": "),
++               ((result->exit_reason == NULL)? "" : result->exit_reason),
++               op->id);
+ 
+-    handle_local_reply_and_notify(op, data, rc);
++    handle_local_reply_and_notify(op, data, pcmk_rc2legacy(stonith__result2rc(result)));
+ 
+     if (!dup) {
+-        pcmk__action_result_t result = PCMK__UNKNOWN_RESULT;
+-
+-        pcmk__set_result(&result,
+-                         ((rc == pcmk_ok)? CRM_EX_OK : CRM_EX_ERROR),
+-                         stonith__legacy2status(rc), NULL);
+-        finalize_op_duplicates(op, data, &result);
++        finalize_op_duplicates(op, data, result);
+     }
+ 
+     /* Free non-essential parts of the record
+@@ -594,20 +591,27 @@ remote_op_done(remote_fencing_op_t * op, xmlNode * data, int rc, int dup)
+         g_list_free_full(op->query_results, free_remote_query);
+         op->query_results = NULL;
+     }
+-
+     if (op->request) {
+         free_xml(op->request);
+         op->request = NULL;
+     }
+ 
+-  remote_op_done_cleanup:
+     free_xml(local_data);
+ }
+ 
++/*!
++ * \internal
++ * \brief Finalize a watchdog fencer op after the waiting time expires
++ *
++ * \param[in] userdata  Fencer operation that completed
++ *
++ * \return G_SOURCE_REMOVE (which tells glib not to restart timer)
++ */
+ static gboolean
+ remote_op_watchdog_done(gpointer userdata)
+ {
+     remote_fencing_op_t *op = userdata;
++    pcmk__action_result_t result = PCMK__UNKNOWN_RESULT;
+ 
+     op->op_timer_one = 0;
+ 
+@@ -615,8 +619,9 @@ remote_op_watchdog_done(gpointer userdata)
+                CRM_XS " id=%.8s",
+                op->action, op->target, op->client_name, op->id);
+     op->state = st_done;
+-    remote_op_done(op, NULL, pcmk_ok, FALSE);
+-    return FALSE;
++    pcmk__set_result(&result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
++    finalize_op(op, NULL, &result, false);
++    return G_SOURCE_REMOVE;
+ }
+ 
+ static gboolean
+@@ -667,7 +672,7 @@ finalize_timed_out_op(remote_fencing_op_t *op, const char *reason)
+         op->state = st_failed;
+         pcmk__set_result(&result, CRM_EX_ERROR, PCMK_EXEC_TIMEOUT, reason);
+     }
+-    remote_op_done(op, NULL, pcmk_rc2legacy(stonith__result2rc(&result)), FALSE);
++    finalize_op(op, NULL, &result, false);
+     pcmk__reset_result(&result);
+ }
+ 
+@@ -1064,9 +1069,13 @@ fenced_handle_manual_confirmation(pcmk__client_t *client, xmlNode *msg)
+     set_fencing_completed(op);
+     op->delegate = strdup("a human");
+ 
+-    // For the fencer's purposes, the fencing operation is done
++    {
++        // For the fencer's purposes, the fencing operation is done
++        pcmk__action_result_t result = PCMK__UNKNOWN_RESULT;
+ 
+-    remote_op_done(op, msg, pcmk_ok, FALSE);
++        pcmk__set_result(&result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
++        finalize_op(op, msg, &result, false);
++    }
+ 
+     /* For the requester's purposes, the operation is still pending. The
+      * actual result will be sent asynchronously via the operation's done_cb().
+@@ -1200,6 +1209,16 @@ create_remote_stonith_op(const char *client, xmlNode * request, gboolean peer)
+     return op;
+ }
+ 
++/*!
++ * \internal
++ * \brief Create a peer fencing operation from a request, and initiate it
++ *
++ * \param[in] client     IPC client that made request (NULL to get from request)
++ * \param[in] request    Request XML
++ * \param[in] manual_ack Whether this is a manual action confirmation
++ *
++ * \return Newly created operation on success, otherwise NULL
++ */
+ remote_fencing_op_t *
+ initiate_remote_stonith_op(pcmk__client_t *client, xmlNode *request,
+                            gboolean manual_ack)
+@@ -1234,9 +1253,17 @@ initiate_remote_stonith_op(pcmk__client_t *client, xmlNode *request,
+ 
+     switch (op->state) {
+         case st_failed:
+-            crm_warn("Could not request peer fencing (%s) targeting %s "
+-                     CRM_XS " id=%.8s", op->action, op->target, op->id);
+-            remote_op_done(op, NULL, -EINVAL, FALSE);
++            // advance_topology_level() exhausted levels
++            {
++                pcmk__action_result_t result = PCMK__UNKNOWN_RESULT;
++
++                pcmk__set_result(&result, CRM_EX_ERROR, PCMK_EXEC_ERROR,
++                                 "All topology levels failed");
++                crm_warn("Could not request peer fencing (%s) targeting %s "
++                         CRM_XS " id=%.8s", op->action, op->target, op->id);
++                finalize_op(op, NULL, &result, false);
++                pcmk__reset_result(&result);
++            }
+             return op;
+ 
+         case st_duplicate:
+@@ -1607,7 +1634,7 @@ advance_topology_device_in_level(remote_fencing_op_t *op, const char *device,
+         crm_trace("Marking complex fencing op targeting %s as complete",
+                   op->target);
+         op->state = st_done;
+-        remote_op_done(op, msg, pcmk_ok, FALSE);
++        finalize_op(op, msg, &result, false);
+     }
+ }
+ 
+@@ -1805,7 +1832,7 @@ request_peer_fencing(remote_fencing_op_t *op, peer_device_info_t *peer,
+                                  PCMK_EXEC_NO_FENCE_DEVICE, NULL);
+             }
+             /* ... else use result provided by caller -- overwriting it with
+-               PCMK_EXEC_NO_FENCE_DEVICE would prevent remote_op_done() from
++               PCMK_EXEC_NO_FENCE_DEVICE would prevent finalize_op() from
+                setting the correct delegate if needed.
+              */
+ 
+@@ -1816,7 +1843,7 @@ request_peer_fencing(remote_fencing_op_t *op, peer_device_info_t *peer,
+         }
+ 
+         op->state = st_failed;
+-        remote_op_done(op, NULL, pcmk_rc2legacy(stonith__result2rc(result)), FALSE);
++        finalize_op(op, NULL, result, false);
+ 
+     } else {
+         crm_info("Waiting for additional peers capable of fencing (%s) %s%s%s "
+@@ -2216,7 +2243,7 @@ fenced_process_fencing_reply(xmlNode *msg)
+         } else {
+             op->state = st_failed;
+         }
+-        remote_op_done(op, msg, pcmk_rc2legacy(stonith__result2rc(&result)), FALSE);
++        finalize_op(op, msg, &result, false);
+         return;
+     } else if (!pcmk__str_eq(op->originator, stonith_our_uname, pcmk__str_casei)) {
+         /* If this isn't a remote level broadcast, and we are not the
+@@ -2241,7 +2268,7 @@ fenced_process_fencing_reply(xmlNode *msg)
+         /* We own the op, and it is complete. broadcast the result to all nodes
+          * and notify our local clients. */
+         if (op->state == st_done) {
+-            remote_op_done(op, msg, pcmk_rc2legacy(stonith__result2rc(&result)), FALSE);
++            finalize_op(op, msg, &result, false);
+             return;
+         }
+ 
+@@ -2268,20 +2295,20 @@ fenced_process_fencing_reply(xmlNode *msg)
+              * levels are available, mark this operation as failed and report results. */
+             if (advance_topology_level(op, false) != pcmk_rc_ok) {
+                 op->state = st_failed;
+-                remote_op_done(op, msg, pcmk_rc2legacy(stonith__result2rc(&result)), FALSE);
++                finalize_op(op, msg, &result, false);
+                 return;
+             }
+         }
+     } else if (pcmk__result_ok(&result) && (op->devices == NULL)) {
+         crm_trace("All done for %s", op->target);
+         op->state = st_done;
+-        remote_op_done(op, msg, pcmk_rc2legacy(stonith__result2rc(&result)), FALSE);
++        finalize_op(op, msg, &result, false);
+         return;
+     } else if ((result.execution_status == PCMK_EXEC_TIMEOUT)
+                && (op->devices == NULL)) {
+         /* If the operation timed out don't bother retrying other peers. */
+         op->state = st_failed;
+-        remote_op_done(op, msg, pcmk_rc2legacy(stonith__result2rc(&result)), FALSE);
++        finalize_op(op, msg, &result, false);
+         return;
+     } else {
+         /* fall-through and attempt other fencing action using another peer */
+-- 
+2.27.0
+
+
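+With finalize_op() as the single exit point, every completion path in
+the hunks above reduces to the same few lines. A minimal sketch of the
+success path (op and msg assumed to already exist):
+
+    pcmk__action_result_t result = PCMK__UNKNOWN_RESULT;
+
+    op->state = st_done;
+    pcmk__set_result(&result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
+
+    // Broadcast to CPG peers first; local IPC notification happens when
+    // the broadcast arrives back, and notify_sent then guards against a
+    // late result being finalized twice
+    finalize_op(op, msg, &result, false);
+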
+From 8f19c09f1b961ba9aa510b7dcd1875bbabcddcdc Mon Sep 17 00:00:00 2001
+From: Ken Gaillot <kgaillot@redhat.com>
+Date: Fri, 19 Nov 2021 16:39:23 -0600
+Subject: [PATCH 14/23] Refactor: fencer: pass full result when broadcasting
+ replies
+
+Rename stonith_bcast_result_to_peers() to fenced_broadcast_op_result() for
+consistency, and make it take the full result as an argument instead of a
+legacy return code. The full result is not yet used, but that is planned.
+---
+ daemons/fenced/fenced_history.c   | 18 ++++++++++++------
+ daemons/fenced/fenced_remote.c    | 15 ++++++++++++---
+ daemons/fenced/pacemaker-fenced.h |  9 ++-------
+ 3 files changed, 26 insertions(+), 16 deletions(-)
+
+diff --git a/daemons/fenced/fenced_history.c b/daemons/fenced/fenced_history.c
+index 9e38ff0a20..1e07a9815a 100644
+--- a/daemons/fenced/fenced_history.c
++++ b/daemons/fenced/fenced_history.c
+@@ -359,24 +359,29 @@ stonith_local_history_diff_and_merge(GHashTable *remote_history,
+     }
+ 
+     if (remote_history) {
++        pcmk__action_result_t result = PCMK__UNKNOWN_RESULT;
++
+         init_stonith_remote_op_hash_table(&stonith_remote_op_list);
+ 
+         updated |= g_hash_table_size(remote_history);
+ 
+         g_hash_table_iter_init(&iter, remote_history);
+         while (g_hash_table_iter_next(&iter, NULL, (void **)&op)) {
+-
+             if (stonith__op_state_pending(op->state) &&
+                 pcmk__str_eq(op->originator, stonith_our_uname, pcmk__str_casei)) {
++
+                 crm_warn("Failing pending operation %.8s originated by us but "
+                          "known only from peer history", op->id);
+                 op->state = st_failed;
+                 set_fencing_completed(op);
+-                /* use -EHOSTUNREACH to not introduce a new return-code that might
+-                   trigger unexpected results at other places and to prevent
+-                   finalize_op from setting the delegate if not present
+-                */
+-                stonith_bcast_result_to_peers(op, -EHOSTUNREACH, FALSE);
++
++                /* CRM_EX_EXPIRED + PCMK_EXEC_INVALID prevents finalize_op()
++                 * from setting a delegate
++                 */
++                pcmk__set_result(&result, CRM_EX_EXPIRED, PCMK_EXEC_INVALID,
++                                 "Initiated by earlier fencer "
++                                 "process and presumed failed");
++                fenced_broadcast_op_result(op, &result, false);
+             }
+ 
+             g_hash_table_iter_steal(&iter);
+@@ -391,6 +396,7 @@ stonith_local_history_diff_and_merge(GHashTable *remote_history,
+              */
+         }
+ 
++        pcmk__reset_result(&result);
+         g_hash_table_destroy(remote_history); /* remove what is left */
+     }
+ 
+diff --git a/daemons/fenced/fenced_remote.c b/daemons/fenced/fenced_remote.c
+index aefc5f311c..a0f026c790 100644
+--- a/daemons/fenced/fenced_remote.c
++++ b/daemons/fenced/fenced_remote.c
+@@ -374,12 +374,21 @@ create_op_done_notify(remote_fencing_op_t * op, int rc)
+     return notify_data;
+ }
+ 
++/*!
++ * \internal
++ * \brief Broadcast a fence result notification to all CPG peers
++ *
++ * \param[in] op         Fencer operation that completed
++ * \param[in] result     Full operation result
++ * \param[in] op_merged  Whether this operation is a duplicate of another
++ */
+ void
+-stonith_bcast_result_to_peers(remote_fencing_op_t * op, int rc, gboolean op_merged)
++fenced_broadcast_op_result(remote_fencing_op_t *op,
++                           pcmk__action_result_t *result, bool op_merged)
+ {
+     static int count = 0;
+     xmlNode *bcast = create_xml_node(NULL, T_STONITH_REPLY);
+-    xmlNode *notify_data = create_op_done_notify(op, rc);
++    xmlNode *notify_data = create_op_done_notify(op, pcmk_rc2legacy(stonith__result2rc(result)));
+ 
+     count++;
+     crm_trace("Broadcasting result to peers");
+@@ -558,7 +567,7 @@ finalize_op(remote_fencing_op_t *op, xmlNode *data,
+     subt = crm_element_value(data, F_SUBTYPE);
+     if (!dup && !pcmk__str_eq(subt, "broadcast", pcmk__str_casei)) {
+         /* Defer notification until the bcast message arrives */
+-        stonith_bcast_result_to_peers(op, pcmk_rc2legacy(stonith__result2rc(result)), op_merged);
++        fenced_broadcast_op_result(op, result, op_merged);
+         free_xml(local_data);
+         return;
+     }
+diff --git a/daemons/fenced/pacemaker-fenced.h b/daemons/fenced/pacemaker-fenced.h
+index d5f4bc79fd..ed47ab046c 100644
+--- a/daemons/fenced/pacemaker-fenced.h
++++ b/daemons/fenced/pacemaker-fenced.h
+@@ -153,13 +153,8 @@ typedef struct remote_fencing_op_s {
+ 
+ } remote_fencing_op_t;
+ 
+-/*!
+- * \internal
+- * \brief Broadcast the result of an operation to the peers.
+- * \param op, Operation whose result should be broadcast
+- * \param rc, Result of the operation
+- */
+-void stonith_bcast_result_to_peers(remote_fencing_op_t * op, int rc, gboolean op_merged);
++void fenced_broadcast_op_result(remote_fencing_op_t *op,
++                                pcmk__action_result_t *result, bool op_merged);
+ 
+ // Fencer-specific client flags
+ enum st_client_flags {
+-- 
+2.27.0
+
+
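+The history-merge hunk above also documents the one result combination
+that finalize_op() treats specially: CRM_EX_EXPIRED with
+PCMK_EXEC_INVALID marks an operation this fencer never actually ran, so
+no delegate is inferred from the reply XML. Condensed:
+
+    pcmk__set_result(&result, CRM_EX_EXPIRED, PCMK_EXEC_INVALID,
+                     "Initiated by earlier fencer process and presumed "
+                     "failed");
+    fenced_broadcast_op_result(op, &result, false);   // op_merged = false
+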
+From 3396e66b4c9cca895c7412b66159fd2342de1911 Mon Sep 17 00:00:00 2001
+From: Ken Gaillot <kgaillot@redhat.com>
+Date: Fri, 19 Nov 2021 16:42:46 -0600
+Subject: [PATCH 15/23] Feature: fencer: add full result to local replies
+
+handle_local_reply_and_notify() now takes the full result as an argument
+instead of a legacy return code, and adds it to the reply to the local
+requester. It does not add it to notifications yet, but that is planned.
+---
+ daemons/fenced/fenced_remote.c | 26 ++++++++++++++------------
+ 1 file changed, 14 insertions(+), 12 deletions(-)
+
+diff --git a/daemons/fenced/fenced_remote.c b/daemons/fenced/fenced_remote.c
+index a0f026c790..329e06c444 100644
+--- a/daemons/fenced/fenced_remote.c
++++ b/daemons/fenced/fenced_remote.c
+@@ -409,8 +409,17 @@ fenced_broadcast_op_result(remote_fencing_op_t *op,
+     return;
+ }
+ 
++/*!
++ * \internal
++ * \brief Reply to a local request originator and notify all subscribed clients
++ *
++ * \param[in] op         Fencer operation that completed
++ * \param[in] data       Top-level XML to add notification to
++ * \param[in] result     Full operation result
++ */
+ static void
+-handle_local_reply_and_notify(remote_fencing_op_t * op, xmlNode * data, int rc)
++handle_local_reply_and_notify(remote_fencing_op_t *op, xmlNode *data,
++                              pcmk__action_result_t *result)
+ {
+     xmlNode *notify_data = NULL;
+     xmlNode *reply = NULL;
+@@ -421,26 +430,19 @@ handle_local_reply_and_notify(remote_fencing_op_t * op, xmlNode * data, int rc)
+     }
+ 
+     /* Do notification with a clean data object */
+-    notify_data = create_op_done_notify(op, rc);
++    notify_data = create_op_done_notify(op, pcmk_rc2legacy(stonith__result2rc(result)));
+     crm_xml_add_int(data, "state", op->state);
+     crm_xml_add(data, F_STONITH_TARGET, op->target);
+     crm_xml_add(data, F_STONITH_OPERATION, op->action);
+ 
+-    {
+-        pcmk__action_result_t result = PCMK__UNKNOWN_RESULT;
+-
+-        pcmk__set_result(&result,
+-                         ((rc == pcmk_ok)? CRM_EX_OK : CRM_EX_ERROR),
+-                         stonith__legacy2status(rc), NULL);
+-        reply = fenced_construct_reply(op->request, data, &result);
+-    }
++    reply = fenced_construct_reply(op->request, data, result);
+     crm_xml_add(reply, F_STONITH_DELEGATE, op->delegate);
+ 
+     /* Send fencing OP reply to local client that initiated fencing */
+     do_local_reply(reply, op->client_id, op->call_options & st_opt_sync_call, FALSE);
+ 
+     /* bcast to all local clients that the fencing operation happened */
+-    do_stonith_notify(T_STONITH_NOTIFY_FENCE, rc, notify_data);
++    do_stonith_notify(T_STONITH_NOTIFY_FENCE, pcmk_rc2legacy(stonith__result2rc(result)), notify_data);
+     do_stonith_notify(T_STONITH_NOTIFY_HISTORY, pcmk_ok, NULL);
+ 
+     /* mark this op as having notify's already sent */
+@@ -587,7 +589,7 @@ finalize_op(remote_fencing_op_t *op, xmlNode *data,
+                ((result->exit_reason == NULL)? "" : result->exit_reason),
+                op->id);
+ 
+-    handle_local_reply_and_notify(op, data, pcmk_rc2legacy(stonith__result2rc(result)));
++    handle_local_reply_and_notify(op, data, result);
+ 
+     if (!dup) {
+         finalize_op_duplicates(op, data, result);
+-- 
+2.27.0
+
+
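+After this patch, the reply to the requester carries the structured
+result directly, while notifications still go through the legacy mapping
+until patch 19. Condensed from the hunk above, with the ordering made
+explicit:
+
+    reply = fenced_construct_reply(op->request, data, result);
+    crm_xml_add(reply, F_STONITH_DELEGATE, op->delegate);
+
+    // Reply to the client that initiated fencing first ...
+    do_local_reply(reply, op->client_id,
+                   op->call_options & st_opt_sync_call, FALSE);
+
+    // ... then notify all subscribed clients (still a legacy rc here)
+    do_stonith_notify(T_STONITH_NOTIFY_FENCE,
+                      pcmk_rc2legacy(stonith__result2rc(result)),
+                      notify_data);
+
+    op->notify_sent = TRUE;    // lets finalize_op() drop late results
+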
+From 004583f3ef908cbd9dc6305597cb55d5ad22882c Mon Sep 17 00:00:00 2001
+From: Ken Gaillot <kgaillot@redhat.com>
+Date: Fri, 19 Nov 2021 16:47:13 -0600
+Subject: [PATCH 16/23] Refactor: fencer: pass full result when sending device
+ notifications
+
+Rename do_stonith_notify_device() to fenced_send_device_notification() for
+consistency, and make it take the full result as an argument rather than a
+legacy return code. The full result is not used yet, but that is planned.
+---
+ daemons/fenced/fenced_commands.c  |  4 ++--
+ daemons/fenced/pacemaker-fenced.c | 15 +++++++++++++--
+ daemons/fenced/pacemaker-fenced.h |  4 +++-
+ 3 files changed, 18 insertions(+), 5 deletions(-)
+
+diff --git a/daemons/fenced/fenced_commands.c b/daemons/fenced/fenced_commands.c
+index 84f89e8daf..86a761dfab 100644
+--- a/daemons/fenced/fenced_commands.c
++++ b/daemons/fenced/fenced_commands.c
+@@ -3190,7 +3190,7 @@ handle_request(pcmk__client_t *client, uint32_t id, uint32_t flags,
+                              PCMK_EXEC_INVALID,
+                              "Unprivileged users must register device via CIB");
+         }
+-        do_stonith_notify_device(op, pcmk_rc2legacy(stonith__result2rc(&result)), device_id);
++        fenced_send_device_notification(op, &result, device_id);
+ 
+     } else if (pcmk__str_eq(op, STONITH_OP_DEVICE_DEL, pcmk__str_none)) {
+         xmlNode *dev = get_xpath_object("//" F_STONITH_DEVICE, request, LOG_ERR);
+@@ -3204,7 +3204,7 @@ handle_request(pcmk__client_t *client, uint32_t id, uint32_t flags,
+                              PCMK_EXEC_INVALID,
+                              "Unprivileged users must delete device via CIB");
+         }
+-        do_stonith_notify_device(op, pcmk_rc2legacy(stonith__result2rc(&result)), device_id);
++        fenced_send_device_notification(op, &result, device_id);
+ 
+     } else if (pcmk__str_eq(op, STONITH_OP_LEVEL_ADD, pcmk__str_none)) {
+         char *device_id = NULL;
+diff --git a/daemons/fenced/pacemaker-fenced.c b/daemons/fenced/pacemaker-fenced.c
+index 56acc93f31..42e167ce78 100644
+--- a/daemons/fenced/pacemaker-fenced.c
++++ b/daemons/fenced/pacemaker-fenced.c
+@@ -394,10 +394,21 @@ do_stonith_notify_config(const char *op, int rc,
+     free_xml(notify_data);
+ }
+ 
++/*!
++ * \internal
++ * \brief Send notifications for a device change to subscribed clients
++ *
++ * \param[in] op      Notification type (STONITH_OP_DEVICE_ADD or
++ *                    STONITH_OP_DEVICE_DEL)
++ * \param[in] result  Operation result
++ * \param[in] desc    ID of device that changed
++ */
+ void
+-do_stonith_notify_device(const char *op, int rc, const char *desc)
++fenced_send_device_notification(const char *op,
++                                const pcmk__action_result_t *result,
++                                const char *desc)
+ {
+-    do_stonith_notify_config(op, rc, desc, g_hash_table_size(device_list));
++    do_stonith_notify_config(op, pcmk_rc2legacy(stonith__result2rc(result)), desc, g_hash_table_size(device_list));
+ }
+ 
+ void
+diff --git a/daemons/fenced/pacemaker-fenced.h b/daemons/fenced/pacemaker-fenced.h
+index ed47ab046c..0b63680171 100644
+--- a/daemons/fenced/pacemaker-fenced.h
++++ b/daemons/fenced/pacemaker-fenced.h
+@@ -230,7 +230,9 @@ void
+  do_stonith_async_timeout_update(const char *client, const char *call_id, int timeout);
+ 
+ void do_stonith_notify(const char *type, int result, xmlNode *data);
+-void do_stonith_notify_device(const char *op, int rc, const char *desc);
++void fenced_send_device_notification(const char *op,
++                                     const pcmk__action_result_t *result,
++                                     const char *desc);
+ void do_stonith_notify_level(const char *op, int rc, const char *desc);
+ 
+ remote_fencing_op_t *initiate_remote_stonith_op(pcmk__client_t *client,
+-- 
+2.27.0
+
+
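+Device call sites now shrink to one line per notification, with the
+helper deriving the active-device count itself. A sketch of a
+registration path (the device ID is illustrative only):
+
+    pcmk__action_result_t result = PCMK__UNKNOWN_RESULT;
+    const char *device_id = "fence_ipmilan_node1";   // illustrative only
+
+    pcmk__set_result(&result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
+    fenced_send_device_notification(STONITH_OP_DEVICE_ADD, &result,
+                                    device_id);
+    pcmk__reset_result(&result);
+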
+From ee0777d5ca99d8d2d7805d4a73241ab696c68751 Mon Sep 17 00:00:00 2001
+From: Ken Gaillot <kgaillot@redhat.com>
+Date: Fri, 19 Nov 2021 16:51:55 -0600
+Subject: [PATCH 17/23] Refactor: fencer: pass full result when sending
+ topology notifications
+
+Rename do_stonith_notify_level() to fenced_send_level_notification() for
+consistency, and make it take the full result as an argument rather than a
+legacy return code. The full result is not used yet, but that is planned.
+---
+ daemons/fenced/fenced_commands.c  |  4 ++--
+ daemons/fenced/pacemaker-fenced.c | 21 +++++++++++++++------
+ daemons/fenced/pacemaker-fenced.h |  4 +++-
+ 3 files changed, 20 insertions(+), 9 deletions(-)
+
+diff --git a/daemons/fenced/fenced_commands.c b/daemons/fenced/fenced_commands.c
+index 86a761dfab..2f3dbb035a 100644
+--- a/daemons/fenced/fenced_commands.c
++++ b/daemons/fenced/fenced_commands.c
+@@ -3216,7 +3216,7 @@ handle_request(pcmk__client_t *client, uint32_t id, uint32_t flags,
+                              PCMK_EXEC_INVALID,
+                              "Unprivileged users must add level via CIB");
+         }
+-        do_stonith_notify_level(op, pcmk_rc2legacy(stonith__result2rc(&result)), device_id);
++        fenced_send_level_notification(op, &result, device_id);
+         free(device_id);
+ 
+     } else if (pcmk__str_eq(op, STONITH_OP_LEVEL_DEL, pcmk__str_none)) {
+@@ -3229,7 +3229,7 @@ handle_request(pcmk__client_t *client, uint32_t id, uint32_t flags,
+                              PCMK_EXEC_INVALID,
+                              "Unprivileged users must delete level via CIB");
+         }
+-        do_stonith_notify_level(op, pcmk_rc2legacy(stonith__result2rc(&result)), device_id);
++        fenced_send_level_notification(op, &result, device_id);
+ 
+     } else if(pcmk__str_eq(op, CRM_OP_RM_NODE_CACHE, pcmk__str_casei)) {
+         int node_id = 0;
+diff --git a/daemons/fenced/pacemaker-fenced.c b/daemons/fenced/pacemaker-fenced.c
+index 42e167ce78..773cf57f6b 100644
+--- a/daemons/fenced/pacemaker-fenced.c
++++ b/daemons/fenced/pacemaker-fenced.c
+@@ -411,10 +411,21 @@ fenced_send_device_notification(const char *op,
+     do_stonith_notify_config(op, pcmk_rc2legacy(stonith__result2rc(result)), desc, g_hash_table_size(device_list));
+ }
+ 
++/*!
++ * \internal
++ * \brief Send notifications for a topology level change to subscribed clients
++ *
++ * \param[in] op      Notification type (STONITH_OP_LEVEL_ADD or
++ *                    STONITH_OP_LEVEL_DEL)
++ * \param[in] result  Operation result
++ * \param[in] desc    String representation of level (<target>[<level_index>])
++ */
+ void
+-do_stonith_notify_level(const char *op, int rc, const char *desc)
++fenced_send_level_notification(const char *op,
++                               const pcmk__action_result_t *result,
++                               const char *desc)
+ {
+-    do_stonith_notify_config(op, rc, desc, g_hash_table_size(topology));
++    do_stonith_notify_config(op, pcmk_rc2legacy(stonith__result2rc(result)), desc, g_hash_table_size(topology));
+ }
+ 
+ static void
+@@ -429,8 +440,7 @@ topology_remove_helper(const char *node, int level)
+     crm_xml_add(data, XML_ATTR_STONITH_TARGET, node);
+ 
+     fenced_unregister_level(data, &desc, &result);
+-    do_stonith_notify_level(STONITH_OP_LEVEL_DEL,
+-                            pcmk_rc2legacy(stonith__result2rc(&result)), desc);
++    fenced_send_level_notification(STONITH_OP_LEVEL_DEL, &result, desc);
+     pcmk__reset_result(&result);
+     free_xml(data);
+     free(desc);
+@@ -480,8 +490,7 @@ handle_topology_change(xmlNode *match, bool remove)
+     }
+ 
+     fenced_register_level(match, &desc, &result);
+-    do_stonith_notify_level(STONITH_OP_LEVEL_ADD,
+-                            pcmk_rc2legacy(stonith__result2rc(&result)), desc);
++    fenced_send_level_notification(STONITH_OP_LEVEL_ADD, &result, desc);
+     pcmk__reset_result(&result);
+     free(desc);
+ }
+diff --git a/daemons/fenced/pacemaker-fenced.h b/daemons/fenced/pacemaker-fenced.h
+index 0b63680171..8503e813bf 100644
+--- a/daemons/fenced/pacemaker-fenced.h
++++ b/daemons/fenced/pacemaker-fenced.h
+@@ -233,7 +233,9 @@ void do_stonith_notify(const char *type, int result, xmlNode *data);
+ void fenced_send_device_notification(const char *op,
+                                      const pcmk__action_result_t *result,
+                                      const char *desc);
+-void do_stonith_notify_level(const char *op, int rc, const char *desc);
++void fenced_send_level_notification(const char *op,
++                                    const pcmk__action_result_t *result,
++                                    const char *desc);
+ 
+ remote_fencing_op_t *initiate_remote_stonith_op(pcmk__client_t *client,
+                                                 xmlNode *request,
+-- 
+2.27.0
+
+
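+The level helpers already produce a structured result, so the
+notification chains directly off them, as in topology_remove_helper()
+above; desc is the string form of the level, e.g. "node1[2]" (an
+illustrative value):
+
+    fenced_unregister_level(data, &desc, &result);
+    fenced_send_level_notification(STONITH_OP_LEVEL_DEL, &result, desc);
+    pcmk__reset_result(&result);
+    free(desc);
+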
+From deec1ea9bcd7e0062755aa8b74358bfd12e4b9f0 Mon Sep 17 00:00:00 2001
+From: Ken Gaillot <kgaillot@redhat.com>
+Date: Fri, 19 Nov 2021 16:53:26 -0600
+Subject: [PATCH 18/23] Refactor: fencer: pass full result when sending
+ configuration notifications
+
+Rename do_stonith_notify_config() to send_config_notification() for
+consistency, and make it take the full result as an argument rather than a
+legacy return code. The full result is not used yet, but that is planned.
+---
+ daemons/fenced/pacemaker-fenced.c | 19 +++++++++++++++----
+ 1 file changed, 15 insertions(+), 4 deletions(-)
+
+diff --git a/daemons/fenced/pacemaker-fenced.c b/daemons/fenced/pacemaker-fenced.c
+index 773cf57f6b..d64358e07f 100644
+--- a/daemons/fenced/pacemaker-fenced.c
++++ b/daemons/fenced/pacemaker-fenced.c
+@@ -379,8 +379,19 @@ do_stonith_notify(const char *type, int result, xmlNode *data)
+     crm_trace("Notify complete");
+ }
+ 
++/*!
++ * \internal
++ * \brief Send notifications for a configuration change to subscribed clients
++ *
++ * \param[in] op      Notification type (STONITH_OP_DEVICE_ADD,
++ *                    STONITH_OP_DEVICE_DEL, STONITH_OP_LEVEL_ADD, or
++ *                    STONITH_OP_LEVEL_DEL)
++ * \param[in] result  Operation result
++ * \param[in] desc    Description of what changed
++ * \param[in] active  Current number of devices or topologies in use
++ */
+ static void
+-do_stonith_notify_config(const char *op, int rc,
++send_config_notification(const char *op, const pcmk__action_result_t *result,
+                          const char *desc, int active)
+ {
+     xmlNode *notify_data = create_xml_node(NULL, op);
+@@ -390,7 +401,7 @@ do_stonith_notify_config(const char *op, int rc,
+     crm_xml_add(notify_data, F_STONITH_DEVICE, desc);
+     crm_xml_add_int(notify_data, F_STONITH_ACTIVE, active);
+ 
+-    do_stonith_notify(op, rc, notify_data);
++    do_stonith_notify(op, pcmk_rc2legacy(stonith__result2rc(result)), notify_data);
+     free_xml(notify_data);
+ }
+ 
+@@ -408,7 +419,7 @@ fenced_send_device_notification(const char *op,
+                                 const pcmk__action_result_t *result,
+                                 const char *desc)
+ {
+-    do_stonith_notify_config(op, pcmk_rc2legacy(stonith__result2rc(result)), desc, g_hash_table_size(device_list));
++    send_config_notification(op, result, desc, g_hash_table_size(device_list));
+ }
+ 
+ /*!
+@@ -425,7 +436,7 @@ fenced_send_level_notification(const char *op,
+                                const pcmk__action_result_t *result,
+                                const char *desc)
+ {
+-    do_stonith_notify_config(op, pcmk_rc2legacy(stonith__result2rc(result)), desc, g_hash_table_size(topology));
++    send_config_notification(op, result, desc, g_hash_table_size(topology));
+ }
+ 
+ static void
+-- 
+2.27.0
+
+
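+Both wrappers now funnel into send_config_notification(), whose payload
+stays small: the notification element is named after the operation and
+carries the changed item plus a count of what remains active. Condensed
+from the hunk above (still mapping to a legacy rc at this point):
+
+    xmlNode *notify_data = create_xml_node(NULL, op);
+
+    crm_xml_add(notify_data, F_STONITH_DEVICE, desc);       // what changed
+    crm_xml_add_int(notify_data, F_STONITH_ACTIVE, active); // what remains
+    do_stonith_notify(op, pcmk_rc2legacy(stonith__result2rc(result)),
+                      notify_data);
+    free_xml(notify_data);
+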
+From 432e4445b630fb158482a5f6de1e0e41697a381f Mon Sep 17 00:00:00 2001
+From: Ken Gaillot <kgaillot@redhat.com>
+Date: Fri, 19 Nov 2021 16:56:12 -0600
+Subject: [PATCH 19/23] Feature: fencer: pass full result when sending
+ notifications
+
+Rename do_stonith_notify() to fenced_send_notification() for consistency,
+make it take the full result as an argument rather than a legacy return
+code, and add the full result to the notifications.
+---
+ daemons/fenced/fenced_commands.c  |  4 ++--
+ daemons/fenced/fenced_history.c   |  6 +++---
+ daemons/fenced/fenced_remote.c    |  6 +++---
+ daemons/fenced/pacemaker-fenced.c | 15 ++++++++++++---
+ daemons/fenced/pacemaker-fenced.h |  4 +++-
+ 5 files changed, 23 insertions(+), 12 deletions(-)
+
+diff --git a/daemons/fenced/fenced_commands.c b/daemons/fenced/fenced_commands.c
+index 2f3dbb035a..54ebc12947 100644
+--- a/daemons/fenced/fenced_commands.c
++++ b/daemons/fenced/fenced_commands.c
+@@ -2489,8 +2489,8 @@ send_async_reply(async_command_t *cmd, const pcmk__action_result_t *result,
+         crm_xml_add(notify_data, F_STONITH_REMOTE_OP_ID, cmd->remote_op_id);
+         crm_xml_add(notify_data, F_STONITH_ORIGIN, cmd->client);
+ 
+-        do_stonith_notify(T_STONITH_NOTIFY_FENCE, pcmk_rc2legacy(stonith__result2rc(result)), notify_data);
+-        do_stonith_notify(T_STONITH_NOTIFY_HISTORY, pcmk_ok, NULL);
++        fenced_send_notification(T_STONITH_NOTIFY_FENCE, result, notify_data);
++        fenced_send_notification(T_STONITH_NOTIFY_HISTORY, NULL, NULL);
+     }
+ }
+ 
+diff --git a/daemons/fenced/fenced_history.c b/daemons/fenced/fenced_history.c
+index 1e07a9815a..44310ed77b 100644
+--- a/daemons/fenced/fenced_history.c
++++ b/daemons/fenced/fenced_history.c
+@@ -100,7 +100,7 @@ stonith_fence_history_cleanup(const char *target,
+         g_hash_table_foreach_remove(stonith_remote_op_list,
+                              stonith_remove_history_entry,
+                              (gpointer) target);
+-        do_stonith_notify(T_STONITH_NOTIFY_HISTORY, pcmk_ok, NULL);
++        fenced_send_notification(T_STONITH_NOTIFY_HISTORY, NULL, NULL);
+     }
+ }
+ 
+@@ -402,7 +402,7 @@ stonith_local_history_diff_and_merge(GHashTable *remote_history,
+ 
+     if (updated) {
+         stonith_fence_history_trim();
+-        do_stonith_notify(T_STONITH_NOTIFY_HISTORY, pcmk_ok, NULL);
++        fenced_send_notification(T_STONITH_NOTIFY_HISTORY, NULL, NULL);
+     }
+ 
+     if (cnt == 0) {
+@@ -473,7 +473,7 @@ stonith_fence_history(xmlNode *msg, xmlNode **output,
+            is done so send a notification for anything
+            that smells like history-sync
+          */
+-        do_stonith_notify(T_STONITH_NOTIFY_HISTORY_SYNCED, pcmk_ok, NULL);
++        fenced_send_notification(T_STONITH_NOTIFY_HISTORY_SYNCED, NULL, NULL);
+         if (crm_element_value(msg, F_STONITH_CALLID)) {
+             /* this is coming from the stonith-API
+             *
+diff --git a/daemons/fenced/fenced_remote.c b/daemons/fenced/fenced_remote.c
+index 329e06c444..16c181b4b0 100644
+--- a/daemons/fenced/fenced_remote.c
++++ b/daemons/fenced/fenced_remote.c
+@@ -442,8 +442,8 @@ handle_local_reply_and_notify(remote_fencing_op_t *op, xmlNode *data,
+     do_local_reply(reply, op->client_id, op->call_options & st_opt_sync_call, FALSE);
+ 
+     /* bcast to all local clients that the fencing operation happened */
+-    do_stonith_notify(T_STONITH_NOTIFY_FENCE, pcmk_rc2legacy(stonith__result2rc(result)), notify_data);
+-    do_stonith_notify(T_STONITH_NOTIFY_HISTORY, pcmk_ok, NULL);
++    fenced_send_notification(T_STONITH_NOTIFY_FENCE, result, notify_data);
++    fenced_send_notification(T_STONITH_NOTIFY_HISTORY, NULL, NULL);
+ 
+     /* mark this op as having notify's already sent */
+     op->notify_sent = TRUE;
+@@ -1211,7 +1211,7 @@ create_remote_stonith_op(const char *client, xmlNode * request, gboolean peer)
+ 
+     if (op->state != st_duplicate) {
+         /* kick history readers */
+-        do_stonith_notify(T_STONITH_NOTIFY_HISTORY, pcmk_ok, NULL);
++        fenced_send_notification(T_STONITH_NOTIFY_HISTORY, NULL, NULL);
+     }
+ 
+     /* safe to trim as long as that doesn't touch pending ops */
+diff --git a/daemons/fenced/pacemaker-fenced.c b/daemons/fenced/pacemaker-fenced.c
+index d64358e07f..6b31b814a3 100644
+--- a/daemons/fenced/pacemaker-fenced.c
++++ b/daemons/fenced/pacemaker-fenced.c
+@@ -356,8 +356,17 @@ do_stonith_async_timeout_update(const char *client_id, const char *call_id, int
+     free_xml(notify_data);
+ }
+ 
++/*!
++ * \internal
++ * \brief Notify relevant IPC clients of a fencing operation result
++ *
++ * \param[in] type     Notification type
++ * \param[in] result   Result of fencing operation (assume success if NULL)
++ * \param[in] data     If not NULL, add to notification as call data
++ */
+ void
+-do_stonith_notify(const char *type, int result, xmlNode *data)
++fenced_send_notification(const char *type, const pcmk__action_result_t *result,
++                         xmlNode *data)
+ {
+     /* TODO: Standardize the contents of data */
+     xmlNode *update_msg = create_xml_node(NULL, "notify");
+@@ -367,7 +376,7 @@ do_stonith_notify(const char *type, int result, xmlNode *data)
+     crm_xml_add(update_msg, F_TYPE, T_STONITH_NOTIFY);
+     crm_xml_add(update_msg, F_SUBTYPE, type);
+     crm_xml_add(update_msg, F_STONITH_OPERATION, type);
+-    crm_xml_add_int(update_msg, F_STONITH_RC, result);
++    stonith__xe_set_result(update_msg, result);
+ 
+     if (data != NULL) {
+         add_message_xml(update_msg, F_STONITH_CALLDATA, data);
+@@ -401,7 +410,7 @@ send_config_notification(const char *op, const pcmk__action_result_t *result,
+     crm_xml_add(notify_data, F_STONITH_DEVICE, desc);
+     crm_xml_add_int(notify_data, F_STONITH_ACTIVE, active);
+ 
+-    do_stonith_notify(op, pcmk_rc2legacy(stonith__result2rc(result)), notify_data);
++    fenced_send_notification(op, result, notify_data);
+     free_xml(notify_data);
+ }
+ 
+diff --git a/daemons/fenced/pacemaker-fenced.h b/daemons/fenced/pacemaker-fenced.h
+index 8503e813bf..502fcc9a29 100644
+--- a/daemons/fenced/pacemaker-fenced.h
++++ b/daemons/fenced/pacemaker-fenced.h
+@@ -229,7 +229,9 @@ xmlNode *fenced_construct_reply(xmlNode *request, xmlNode *data,
+ void
+  do_stonith_async_timeout_update(const char *client, const char *call_id, int timeout);
+ 
+-void do_stonith_notify(const char *type, int result, xmlNode *data);
++void fenced_send_notification(const char *type,
++                              const pcmk__action_result_t *result,
++                              xmlNode *data);
+ void fenced_send_device_notification(const char *op,
+                                      const pcmk__action_result_t *result,
+                                      const char *desc);
+-- 
+2.27.0
+
+
+From 86deababe506c2bb8259538e5380b6a78dc4b770 Mon Sep 17 00:00:00 2001
+From: Ken Gaillot <kgaillot@redhat.com>
+Date: Fri, 19 Nov 2021 16:58:03 -0600
+Subject: [PATCH 20/23] Feature: fencer: pass full result when sending
+ notifications
+
+Rename create_op_done_notify() to fencing_result2xml() for readability,
+make it take the full result as an argument rather than a legacy return code,
+and add the full result to broadcasts and notifications.
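+
+The expected usage (a sketch of the pattern in the hunks below) pairs the
+new function with free_xml(), since the caller owns the returned XML:
+
+    xmlNode *notify_data = fencing_result2xml(op, result);
+
+    fenced_send_notification(T_STONITH_NOTIFY_FENCE, result, notify_data);
+    free_xml(notify_data);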
+---
+ daemons/fenced/fenced_remote.c | 20 +++++++++++++++-----
+ 1 file changed, 15 insertions(+), 5 deletions(-)
+
+diff --git a/daemons/fenced/fenced_remote.c b/daemons/fenced/fenced_remote.c
+index 16c181b4b0..4cf723e6df 100644
+--- a/daemons/fenced/fenced_remote.c
++++ b/daemons/fenced/fenced_remote.c
+@@ -356,13 +356,22 @@ undo_op_remap(remote_fencing_op_t *op)
+     }
+ }
+ 
++/*!
++ * \internal
++ * \brief Create notification data XML for a fencing operation result
++ *
++ * \param[in] op      Fencer operation that completed
++ * \param[in] result  Full operation result
++ *
++ * \return Newly created XML to add as notification data
++ * \note The caller is responsible for freeing the returned XML.
++ */
+ static xmlNode *
+-create_op_done_notify(remote_fencing_op_t * op, int rc)
++fencing_result2xml(remote_fencing_op_t *op, pcmk__action_result_t *result)
+ {
+     xmlNode *notify_data = create_xml_node(NULL, T_STONITH_NOTIFY_FENCE);
+ 
+     crm_xml_add_int(notify_data, "state", op->state);
+-    crm_xml_add_int(notify_data, F_STONITH_RC, rc);
+     crm_xml_add(notify_data, F_STONITH_TARGET, op->target);
+     crm_xml_add(notify_data, F_STONITH_ACTION, op->action);
+     crm_xml_add(notify_data, F_STONITH_DELEGATE, op->delegate);
+@@ -371,6 +380,7 @@ create_op_done_notify(remote_fencing_op_t * op, int rc)
+     crm_xml_add(notify_data, F_STONITH_CLIENTID, op->client_id);
+     crm_xml_add(notify_data, F_STONITH_CLIENTNAME, op->client_name);
+ 
++    stonith__xe_set_result(notify_data, result);
+     return notify_data;
+ }
+ 
+@@ -388,7 +398,7 @@ fenced_broadcast_op_result(remote_fencing_op_t *op,
+ {
+     static int count = 0;
+     xmlNode *bcast = create_xml_node(NULL, T_STONITH_REPLY);
+-    xmlNode *notify_data = create_op_done_notify(op, pcmk_rc2legacy(stonith__result2rc(result)));
++    xmlNode *notify_data = fencing_result2xml(op, result);
+ 
+     count++;
+     crm_trace("Broadcasting result to peers");
+@@ -430,7 +440,6 @@ handle_local_reply_and_notify(remote_fencing_op_t *op, xmlNode *data,
+     }
+ 
+     /* Do notification with a clean data object */
+-    notify_data = create_op_done_notify(op, pcmk_rc2legacy(stonith__result2rc(result)));
+     crm_xml_add_int(data, "state", op->state);
+     crm_xml_add(data, F_STONITH_TARGET, op->target);
+     crm_xml_add(data, F_STONITH_OPERATION, op->action);
+@@ -442,13 +451,14 @@ handle_local_reply_and_notify(remote_fencing_op_t *op, xmlNode *data,
+     do_local_reply(reply, op->client_id, op->call_options & st_opt_sync_call, FALSE);
+ 
+     /* bcast to all local clients that the fencing operation happend */
++    notify_data = fencing_result2xml(op, result);
+     fenced_send_notification(T_STONITH_NOTIFY_FENCE, result, notify_data);
++    free_xml(notify_data);
+     fenced_send_notification(T_STONITH_NOTIFY_HISTORY, NULL, NULL);
+ 
+     /* mark this op as having notify's already sent */
+     op->notify_sent = TRUE;
+     free_xml(reply);
+-    free_xml(notify_data);
+ }
+ 
+ /*!
+-- 
+2.27.0
+
+
+From 2814cde97520b63ca5f9baf3df37d73507e89d34 Mon Sep 17 00:00:00 2001
+From: Ken Gaillot <kgaillot@redhat.com>
+Date: Wed, 15 Dec 2021 17:40:52 -0600
+Subject: [PATCH 21/23] Low: fencer: restore check for invalid topology level
+ target
+
+... per review. b7c7676c mistakenly dropped it
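+
+The restored check (condensed from the hunk below) rejects a registration
+whose target specification is invalid:
+
+    if ((mode < 0) || (mode > 2)) {
+        pcmk__set_result(result, CRM_EX_INVALID_PARAM, PCMK_EXEC_INVALID,
+                         "Invalid topology level target");
+        return;
+    }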
+---
+ daemons/fenced/fenced_commands.c | 12 +++++++++++-
+ 1 file changed, 11 insertions(+), 1 deletion(-)
+
+diff --git a/daemons/fenced/fenced_commands.c b/daemons/fenced/fenced_commands.c
+index 54ebc12947..1a4a791385 100644
+--- a/daemons/fenced/fenced_commands.c
++++ b/daemons/fenced/fenced_commands.c
+@@ -1636,6 +1636,16 @@ fenced_register_level(xmlNode *msg, char **desc, pcmk__action_result_t *result)
+         *desc = crm_strdup_printf("%s[%d]", target, id);
+     }
+ 
++    // Ensure a valid target was specified
++    if ((mode < 0) || (mode > 2)) {
++        crm_warn("Ignoring topology level registration without valid target");
++        free(target);
++        crm_log_xml_warn(level, "Bad level");
++        pcmk__set_result(result, CRM_EX_INVALID_PARAM, PCMK_EXEC_INVALID,
++                         "Invalid topology level target");
++        return;
++    }
++
+     // Ensure level ID is in allowed range
+     if ((id <= 0) || (id >= ST_LEVEL_MAX)) {
+         crm_warn("Ignoring topology registration for %s with invalid level %d",
+@@ -1643,7 +1653,7 @@ fenced_register_level(xmlNode *msg, char **desc, pcmk__action_result_t *result)
+         free(target);
+         crm_log_xml_warn(level, "Bad level");
+         pcmk__set_result(result, CRM_EX_INVALID_PARAM, PCMK_EXEC_INVALID,
+-                         "Invalid topology level");
++                         "Invalid topology level number");
+         return;
+     }
+ 
+-- 
+2.27.0
+
+
+From c82806f9e16abcea00025fd3a290477aef2d8d83 Mon Sep 17 00:00:00 2001
+From: Ken Gaillot <kgaillot@redhat.com>
+Date: Tue, 21 Dec 2021 16:23:29 -0600
+Subject: [PATCH 22/23] Low: fencer: free result memory when processing fencing
+ replies
+
+found in review
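+
+The fix funnels every exit path in fenced_process_fencing_reply() through
+one cleanup label, so the memory held by the parsed result is always
+released.  A minimal sketch of the pattern (per the hunks below):
+
+    if (op == NULL) {
+        goto done;               // early exits jump to cleanup, not return
+    }
+    request_peer_fencing(op, NULL, &result);
+done:
+    pcmk__reset_result(&result); // frees dynamically allocated result fields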
+---
+ daemons/fenced/fenced_remote.c | 24 +++++++++++++++---------
+ 1 file changed, 15 insertions(+), 9 deletions(-)
+
+diff --git a/daemons/fenced/fenced_remote.c b/daemons/fenced/fenced_remote.c
+index 4cf723e6df..9fda9ef060 100644
+--- a/daemons/fenced/fenced_remote.c
++++ b/daemons/fenced/fenced_remote.c
+@@ -2241,14 +2241,14 @@ fenced_process_fencing_reply(xmlNode *msg)
+         /* Could be for an event that began before we started */
+         /* TODO: Record the op for later querying */
+         crm_info("Received peer result of unknown or expired operation %s", id);
+-        return;
++        goto done;
+     }
+ 
+     if (op->devices && device && !pcmk__str_eq(op->devices->data, device, pcmk__str_casei)) {
+         crm_err("Received outdated reply for device %s (instead of %s) to "
+                 "fence (%s) %s. Operation already timed out at peer level.",
+                 device, (const char *) op->devices->data, op->action, op->target);
+-        return;
++        goto done;
+     }
+ 
+     if (pcmk__str_eq(crm_element_value(msg, F_SUBTYPE), "broadcast", pcmk__str_casei)) {
+@@ -2265,14 +2265,15 @@ fenced_process_fencing_reply(xmlNode *msg)
+             op->state = st_failed;
+         }
+         finalize_op(op, msg, &result, false);
+-        return;
++        goto done;
++
+     } else if (!pcmk__str_eq(op->originator, stonith_our_uname, pcmk__str_casei)) {
+         /* If this isn't a remote level broadcast, and we are not the
+          * originator of the operation, we should not be receiving this msg. */
+         crm_err("Received non-broadcast fencing result for operation %.8s "
+                 "we do not own (device %s targeting %s)",
+                 op->id, device, op->target);
+-        return;
++        goto done;
+     }
+ 
+     if (pcmk_is_set(op->call_options, st_opt_topology)) {
+@@ -2290,7 +2291,7 @@ fenced_process_fencing_reply(xmlNode *msg)
+          * and notify our local clients. */
+         if (op->state == st_done) {
+             finalize_op(op, msg, &result, false);
+-            return;
++            goto done;
+         }
+ 
+         if ((op->phase == 2) && !pcmk__result_ok(&result)) {
+@@ -2310,27 +2311,30 @@ fenced_process_fencing_reply(xmlNode *msg)
+             /* An operation completed successfully. Try another device if
+              * necessary, otherwise mark the operation as done. */
+             advance_topology_device_in_level(op, device, msg);
+-            return;
++            goto done;
+         } else {
+             /* This device failed, time to try another topology level. If no other
+              * levels are available, mark this operation as failed and report results. */
+             if (advance_topology_level(op, false) != pcmk_rc_ok) {
+                 op->state = st_failed;
+                 finalize_op(op, msg, &result, false);
+-                return;
++                goto done;
+             }
+         }
++
+     } else if (pcmk__result_ok(&result) && (op->devices == NULL)) {
+         crm_trace("All done for %s", op->target);
+         op->state = st_done;
+         finalize_op(op, msg, &result, false);
+-        return;
++        goto done;
++
+     } else if ((result.execution_status == PCMK_EXEC_TIMEOUT)
+                && (op->devices == NULL)) {
+         /* If the operation timed out don't bother retrying other peers. */
+         op->state = st_failed;
+         finalize_op(op, msg, &result, false);
+-        return;
++        goto done;
++
+     } else {
+         /* fall-through and attempt other fencing action using another peer */
+     }
+@@ -2340,6 +2344,8 @@ fenced_process_fencing_reply(xmlNode *msg)
+               op->target, op->originator, op->client_name,
+               pcmk_exec_status_str(result.execution_status));
+     request_peer_fencing(op, NULL, &result);
++done:
++    pcmk__reset_result(&result);
+ }
+ 
+ gboolean
+-- 
+2.27.0
+
+
+From 137bf97fdb39043eebb02a0d3ebbe47ee8c7044c Mon Sep 17 00:00:00 2001
+From: Ken Gaillot <kgaillot@redhat.com>
+Date: Tue, 21 Dec 2021 16:26:22 -0600
+Subject: [PATCH 23/23] Log: fencer: clarify timeout message
+
+... as suggested by review
+---
+ daemons/fenced/fenced_remote.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/daemons/fenced/fenced_remote.c b/daemons/fenced/fenced_remote.c
+index 9fda9ef060..1e237150c5 100644
+--- a/daemons/fenced/fenced_remote.c
++++ b/daemons/fenced/fenced_remote.c
+@@ -656,7 +656,7 @@ remote_op_timeout_one(gpointer userdata)
+     crm_notice("Peer's '%s' action targeting %s for client %s timed out " CRM_XS
+                " id=%.8s", op->action, op->target, op->client_name, op->id);
+     pcmk__set_result(&result, CRM_EX_ERROR, PCMK_EXEC_TIMEOUT,
+-                     "Peer did not send fence result within timeout");
++                     "Peer did not return fence result within timeout");
+ 
+ 
+     // Try another device, if appropriate
+-- 
+2.27.0
+
diff --git a/SOURCES/010-probe-failures.patch b/SOURCES/010-probe-failures.patch
new file mode 100644
index 0000000..d90fc3c
--- /dev/null
+++ b/SOURCES/010-probe-failures.patch
@@ -0,0 +1,4157 @@
+From f2e51898735b5e9990464141fc4aea3dd83f5067 Mon Sep 17 00:00:00 2001
+From: Chris Lumens <clumens@redhat.com>
+Date: Thu, 4 Nov 2021 14:36:41 -0400
+Subject: [PATCH 01/21] Refactor: scheduler: Use bool in unpack_rsc_op.
+
+Previously, the variable was declared as bool but assigned the TRUE/FALSE
+macros.  Instead, use the actual bool values, true and false.
+---
+ lib/pengine/unpack.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/lib/pengine/unpack.c b/lib/pengine/unpack.c
+index b1e84110a2..ecc7275e15 100644
+--- a/lib/pengine/unpack.c
++++ b/lib/pengine/unpack.c
+@@ -3671,7 +3671,7 @@ unpack_rsc_op(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op,
+     const char *task = NULL;
+     const char *task_key = NULL;
+     const char *exit_reason = NULL;
+-    bool expired = FALSE;
++    bool expired = false;
+     pe_resource_t *parent = rsc;
+     enum action_fail_response failure_strategy = action_fail_recover;
+ 
+@@ -3727,7 +3727,7 @@ unpack_rsc_op(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op,
+ 
+     if ((status != PCMK_EXEC_NOT_INSTALLED)
+         && check_operation_expiry(rsc, node, rc, xml_op, data_set)) {
+-        expired = TRUE;
++        expired = true;
+     }
+ 
+     if (!strcmp(task, CRMD_ACTION_STATUS)) {
+-- 
+2.27.0
+
+
+From 4c961b8e670d336a368c7fd1535c247e40c6b48e Mon Sep 17 00:00:00 2001
+From: Chris Lumens <clumens@redhat.com>
+Date: Thu, 4 Nov 2021 15:07:01 -0400
+Subject: [PATCH 02/21] Refactor: scheduler: Add functions for determining if
+ an op is a probe.
+
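+A probe is a one-shot "monitor" action, i.e. one with a zero interval.
+For example (behavior per the new functions and their unit tests below):
+
+    pcmk_is_probe("monitor", 0);   // true: one-shot monitor
+    pcmk_is_probe("monitor", 1);   // false: recurring monitor
+    pcmk_is_probe("start", 0);     // false: not a monitor
+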
+---
+ include/crm/common/util.h                     |  3 +
+ lib/common/operations.c                       | 21 +++++++
+ lib/common/tests/operations/Makefile.am       |  6 +-
+ .../tests/operations/pcmk_is_probe_test.c     | 37 +++++++++++++
+ .../tests/operations/pcmk_xe_is_probe_test.c  | 55 +++++++++++++++++++
+ lib/pengine/unpack.c                          | 12 ++--
+ lib/pengine/utils.c                           |  5 +-
+ 7 files changed, 127 insertions(+), 12 deletions(-)
+ create mode 100644 lib/common/tests/operations/pcmk_is_probe_test.c
+ create mode 100644 lib/common/tests/operations/pcmk_xe_is_probe_test.c
+
+diff --git a/include/crm/common/util.h b/include/crm/common/util.h
+index 2728b64492..fbea6e560c 100644
+--- a/include/crm/common/util.h
++++ b/include/crm/common/util.h
+@@ -72,6 +72,9 @@ xmlNode *crm_create_op_xml(xmlNode *parent, const char *prefix,
+                            const char *timeout);
+ #define CRM_DEFAULT_OP_TIMEOUT_S "20s"
+ 
++bool pcmk_is_probe(const char *task, guint interval);
++bool pcmk_xe_is_probe(xmlNode *xml_op);
++
+ int compare_version(const char *version1, const char *version2);
+ 
+ /* coverity[+kill] */
+diff --git a/lib/common/operations.c b/lib/common/operations.c
+index 366c189702..978df79082 100644
+--- a/lib/common/operations.c
++++ b/lib/common/operations.c
+@@ -537,3 +537,24 @@ pcmk__is_fencing_action(const char *action)
+ {
+     return pcmk__str_any_of(action, "off", "reboot", "poweroff", NULL);
+ }
++
++bool
++pcmk_is_probe(const char *task, guint interval)
++{
++    if (task == NULL) {
++        return false;
++    }
++
++    return (interval == 0) && pcmk__str_eq(task, CRMD_ACTION_STATUS, pcmk__str_none);
++}
++
++bool
++pcmk_xe_is_probe(xmlNode *xml_op)
++{
++    const char *task = crm_element_value(xml_op, XML_LRM_ATTR_TASK);
++    const char *interval_ms_s = crm_element_value(xml_op, XML_LRM_ATTR_INTERVAL_MS);
++    int interval_ms;
++
++    pcmk__scan_min_int(interval_ms_s, &interval_ms, 0);
++    return pcmk_is_probe(task, interval_ms);
++}
+diff --git a/lib/common/tests/operations/Makefile.am b/lib/common/tests/operations/Makefile.am
+index c8814ff0a8..2e3d0b0679 100644
+--- a/lib/common/tests/operations/Makefile.am
++++ b/lib/common/tests/operations/Makefile.am
+@@ -1,5 +1,5 @@
+ #
+-# Copyright 2020 the Pacemaker project contributors
++# Copyright 2020-2021 the Pacemaker project contributors
+ #
+ # The version control history for this file may have further details.
+ #
+@@ -12,6 +12,8 @@ LDADD = $(top_builddir)/lib/common/libcrmcommon.la -lcmocka
+ include $(top_srcdir)/mk/tap.mk
+ 
+ # Add "_test" to the end of all test program names to simplify .gitignore.
+-check_PROGRAMS = parse_op_key_test
++check_PROGRAMS = parse_op_key_test \
++				 pcmk_is_probe_test \
++				 pcmk_xe_is_probe_test
+ 
+ TESTS = $(check_PROGRAMS)
+diff --git a/lib/common/tests/operations/pcmk_is_probe_test.c b/lib/common/tests/operations/pcmk_is_probe_test.c
+new file mode 100644
+index 0000000000..9b449f1a70
+--- /dev/null
++++ b/lib/common/tests/operations/pcmk_is_probe_test.c
+@@ -0,0 +1,37 @@
++/*
++ * Copyright 2021 the Pacemaker project contributors
++ *
++ * The version control history for this file may have further details.
++ *
++ * This source code is licensed under the GNU Lesser General Public License
++ * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
++ */
++
++#include <crm_internal.h>
++
++#include <stdarg.h>
++#include <stddef.h>
++#include <stdint.h>
++#include <stdlib.h>
++#include <setjmp.h>
++#include <cmocka.h>
++
++static void
++is_probe_test(void **state)
++{
++    assert_false(pcmk_is_probe(NULL, 0));
++    assert_false(pcmk_is_probe("", 0));
++    assert_false(pcmk_is_probe("blahblah", 0));
++    assert_false(pcmk_is_probe("monitor", 1));
++    assert_true(pcmk_is_probe("monitor", 0));
++}
++
++int main(int argc, char **argv)
++{
++    const struct CMUnitTest tests[] = {
++        cmocka_unit_test(is_probe_test),
++    };
++
++    cmocka_set_message_output(CM_OUTPUT_TAP);
++    return cmocka_run_group_tests(tests, NULL, NULL);
++}
+diff --git a/lib/common/tests/operations/pcmk_xe_is_probe_test.c b/lib/common/tests/operations/pcmk_xe_is_probe_test.c
+new file mode 100644
+index 0000000000..0283d1c145
+--- /dev/null
++++ b/lib/common/tests/operations/pcmk_xe_is_probe_test.c
+@@ -0,0 +1,55 @@
++/*
++ * Copyright 2021 the Pacemaker project contributors
++ *
++ * The version control history for this file may have further details.
++ *
++ * This source code is licensed under the GNU Lesser General Public License
++ * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
++ */
++
++#include <crm_internal.h>
++
++#include <stdarg.h>
++#include <stddef.h>
++#include <stdint.h>
++#include <stdlib.h>
++#include <setjmp.h>
++#include <cmocka.h>
++
++static void
++op_is_probe_test(void **state)
++{
++    xmlNode *node = NULL;
++
++    assert_false(pcmk_xe_is_probe(NULL));
++
++    node = string2xml("<lrm_rsc_op/>");
++    assert_false(pcmk_xe_is_probe(node));
++    free_xml(node);
++
++    node = string2xml("<lrm_rsc_op operation_key=\"blah\" interval=\"30s\"/>");
++    assert_false(pcmk_xe_is_probe(node));
++    free_xml(node);
++
++    node = string2xml("<lrm_rsc_op operation=\"monitor\" interval=\"30s\"/>");
++    assert_false(pcmk_xe_is_probe(node));
++    free_xml(node);
++
++    node = string2xml("<lrm_rsc_op operation=\"start\" interval=\"0\"/>");
++    assert_false(pcmk_xe_is_probe(node));
++    free_xml(node);
++
++    node = string2xml("<lrm_rsc_op operation=\"monitor\" interval=\"0\"/>");
++    assert_true(pcmk_xe_is_probe(node));
++    free_xml(node);
++}
++
++int main(int argc, char **argv)
++{
++    const struct CMUnitTest tests[] = {
++        cmocka_unit_test(op_is_probe_test),
++    };
++
++    cmocka_set_message_output(CM_OUTPUT_TAP);
++    return cmocka_run_group_tests(tests, NULL, NULL);
++}
+diff --git a/lib/pengine/unpack.c b/lib/pengine/unpack.c
+index ecc7275e15..7c0c66e696 100644
+--- a/lib/pengine/unpack.c
++++ b/lib/pengine/unpack.c
+@@ -83,7 +83,6 @@ is_dangling_guest_node(pe_node_t *node)
+     return FALSE;
+ }
+ 
+-
+ /*!
+  * \brief Schedule a fence action for a node
+  *
+@@ -2984,7 +2983,6 @@ static void
+ unpack_rsc_op_failure(pe_resource_t * rsc, pe_node_t * node, int rc, xmlNode * xml_op, xmlNode ** last_failure,
+                       enum action_fail_response * on_fail, pe_working_set_t * data_set)
+ {
+-    guint interval_ms = 0;
+     bool is_probe = false;
+     pe_action_t *action = NULL;
+ 
+@@ -2998,10 +2996,7 @@ unpack_rsc_op_failure(pe_resource_t * rsc, pe_node_t * node, int rc, xmlNode * x
+ 
+     *last_failure = xml_op;
+ 
+-    crm_element_value_ms(xml_op, XML_LRM_ATTR_INTERVAL_MS, &interval_ms);
+-    if ((interval_ms == 0) && !strcmp(task, CRMD_ACTION_STATUS)) {
+-        is_probe = true;
+-    }
++    is_probe = pcmk_xe_is_probe(xml_op);
+ 
+     if (exit_reason == NULL) {
+         exit_reason = "";
+@@ -3163,8 +3158,9 @@ determine_op_status(
+     }
+ 
+     crm_element_value_ms(xml_op, XML_LRM_ATTR_INTERVAL_MS, &interval_ms);
+-    if ((interval_ms == 0) && !strcmp(task, CRMD_ACTION_STATUS)) {
+-        is_probe = true;
++    is_probe = pcmk_xe_is_probe(xml_op);
++
++    if (is_probe) {
+         task = "probe";
+     }
+ 
+diff --git a/lib/pengine/utils.c b/lib/pengine/utils.c
+index c5eda3898e..07753e173a 100644
+--- a/lib/pengine/utils.c
++++ b/lib/pengine/utils.c
+@@ -1066,8 +1066,7 @@ unpack_operation(pe_action_t * action, xmlNode * xml_obj, pe_resource_t * contai
+ {
+     int timeout_ms = 0;
+     const char *value = NULL;
+-    bool is_probe = pcmk__str_eq(action->task, RSC_STATUS, pcmk__str_casei)
+-                    && (interval_ms == 0);
++    bool is_probe = false;
+ #if ENABLE_VERSIONED_ATTRS
+     pe_rsc_action_details_t *rsc_details = NULL;
+ #endif
+@@ -1094,6 +1093,8 @@ unpack_operation(pe_action_t * action, xmlNode * xml_obj, pe_resource_t * contai
+ 
+     CRM_CHECK(action && action->rsc, return);
+ 
++    is_probe = pcmk_is_probe(action->task, interval_ms);
++
+     // Cluster-wide <op_defaults> <meta_attributes>
+     pe__unpack_dataset_nvpairs(data_set->op_defaults, XML_TAG_META_SETS, &rule_data,
+                                action->meta, NULL, FALSE, data_set);
+-- 
+2.27.0
+
+
+From 09f32df97ab5064a15ba5a1fb3970d5c64ee7b30 Mon Sep 17 00:00:00 2001
+From: Chris Lumens <clumens@redhat.com>
+Date: Fri, 19 Nov 2021 14:47:22 -0500
+Subject: [PATCH 03/21] Refactor: scheduler: Move setting interval_ms in
+ determine_op_status.
+
+This can now happen in the only place it's being used.
+---
+ lib/pengine/unpack.c | 9 ++++++---
+ 1 file changed, 6 insertions(+), 3 deletions(-)
+
+diff --git a/lib/pengine/unpack.c b/lib/pengine/unpack.c
+index 7c0c66e696..b9986d2462 100644
+--- a/lib/pengine/unpack.c
++++ b/lib/pengine/unpack.c
+@@ -3142,7 +3142,6 @@ static int
+ determine_op_status(
+     pe_resource_t *rsc, int rc, int target_rc, pe_node_t * node, xmlNode * xml_op, enum action_fail_response * on_fail, pe_working_set_t * data_set) 
+ {
+-    guint interval_ms = 0;
+     bool is_probe = false;
+     int result = PCMK_EXEC_DONE;
+     const char *key = get_op_key(xml_op);
+@@ -3157,7 +3156,6 @@ determine_op_status(
+         exit_reason = "";
+     }
+ 
+-    crm_element_value_ms(xml_op, XML_LRM_ATTR_INTERVAL_MS, &interval_ms);
+     is_probe = pcmk_xe_is_probe(xml_op);
+ 
+     if (is_probe) {
+@@ -3230,12 +3228,17 @@ determine_op_status(
+             result = PCMK_EXEC_ERROR_FATAL;
+             break;
+ 
+-        case PCMK_OCF_UNIMPLEMENT_FEATURE:
++        case PCMK_OCF_UNIMPLEMENT_FEATURE: {
++            guint interval_ms = 0;
++            crm_element_value_ms(xml_op, XML_LRM_ATTR_INTERVAL_MS, &interval_ms);
++
+             if (interval_ms > 0) {
+                 result = PCMK_EXEC_NOT_SUPPORTED;
+                 break;
+             }
+             // fall through
++        }
++
+         case PCMK_OCF_NOT_INSTALLED:
+         case PCMK_OCF_INVALID_PARAM:
+         case PCMK_OCF_INSUFFICIENT_PRIV:
+-- 
+2.27.0
+
+
+From 6c8f47453afd6c100fddc45187faff17e15f7bfe Mon Sep 17 00:00:00 2001
+From: Chris Lumens <clumens@redhat.com>
+Date: Fri, 19 Nov 2021 14:57:57 -0500
+Subject: [PATCH 04/21] Refactor: scheduler: Add pcmk_xe_mask_probe_failure.
+
+Given an xmlNodePtr for a resource operation, this function determines
+whether it is a failed probe operation whose failure should be masked
+(that is, not displayed) in crm_mon and similar output.
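+
+A hypothetical caller (sketch only) can then skip such operations when
+printing failures:
+
+    if (pcmk_xe_mask_probe_failure(xml_op)) {
+        return;    // mask this failed probe in the display
+    }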
+---
+ include/crm/common/util.h                     |   1 +
+ lib/common/operations.c                       |  17 ++
+ lib/common/tests/operations/Makefile.am       |   3 +-
+ .../pcmk_xe_mask_probe_failure_test.c         | 162 ++++++++++++++++++
+ 4 files changed, 182 insertions(+), 1 deletion(-)
+ create mode 100644 lib/common/tests/operations/pcmk_xe_mask_probe_failure_test.c
+
+diff --git a/include/crm/common/util.h b/include/crm/common/util.h
+index fbea6e560c..784069ba1b 100644
+--- a/include/crm/common/util.h
++++ b/include/crm/common/util.h
+@@ -74,6 +74,7 @@ xmlNode *crm_create_op_xml(xmlNode *parent, const char *prefix,
+ 
+ bool pcmk_is_probe(const char *task, guint interval);
+ bool pcmk_xe_is_probe(xmlNode *xml_op);
++bool pcmk_xe_mask_probe_failure(xmlNode *xml_op);
+ 
+ int compare_version(const char *version1, const char *version2);
+ 
+diff --git a/lib/common/operations.c b/lib/common/operations.c
+index 978df79082..54482b8863 100644
+--- a/lib/common/operations.c
++++ b/lib/common/operations.c
+@@ -558,3 +558,20 @@ pcmk_xe_is_probe(xmlNode *xml_op)
+     pcmk__scan_min_int(interval_ms_s, &interval_ms, 0);
+     return pcmk_is_probe(task, interval_ms);
+ }
++
++bool
++pcmk_xe_mask_probe_failure(xmlNode *xml_op)
++{
++    int status = PCMK_EXEC_UNKNOWN;
++    int rc = PCMK_OCF_OK;
++
++    if (!pcmk_xe_is_probe(xml_op)) {
++        return false;
++    }
++
++    crm_element_value_int(xml_op, XML_LRM_ATTR_OPSTATUS, &status);
++    crm_element_value_int(xml_op, XML_LRM_ATTR_RC, &rc);
++
++    return rc == PCMK_OCF_NOT_INSTALLED || rc == PCMK_OCF_INVALID_PARAM ||
++           status == PCMK_EXEC_NOT_INSTALLED;
++}
+diff --git a/lib/common/tests/operations/Makefile.am b/lib/common/tests/operations/Makefile.am
+index 2e3d0b0679..457c5f7c7a 100644
+--- a/lib/common/tests/operations/Makefile.am
++++ b/lib/common/tests/operations/Makefile.am
+@@ -14,6 +14,7 @@ include $(top_srcdir)/mk/tap.mk
+ # Add "_test" to the end of all test program names to simplify .gitignore.
+ check_PROGRAMS = parse_op_key_test \
+ 				 pcmk_is_probe_test \
+-				 pcmk_xe_is_probe_test
++				 pcmk_xe_is_probe_test \
++				 pcmk_xe_mask_probe_failure_test
+ 
+ TESTS = $(check_PROGRAMS)
+diff --git a/lib/common/tests/operations/pcmk_xe_mask_probe_failure_test.c b/lib/common/tests/operations/pcmk_xe_mask_probe_failure_test.c
+new file mode 100644
+index 0000000000..a13f6d98f4
+--- /dev/null
++++ b/lib/common/tests/operations/pcmk_xe_mask_probe_failure_test.c
+@@ -0,0 +1,162 @@
++/*
++ * Copyright 2021 the Pacemaker project contributors
++ *
++ * The version control history for this file may have further details.
++ *
++ * This source code is licensed under the GNU Lesser General Public License
++ * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
++ */
++
++#include <crm_internal.h>
++
++#include <stdarg.h>
++#include <stddef.h>
++#include <stdint.h>
++#include <stdlib.h>
++#include <setjmp.h>
++#include <cmocka.h>
++
++static void
++op_is_not_probe_test(void **state) {
++    xmlNode *node = NULL;
++
++    /* Not worth testing this thoroughly since it's just a duplicate of whether
++     * pcmk_xe_is_probe works or not.
++     */
++
++    node = string2xml("<lrm_rsc_op operation=\"start\" interval=\"0\"/>");
++    assert_false(pcmk_xe_mask_probe_failure(node));
++    free_xml(node);
++}
++
++static void
++op_does_not_have_right_values_test(void **state) {
++    xmlNode *node = NULL;
++
++    node = string2xml("<lrm_rsc_op operation=\"monitor\" interval=\"0\"/>");
++    assert_false(pcmk_xe_mask_probe_failure(node));
++    free_xml(node);
++
++    node = string2xml("<lrm_rsc_op operation=\"monitor\" interval=\"0\" rc-code=\"0\" op-status=\"\"/>");
++    assert_false(pcmk_xe_mask_probe_failure(node));
++    free_xml(node);
++}
++
++static void
++check_values_test(void **state) {
++    xmlNode *node = NULL;
++
++    /* PCMK_EXEC_NOT_SUPPORTED */
++    node = string2xml("<lrm_rsc_op operation=\"monitor\" interval=\"0\" rc-code=\"0\" op-status=\"3\"/>");
++    assert_false(pcmk_xe_mask_probe_failure(node));
++    free_xml(node);
++
++    node = string2xml("<lrm_rsc_op operation=\"monitor\" interval=\"0\" rc-code=\"5\" op-status=\"3\"/>");
++    assert_true(pcmk_xe_mask_probe_failure(node));
++    free_xml(node);
++
++    /* PCMK_EXEC_DONE */
++    node = string2xml("<lrm_rsc_op operation=\"monitor\" interval=\"0\" rc-code=\"0\" op-status=\"0\"/>");
++    assert_false(pcmk_xe_mask_probe_failure(node));
++    free_xml(node);
++
++    node = string2xml("<lrm_rsc_op operation=\"monitor\" interval=\"0\" rc-code=\"2\" op-status=\"0\"/>");
++    assert_true(pcmk_xe_mask_probe_failure(node));
++    free_xml(node);
++
++    node = string2xml("<lrm_rsc_op operation=\"monitor\" interval=\"0\" rc-code=\"5\" op-status=\"0\"/>");
++    assert_true(pcmk_xe_mask_probe_failure(node));
++    free_xml(node);
++
++    node = string2xml("<lrm_rsc_op operation=\"monitor\" interval=\"0\" rc-code=\"6\" op-status=\"0\"/>");
++    assert_false(pcmk_xe_mask_probe_failure(node));
++    free_xml(node);
++
++    node = string2xml("<lrm_rsc_op operation=\"monitor\" interval=\"0\" rc-code=\"7\" op-status=\"0\"/>");
++    assert_false(pcmk_xe_mask_probe_failure(node));
++    free_xml(node);
++
++    /* PCMK_EXEC_NOT_INSTALLED */
++    node = string2xml("<lrm_rsc_op operation=\"monitor\" interval=\"0\" rc-code=\"0\" op-status=\"7\"/>");
++    assert_true(pcmk_xe_mask_probe_failure(node));
++    free_xml(node);
++
++    node = string2xml("<lrm_rsc_op operation=\"monitor\" interval=\"0\" rc-code=\"5\" op-status=\"7\"/>");
++    assert_true(pcmk_xe_mask_probe_failure(node));
++    free_xml(node);
++
++    /* PCMK_EXEC_ERROR */
++    node = string2xml("<lrm_rsc_op operation=\"monitor\" interval=\"0\" rc-code=\"0\" op-status=\"4\"/>");
++    assert_false(pcmk_xe_mask_probe_failure(node));
++    free_xml(node);
++
++    node = string2xml("<lrm_rsc_op operation=\"monitor\" interval=\"0\" rc-code=\"2\" op-status=\"4\"/>");
++    assert_true(pcmk_xe_mask_probe_failure(node));
++    free_xml(node);
++
++    node = string2xml("<lrm_rsc_op operation=\"monitor\" interval=\"0\" rc-code=\"5\" op-status=\"4\"/>");
++    assert_true(pcmk_xe_mask_probe_failure(node));
++    free_xml(node);
++
++    node = string2xml("<lrm_rsc_op operation=\"monitor\" interval=\"0\" rc-code=\"6\" op-status=\"4\"/>");
++    assert_false(pcmk_xe_mask_probe_failure(node));
++    free_xml(node);
++
++    node = string2xml("<lrm_rsc_op operation=\"monitor\" interval=\"0\" rc-code=\"7\" op-status=\"4\"/>");
++    assert_false(pcmk_xe_mask_probe_failure(node));
++    free_xml(node);
++
++    /* PCMK_EXEC_ERROR_HARD */
++    node = string2xml("<lrm_rsc_op operation=\"monitor\" interval=\"0\" rc-code=\"0\" op-status=\"5\"/>");
++    assert_false(pcmk_xe_mask_probe_failure(node));
++    free_xml(node);
++
++    node = string2xml("<lrm_rsc_op operation=\"monitor\" interval=\"0\" rc-code=\"2\" op-status=\"5\"/>");
++    assert_true(pcmk_xe_mask_probe_failure(node));
++    free_xml(node);
++
++    node = string2xml("<lrm_rsc_op operation=\"monitor\" interval=\"0\" rc-code=\"5\" op-status=\"5\"/>");
++    assert_true(pcmk_xe_mask_probe_failure(node));
++    free_xml(node);
++
++    node = string2xml("<lrm_rsc_op operation=\"monitor\" interval=\"0\" rc-code=\"6\" op-status=\"5\"/>");
++    assert_false(pcmk_xe_mask_probe_failure(node));
++    free_xml(node);
++
++    node = string2xml("<lrm_rsc_op operation=\"monitor\" interval=\"0\" rc-code=\"7\" op-status=\"5\"/>");
++    assert_false(pcmk_xe_mask_probe_failure(node));
++    free_xml(node);
++
++    /* PCMK_EXEC_ERROR_FATAL */
++    node = string2xml("<lrm_rsc_op operation=\"monitor\" interval=\"0\" rc-code=\"0\" op-status=\"6\"/>");
++    assert_false(pcmk_xe_mask_probe_failure(node));
++    free_xml(node);
++
++    node = string2xml("<lrm_rsc_op operation=\"monitor\" interval=\"0\" rc-code=\"2\" op-status=\"6\"/>");
++    assert_true(pcmk_xe_mask_probe_failure(node));
++    free_xml(node);
++
++    node = string2xml("<lrm_rsc_op operation=\"monitor\" interval=\"0\" rc-code=\"5\" op-status=\"6\"/>");
++    assert_true(pcmk_xe_mask_probe_failure(node));
++    free_xml(node);
++
++    node = string2xml("<lrm_rsc_op operation=\"monitor\" interval=\"0\" rc-code=\"6\" op-status=\"6\"/>");
++    assert_false(pcmk_xe_mask_probe_failure(node));
++    free_xml(node);
++
++    node = string2xml("<lrm_rsc_op operation=\"monitor\" interval=\"0\" rc-code=\"7\" op-status=\"6\"/>");
++    assert_false(pcmk_xe_mask_probe_failure(node));
++    free_xml(node);
++}
++
++int main(int argc, char **argv)
++{
++    const struct CMUnitTest tests[] = {
++        cmocka_unit_test(op_is_not_probe_test),
++        cmocka_unit_test(op_does_not_have_right_values_test),
++        cmocka_unit_test(check_values_test),
++    };
++
++    cmocka_set_message_output(CM_OUTPUT_TAP);
++    return cmocka_run_group_tests(tests, NULL, NULL);
++}
+-- 
+2.27.0
+
+
+From c9ce1aaf93cd20bb01e80102dda0ffffb07e6472 Mon Sep 17 00:00:00 2001
+From: Chris Lumens <clumens@redhat.com>
+Date: Wed, 1 Dec 2021 14:26:31 -0500
+Subject: [PATCH 05/21] Refactor: scheduler: Combine op status and rc remapping
+ into one function.
+
+Well, not quite.  The remapping is complicated enough that it makes sense
+to keep the two steps in separate functions.  However, they can both be
+called from a single new function that takes the place of the previous
+two calls in unpack_rsc_op.
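+
+The new single call site in unpack_rsc_op() (as in the hunks below) is:
+
+    remap_operation(xml_op, rsc, node, data_set, on_fail, target_rc,
+                    &rc, &status);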
+---
+ lib/pengine/unpack.c | 157 ++++++++++++++++++++-----------------------
+ 1 file changed, 72 insertions(+), 85 deletions(-)
+
+diff --git a/lib/pengine/unpack.c b/lib/pengine/unpack.c
+index b9986d2462..b659f319fb 100644
+--- a/lib/pengine/unpack.c
++++ b/lib/pengine/unpack.c
+@@ -3121,36 +3121,68 @@ unpack_rsc_op_failure(pe_resource_t * rsc, pe_node_t * node, int rc, xmlNode * x
+ 
+ /*!
+  * \internal
+- * \brief Remap operation status based on action result
++ * \brief Remap informational monitor results and operation status
+  *
+- * Given an action result, determine an appropriate operation status for the
+- * purposes of responding to the action (the status provided by the executor is
+- * not directly usable since the executor does not know what was expected).
++ * For monitor results, certain OCF codes provide extended information to the
++ * user about services that aren't failed yet but aren't entirely healthy either.
++ * These must be treated as the "normal" result by Pacemaker.
++ *
++ * For operation status, the action result can be used to determine an appropriate
++ * status for the purposes of responding to the action.  The status provided by the
++ * executor is not directly usable since the executor does not know what was expected.
+  *
++ * \param[in]     xml_op     Operation history entry XML from CIB status
+  * \param[in,out] rsc        Resource that operation history entry is for
+- * \param[in]     rc         Actual return code of operation
+- * \param[in]     target_rc  Expected return code of operation
+  * \param[in]     node       Node where operation was executed
+- * \param[in]     xml_op     Operation history entry XML from CIB status
+- * \param[in,out] on_fail    What should be done about the result
+  * \param[in]     data_set   Current cluster working set
++ * \param[in,out] on_fail    What should be done about the result
++ * \param[in]     target_rc  Expected return code of operation
++ * \param[in,out] rc         Actual return code of operation
++ * \param[in,out] status     Operation execution status
++ *
++ * \note If the result is remapped and the node is not shutting down or failed,
++ *       the operation will be recorded in the data set's list of failed operations
++ *       to highlight it for the user.
+  *
+- * \return Operation status based on return code and action info
+  * \note This may update the resource's current and next role.
+  */
+-static int
+-determine_op_status(
+-    pe_resource_t *rsc, int rc, int target_rc, pe_node_t * node, xmlNode * xml_op, enum action_fail_response * on_fail, pe_working_set_t * data_set) 
+-{
++static void
++remap_operation(xmlNode *xml_op, pe_resource_t *rsc, pe_node_t *node,
++                pe_working_set_t *data_set, enum action_fail_response *on_fail,
++                int target_rc, int *rc, int *status) {
+     bool is_probe = false;
+-    int result = PCMK_EXEC_DONE;
+-    const char *key = get_op_key(xml_op);
+     const char *task = crm_element_value(xml_op, XML_LRM_ATTR_TASK);
++    const char *key = get_op_key(xml_op);
+     const char *exit_reason = crm_element_value(xml_op,
+                                                 XML_LRM_ATTR_EXIT_REASON);
+ 
++    if (pcmk__str_eq(task, CRMD_ACTION_STATUS, pcmk__str_none)) {
++        int remapped_rc = pcmk__effective_rc(*rc);
++
++        if (*rc != remapped_rc) {
++            crm_trace("Remapping monitor result %d to %d", *rc, remapped_rc);
++            if (!node->details->shutdown || node->details->online) {
++                record_failed_op(xml_op, node, rsc, data_set);
++            }
++
++            *rc = remapped_rc;
++        }
++    }
++
++    /* If the executor reported an operation status of anything but done or
++     * error, consider that final. But for done or error, we know better whether
++     * it should be treated as a failure or not, because we know the expected
++     * result.
++     */
++    if (*status != PCMK_EXEC_DONE && *status != PCMK_EXEC_ERROR) {
++        return;
++    }
++
+     CRM_ASSERT(rsc);
+-    CRM_CHECK(task != NULL, return PCMK_EXEC_ERROR);
++    CRM_CHECK(task != NULL,
++              *status = PCMK_EXEC_ERROR; return);
++
++    *status = PCMK_EXEC_DONE;
+ 
+     if (exit_reason == NULL) {
+         exit_reason = "";
+@@ -3171,23 +3203,23 @@ determine_op_status(
+          * those versions or processing of saved CIB files from those versions,
+          * so we do not need to care much about this case.
+          */
+-        result = PCMK_EXEC_ERROR;
++        *status = PCMK_EXEC_ERROR;
+         crm_warn("Expected result not found for %s on %s (corrupt or obsolete CIB?)",
+                  key, node->details->uname);
+ 
+-    } else if (target_rc != rc) {
+-        result = PCMK_EXEC_ERROR;
++    } else if (target_rc != *rc) {
++        *status = PCMK_EXEC_ERROR;
+         pe_rsc_debug(rsc, "%s on %s: expected %d (%s), got %d (%s%s%s)",
+                      key, node->details->uname,
+                      target_rc, services_ocf_exitcode_str(target_rc),
+-                     rc, services_ocf_exitcode_str(rc),
++                     *rc, services_ocf_exitcode_str(*rc),
+                      (*exit_reason? ": " : ""), exit_reason);
+     }
+ 
+-    switch (rc) {
++    switch (*rc) {
+         case PCMK_OCF_OK:
+             if (is_probe && (target_rc == PCMK_OCF_NOT_RUNNING)) {
+-                result = PCMK_EXEC_DONE;
++                *status = PCMK_EXEC_DONE;
+                 pe_rsc_info(rsc, "Probe found %s active on %s at %s",
+                             rsc->id, node->details->uname,
+                             last_change_str(xml_op));
+@@ -3195,10 +3227,10 @@ determine_op_status(
+             break;
+ 
+         case PCMK_OCF_NOT_RUNNING:
+-            if (is_probe || (target_rc == rc)
++            if (is_probe || (target_rc == *rc)
+                 || !pcmk_is_set(rsc->flags, pe_rsc_managed)) {
+ 
+-                result = PCMK_EXEC_DONE;
++                *status = PCMK_EXEC_DONE;
+                 rsc->role = RSC_ROLE_STOPPED;
+ 
+                 /* clear any previous failure actions */
+@@ -3208,8 +3240,8 @@ determine_op_status(
+             break;
+ 
+         case PCMK_OCF_RUNNING_PROMOTED:
+-            if (is_probe && (rc != target_rc)) {
+-                result = PCMK_EXEC_DONE;
++            if (is_probe && (*rc != target_rc)) {
++                *status = PCMK_EXEC_DONE;
+                 pe_rsc_info(rsc,
+                             "Probe found %s active and promoted on %s at %s",
+                             rsc->id, node->details->uname,
+@@ -3221,11 +3253,11 @@ determine_op_status(
+         case PCMK_OCF_DEGRADED_PROMOTED:
+         case PCMK_OCF_FAILED_PROMOTED:
+             rsc->role = RSC_ROLE_PROMOTED;
+-            result = PCMK_EXEC_ERROR;
++            *status = PCMK_EXEC_ERROR;
+             break;
+ 
+         case PCMK_OCF_NOT_CONFIGURED:
+-            result = PCMK_EXEC_ERROR_FATAL;
++            *status = PCMK_EXEC_ERROR_FATAL;
+             break;
+ 
+         case PCMK_OCF_UNIMPLEMENT_FEATURE: {
+@@ -3233,7 +3265,7 @@ determine_op_status(
+             crm_element_value_ms(xml_op, XML_LRM_ATTR_INTERVAL_MS, &interval_ms);
+ 
+             if (interval_ms > 0) {
+-                result = PCMK_EXEC_NOT_SUPPORTED;
++                *status = PCMK_EXEC_NOT_SUPPORTED;
+                 break;
+             }
+             // fall through
+@@ -3248,26 +3280,27 @@ determine_op_status(
+                 pe_proc_err("No further recovery can be attempted for %s "
+                             "because %s on %s failed (%s%s%s) at %s "
+                             CRM_XS " rc=%d id=%s", rsc->id, task,
+-                            node->details->uname, services_ocf_exitcode_str(rc),
++                            node->details->uname, services_ocf_exitcode_str(*rc),
+                             (*exit_reason? ": " : ""), exit_reason,
+-                            last_change_str(xml_op), rc, ID(xml_op));
++                            last_change_str(xml_op), *rc, ID(xml_op));
+                 pe__clear_resource_flags(rsc, pe_rsc_managed);
+                 pe__set_resource_flags(rsc, pe_rsc_block);
+             }
+-            result = PCMK_EXEC_ERROR_HARD;
++            *status = PCMK_EXEC_ERROR_HARD;
+             break;
+ 
+         default:
+-            if (result == PCMK_EXEC_DONE) {
++            if (*status == PCMK_EXEC_DONE) {
+                 crm_info("Treating unknown exit status %d from %s of %s "
+                          "on %s at %s as failure",
+-                         rc, task, rsc->id, node->details->uname,
++                         *rc, task, rsc->id, node->details->uname,
+                          last_change_str(xml_op));
+-                result = PCMK_EXEC_ERROR;
++                *status = PCMK_EXEC_ERROR;
+             }
+             break;
+     }
+-    return result;
++
++    pe_rsc_trace(rsc, "Remapped %s status to %d", key, *status);
+ }
+ 
+ // return TRUE if start or monitor last failure but parameters changed
+@@ -3622,41 +3655,6 @@ update_resource_state(pe_resource_t * rsc, pe_node_t * node, xmlNode * xml_op, c
+     }
+ }
+ 
+-/*!
+- * \internal
+- * \brief Remap informational monitor results to usual values
+- *
+- * Certain OCF result codes are for providing extended information to the
+- * user about services that aren't yet failed but not entirely healthy either.
+- * These must be treated as the "normal" result by Pacemaker.
+- *
+- * \param[in] rc        Actual result of a monitor action
+- * \param[in] xml_op    Operation history XML
+- * \param[in] node      Node that operation happened on
+- * \param[in] rsc       Resource that operation happened to
+- * \param[in] data_set  Cluster working set
+- *
+- * \return Result code that pacemaker should use
+- *
+- * \note If the result is remapped, and the node is not shutting down or failed,
+- *       the operation will be recorded in the data set's list of failed
+- *       operations, to highlight it for the user.
+- */
+-static int
+-remap_monitor_rc(int rc, xmlNode *xml_op, const pe_node_t *node,
+-                 const pe_resource_t *rsc, pe_working_set_t *data_set)
+-{
+-    int remapped_rc = pcmk__effective_rc(rc);
+-
+-    if (rc != remapped_rc) {
+-        crm_trace("Remapping monitor result %d to %d", rc, remapped_rc);
+-        if (!node->details->shutdown || node->details->online) {
+-            record_failed_op(xml_op, node, rsc, data_set);
+-        }
+-    }
+-    return remapped_rc;
+-}
+-
+ static void
+ unpack_rsc_op(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op,
+               xmlNode **last_failure, enum action_fail_response *on_fail,
+@@ -3712,7 +3710,7 @@ unpack_rsc_op(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op,
+                      node->details->uname, rsc->id);
+     }
+ 
+-    /* It should be possible to call remap_monitor_rc() first then call
++    /* It should be possible to call remap_operation() first then call
+      * check_operation_expiry() only if rc != target_rc, because there should
+      * never be a fail count without at least one unexpected result in the
+      * resource history. That would be more efficient by avoiding having to call
+@@ -3729,9 +3727,8 @@ unpack_rsc_op(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op,
+         expired = true;
+     }
+ 
+-    if (!strcmp(task, CRMD_ACTION_STATUS)) {
+-        rc = remap_monitor_rc(rc, xml_op, node, rsc, data_set);
+-    }
++    remap_operation(xml_op, rsc, node, data_set, on_fail, target_rc,
++                    &rc, &status);
+ 
+     if (expired && (rc != target_rc)) {
+         const char *magic = crm_element_value(xml_op, XML_ATTR_TRANSITION_MAGIC);
+@@ -3761,16 +3758,6 @@ unpack_rsc_op(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op,
+         }
+     }
+ 
+-    /* If the executor reported an operation status of anything but done or
+-     * error, consider that final. But for done or error, we know better whether
+-     * it should be treated as a failure or not, because we know the expected
+-     * result.
+-     */
+-    if(status == PCMK_EXEC_DONE || status == PCMK_EXEC_ERROR) {
+-        status = determine_op_status(rsc, rc, target_rc, node, xml_op, on_fail, data_set);
+-        pe_rsc_trace(rsc, "Remapped %s status to %d", task_key, status);
+-    }
+-
+     switch (status) {
+         case PCMK_EXEC_CANCELLED:
+             // Should never happen
+-- 
+2.27.0
+
+
+From 9fdca1999872b3930cf18b7d807ddb259f23e8a5 Mon Sep 17 00:00:00 2001
+From: Chris Lumens <clumens@redhat.com>
+Date: Fri, 19 Nov 2021 15:08:16 -0500
+Subject: [PATCH 06/21] Test: cts-cli: Add test output for a native resource
+ with a failed probe op.
+
+There are no code changes yet to properly handle displaying these
+operations, so the results here just reflect the current handling.
+---
+ cts/cli/crm_mon-partial.xml    | 16 +++++++++++
+ cts/cli/regression.crm_mon.exp | 50 ++++++++++++++++++++++++++--------
+ 2 files changed, 55 insertions(+), 11 deletions(-)
+
+diff --git a/cts/cli/crm_mon-partial.xml b/cts/cli/crm_mon-partial.xml
+index e6c6894b6f..b7817e4775 100644
+--- a/cts/cli/crm_mon-partial.xml
++++ b/cts/cli/crm_mon-partial.xml
+@@ -60,6 +60,16 @@
+           </meta_attributes>
+         </primitive>
+       </group>
++      <primitive class="ocf" id="smart-mon" provider="pacemaker" type="HealthSMART">
++        <operations>
++          <op id="smart-mon-monitor-interval-10s" interval="10s" name="monitor" start-delay="0s" timeout="10s"/>
++          <op id="smart-mon-start-interval-0s" interval="0s" name="start" timeout="10s"/>
++          <op id="smart-mon-stop-interval-0s" interval="0s" name="stop" timeout="10s"/>
++        </operations>
++        <instance_attributes id="smart-mon-instance_attributes">
++          <nvpair id="smart-mon-instance_attributes-drives" name="drives" value="/dev/nonexistent"/>
++        </instance_attributes>
++      </primitive>
+     </resources>
+     <constraints/>
+   </configuration>
+@@ -94,6 +104,9 @@
+           <lrm_resource id="dummy-1" class="ocf" provider="pacemaker" type="Dummy">
+             <lrm_rsc_op id="dummy-1_last_0" operation_key="dummy-1_start_0" operation="start" crm-debug-origin="crm_simulate" crm_feature_set="3.6.0" transition-key="2:-1:0:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" transition-magic="0:0;2:-1:0:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" exit-reason="" call-id="2" rc-code="0" op-status="0" interval="0" last-rc-change="1599063458" exec-time="0" queue-time="0" op-digest="f2317cad3d54cec5d7d7aa7d0bf35cf8"/>
+           </lrm_resource>
++          <lrm_resource id="smart-mon" type="HealthSMART" class="ocf" provider="pacemaker">
++            <lrm_rsc_op id="smart-mon_last_failure_0" operation_key="smart-mon_monitor_0" operation="monitor" crm-debug-origin="crm_simulate" crm_feature_set="3.11.0" transition-key="3:1:0:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" transition-magic="0:5;3:1:0:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" exit-reason="" on_node="cluster02" call-id="9" rc-code="5" op-status="0" interval="0" last-rc-change="1636490335" exec-time="33" queue-time="0" op-digest="b368e619fcd06788c996f6a2ef2efb6a"/>
++          </lrm_resource>
+         </lrm_resources>
+       </lrm>
+       <transient_attributes id="2">
+@@ -135,6 +148,9 @@
+             <lrm_rsc_op id="httpd-bundle-1_monitor_30000" operation_key="httpd-bundle-1_monitor_30000" operation="monitor" crm-debug-origin="crm_simulate" crm_feature_set="3.3.0" transition-key="3:-1:0:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" transition-magic="0:0;3:-1:0:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" exit-reason="" call-id="3" rc-code="0" op-status="0" interval="30000" last-rc-change="1590608589" exec-time="0" queue-time="0" op-digest="7592cb10fa1499772a031adfd385f558"/>
+           </lrm_resource>
+         </lrm_resources>
++        <lrm_resource id="smart-mon" type="HealthSMART" class="ocf" provider="pacemaker">
++          <lrm_rsc_op id="smart-mon_last_failure_0" operation_key="smart-mon_monitor_0" operation="monitor" crm-debug-origin="crm_simulate" crm_feature_set="3.11.0" transition-key="3:1:0:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" transition-magic="0:5;3:1:0:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" exit-reason="" on_node="cluster01" call-id="9" rc-code="5" op-status="0" interval="0" last-rc-change="1636490335" exec-time="33" queue-time="0" op-digest="b368e619fcd06788c996f6a2ef2efb6a"/>
++        </lrm_resource>
+       </lrm>
+       <transient_attributes id="1">
+         <instance_attributes id="status-1">
+diff --git a/cts/cli/regression.crm_mon.exp b/cts/cli/regression.crm_mon.exp
+index 8714f917a9..d12dce3ae8 100644
+--- a/cts/cli/regression.crm_mon.exp
++++ b/cts/cli/regression.crm_mon.exp
+@@ -3470,7 +3470,7 @@ Cluster Summary:
+   * Last updated:
+   * Last change:
+   * 4 nodes configured
+-  * 13 resource instances configured (1 DISABLED)
++  * 14 resource instances configured (1 DISABLED)
+ 
+ Node List:
+   * Online: [ cluster01 cluster02 ]
+@@ -3485,6 +3485,9 @@ Active Resources:
+     * httpd-bundle-1 (192.168.122.132)	(ocf:heartbeat:apache):	 Stopped cluster01
+   * Resource Group: partially-active-group (1 member inactive):
+     * dummy-1	(ocf:pacemaker:Dummy):	 Started cluster02
++
++Failed Resource Actions:
++  * smart-mon probe on cluster02 returned 'not installed' at Tue Nov  9 15:38:55 2021 after 33ms
+ =#=#=#= End test: Text output of partially active resources - OK (0) =#=#=#=
+ * Passed: crm_mon        - Text output of partially active resources
+ =#=#=#= Begin test: XML output of partially active resources =#=#=#=
+@@ -3495,7 +3498,7 @@ Active Resources:
+     <last_update time=""/>
+     <last_change time=""/>
+     <nodes_configured number="4"/>
+-    <resources_configured number="13" disabled="1" blocked="0"/>
++    <resources_configured number="14" disabled="1" blocked="0"/>
+     <cluster_options stonith-enabled="true" symmetric-cluster="true" no-quorum-policy="stop" maintenance-mode="false" stop-all-resources="false" stonith-timeout-ms="60000" priority-fencing-delay-ms="0"/>
+   </summary>
+   <nodes>
+@@ -3548,6 +3551,7 @@ Active Resources:
+       </resource>
+       <resource id="dummy-2" resource_agent="ocf:pacemaker:Dummy" role="Stopped" target_role="Stopped" active="false" orphaned="false" blocked="false" managed="true" failed="false" failure_ignored="false" nodes_running_on="0"/>
+     </group>
++    <resource id="smart-mon" resource_agent="ocf:pacemaker:HealthSMART" role="Stopped" active="false" orphaned="false" blocked="false" managed="true" failed="false" failure_ignored="false" nodes_running_on="0"/>
+   </resources>
+   <node_attributes>
+     <node name="cluster01">
+@@ -3574,6 +3578,9 @@ Active Resources:
+       <resource_history id="dummy-1" orphan="false" migration-threshold="1000000">
+         <operation_history call="2" task="start" rc="0" rc_text="ok" exec-time="0ms" queue-time="0ms"/>
+       </resource_history>
++      <resource_history id="smart-mon" orphan="false" migration-threshold="1000000">
++        <operation_history call="9" task="probe" rc="5" rc_text="not installed" exec-time="33ms" queue-time="0ms"/>
++      </resource_history>
+     </node>
+     <node name="cluster01">
+       <resource_history id="Fencing" orphan="false" migration-threshold="1000000">
+@@ -3603,6 +3610,9 @@ Active Resources:
+       </resource_history>
+     </node>
+   </node_history>
++  <failures>
++    <failure op_key="smart-mon_monitor_0" node="cluster02" exitstatus="not installed" exitreason="" exitcode="5" call="9" status="complete" last-rc-change="2021-11-09 15:38:55 -05:00" queued="0" exec="33" interval="0" task="monitor"/>
++  </failures>
+   <status code="0" message="OK"/>
+ </pacemaker-result>
+ =#=#=#= End test: XML output of partially active resources - OK (0) =#=#=#=
+@@ -3614,7 +3624,7 @@ Cluster Summary:
+   * Last updated:
+   * Last change:
+   * 4 nodes configured
+-  * 13 resource instances configured (1 DISABLED)
++  * 14 resource instances configured (1 DISABLED)
+ 
+ Node List:
+   * Online: [ cluster01 cluster02 ]
+@@ -3631,6 +3641,10 @@ Full List of Resources:
+   * Resource Group: partially-active-group:
+     * dummy-1	(ocf:pacemaker:Dummy):	 Started cluster02
+     * dummy-2	(ocf:pacemaker:Dummy):	 Stopped (disabled)
++  * smart-mon	(ocf:pacemaker:HealthSMART):	 Stopped
++
++Failed Resource Actions:
++  * smart-mon probe on cluster02 returned 'not installed' at Tue Nov  9 15:38:55 2021 after 33ms
+ =#=#=#= End test: Text output of partially active resources, with inactive resources - OK (0) =#=#=#=
+ * Passed: crm_mon        - Text output of partially active resources, with inactive resources
+ =#=#=#= Begin test: Complete brief text output, with inactive resources =#=#=#=
+@@ -3640,13 +3654,14 @@ Cluster Summary:
+   * Last updated:
+   * Last change:
+   * 4 nodes configured
+-  * 13 resource instances configured (1 DISABLED)
++  * 14 resource instances configured (1 DISABLED)
+ 
+ Node List:
+   * Online: [ cluster01 cluster02 ]
+   * GuestOnline: [ httpd-bundle-0@cluster02 httpd-bundle-1@cluster01 ]
+ 
+ Full List of Resources:
++  * 0/1	(ocf:pacemaker:HealthSMART):	Active
+   * 1/1	(stonith:fence_xvm):	Active cluster01
+   * Clone Set: ping-clone [ping]:
+     * Started: [ cluster01 ]
+@@ -3676,6 +3691,8 @@ Operations:
+       * (3) monitor: interval="30000ms"
+     * dummy-1: migration-threshold=1000000:
+       * (2) start
++    * smart-mon: migration-threshold=1000000:
++      * (9) probe
+   * Node: cluster01:
+     * Fencing: migration-threshold=1000000:
+       * (15) start
+@@ -3695,6 +3712,9 @@ Operations:
+   * Node: httpd-bundle-0@cluster02:
+     * httpd: migration-threshold=1000000:
+       * (1) start
++
++Failed Resource Actions:
++  * smart-mon probe on cluster02 returned 'not installed' at Tue Nov  9 15:38:55 2021 after 33ms
+ =#=#=#= End test: Complete brief text output, with inactive resources - OK (0) =#=#=#=
+ * Passed: crm_mon        - Complete brief text output, with inactive resources
+ =#=#=#= Begin test: Text output of partially active group =#=#=#=
+@@ -3704,7 +3724,7 @@ Cluster Summary:
+   * Last updated:
+   * Last change:
+   * 4 nodes configured
+-  * 13 resource instances configured (1 DISABLED)
++  * 14 resource instances configured (1 DISABLED)
+ 
+ Node List:
+   * Online: [ cluster01 cluster02 ]
+@@ -3722,7 +3742,7 @@ Cluster Summary:
+   * Last updated:
+   * Last change:
+   * 4 nodes configured
+-  * 13 resource instances configured (1 DISABLED)
++  * 14 resource instances configured (1 DISABLED)
+ 
+ Node List:
+   * Online: [ cluster01 cluster02 ]
+@@ -3741,7 +3761,7 @@ Cluster Summary:
+   * Last updated:
+   * Last change:
+   * 4 nodes configured
+-  * 13 resource instances configured (1 DISABLED)
++  * 14 resource instances configured (1 DISABLED)
+ 
+ Node List:
+   * Online: [ cluster01 cluster02 ]
+@@ -3759,7 +3779,7 @@ Cluster Summary:
+   * Last updated:
+   * Last change:
+   * 4 nodes configured
+-  * 13 resource instances configured (1 DISABLED)
++  * 14 resource instances configured (1 DISABLED)
+ 
+ Node List:
+   * Online: [ cluster01 cluster02 ]
+@@ -3777,7 +3797,7 @@ Cluster Summary:
+   * Last updated:
+   * Last change:
+   * 4 nodes configured
+-  * 13 resource instances configured (1 DISABLED)
++  * 14 resource instances configured (1 DISABLED)
+ 
+ Node List:
+   * Node cluster01: online:
+@@ -3806,6 +3826,7 @@ Inactive Resources:
+     * httpd-bundle-1 (192.168.122.132)	(ocf:heartbeat:apache):	 Stopped cluster01
+   * Resource Group: partially-active-group:
+     * 1/2	(ocf:pacemaker:Dummy):	Active cluster02
++  * smart-mon	(ocf:pacemaker:HealthSMART):	 Stopped
+ 
+ Node Attributes:
+   * Node: cluster01:
+@@ -3826,6 +3847,8 @@ Operations:
+       * (3) monitor: interval="30000ms"
+     * dummy-1: migration-threshold=1000000:
+       * (2) start
++    * smart-mon: migration-threshold=1000000:
++      * (9) probe
+   * Node: cluster01:
+     * Fencing: migration-threshold=1000000:
+       * (15) start
+@@ -3845,6 +3868,9 @@ Operations:
+   * Node: httpd-bundle-0@cluster02:
+     * httpd: migration-threshold=1000000:
+       * (1) start
++
++Failed Resource Actions:
++  * smart-mon probe on cluster02 returned 'not installed' at Tue Nov  9 15:38:55 2021 after 33ms
+ =#=#=#= End test: Complete brief text output grouped by node, with inactive resources - OK (0) =#=#=#=
+ * Passed: crm_mon        - Complete brief text output grouped by node, with inactive resources
+ =#=#=#= Begin test: Text output of partially active resources, with inactive resources, filtered by node =#=#=#=
+@@ -3854,7 +3880,7 @@ Cluster Summary:
+   * Last updated:
+   * Last change:
+   * 4 nodes configured
+-  * 13 resource instances configured (1 DISABLED)
++  * 14 resource instances configured (1 DISABLED)
+ 
+ Node List:
+   * Online: [ cluster01 ]
+@@ -3865,6 +3891,7 @@ Full List of Resources:
+   * Fencing	(stonith:fence_xvm):	 Started cluster01
+   * Container bundle set: httpd-bundle [pcmk:http]:
+     * httpd-bundle-1 (192.168.122.132)	(ocf:heartbeat:apache):	 Stopped cluster01
++  * smart-mon	(ocf:pacemaker:HealthSMART):	 Stopped
+ =#=#=#= End test: Text output of partially active resources, with inactive resources, filtered by node - OK (0) =#=#=#=
+ * Passed: crm_mon        - Text output of partially active resources, with inactive resources, filtered by node
+ =#=#=#= Begin test: Text output of partially active resources, filtered by node =#=#=#=
+@@ -3875,7 +3902,7 @@ Full List of Resources:
+     <last_update time=""/>
+     <last_change time=""/>
+     <nodes_configured number="4"/>
+-    <resources_configured number="13" disabled="1" blocked="0"/>
++    <resources_configured number="14" disabled="1" blocked="0"/>
+     <cluster_options stonith-enabled="true" symmetric-cluster="true" no-quorum-policy="stop" maintenance-mode="false" stop-all-resources="false" stonith-timeout-ms="60000" priority-fencing-delay-ms="0"/>
+   </summary>
+   <nodes>
+@@ -3905,6 +3932,7 @@ Full List of Resources:
+         </resource>
+       </replica>
+     </bundle>
++    <resource id="smart-mon" resource_agent="ocf:pacemaker:HealthSMART" role="Stopped" active="false" orphaned="false" blocked="false" managed="true" failed="false" failure_ignored="false" nodes_running_on="0"/>
+   </resources>
+   <node_attributes>
+     <node name="cluster01">
+-- 
+2.27.0
+
+
+From 1c54d0bbb74d066d55a56eae28d1a579b8854604 Mon Sep 17 00:00:00 2001
+From: Chris Lumens <clumens@redhat.com>
+Date: Fri, 19 Nov 2021 15:17:52 -0500
+Subject: [PATCH 07/21] Test: cts-cli: Add test output for a cloned resource
+ with a failed probe op.
+
+There are no code changes yet to properly handle displaying these
+operations, so the results here just reflect the current handling.
+---
+ cts/cli/crm_mon-partial.xml    |  3 +++
+ cts/cli/regression.crm_mon.exp | 12 ++++++++++++
+ 2 files changed, 15 insertions(+)
+
+diff --git a/cts/cli/crm_mon-partial.xml b/cts/cli/crm_mon-partial.xml
+index b7817e4775..1f9dc156aa 100644
+--- a/cts/cli/crm_mon-partial.xml
++++ b/cts/cli/crm_mon-partial.xml
+@@ -107,6 +107,9 @@
+           <lrm_resource id="smart-mon" type="HealthSMART" class="ocf" provider="pacemaker">
+             <lrm_rsc_op id="smart-mon_last_failure_0" operation_key="smart-mon_monitor_0" operation="monitor" crm-debug-origin="crm_simulate" crm_feature_set="3.11.0" transition-key="3:1:0:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" transition-magic="0:5;3:1:0:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" exit-reason="" on_node="cluster02" call-id="9" rc-code="5" op-status="0" interval="0" last-rc-change="1636490335" exec-time="33" queue-time="0" op-digest="b368e619fcd06788c996f6a2ef2efb6a"/>
+           </lrm_resource>
++          <lrm_resource id="ping" class="ocf" provider="pacemaker" type="ping">
++            <lrm_rsc_op id="ping_last_failure_0" operation_key="ping_monitor_0" operation="monitor" crm-debug-origin="crm_simulate" crm_feature_set="3.11.0" transition-key="6:1:0:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" transition-magic="0:5;6:1:0:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" exit-reason="" on_node="cluster02" call-id="6" rc-code="5" op-status="0" interval="0" last-rc-change="1637259102" exec-time="0" queue-time="0"/>
++          </lrm_resource>
+         </lrm_resources>
+       </lrm>
+       <transient_attributes id="2">
+diff --git a/cts/cli/regression.crm_mon.exp b/cts/cli/regression.crm_mon.exp
+index d12dce3ae8..d093bd8106 100644
+--- a/cts/cli/regression.crm_mon.exp
++++ b/cts/cli/regression.crm_mon.exp
+@@ -3488,6 +3488,7 @@ Active Resources:
+ 
+ Failed Resource Actions:
+   * smart-mon probe on cluster02 returned 'not installed' at Tue Nov  9 15:38:55 2021 after 33ms
++  * ping probe on cluster02 returned 'not installed' at Thu Nov 18 13:11:42 2021
+ =#=#=#= End test: Text output of partially active resources - OK (0) =#=#=#=
+ * Passed: crm_mon        - Text output of partially active resources
+ =#=#=#= Begin test: XML output of partially active resources =#=#=#=
+@@ -3581,6 +3582,9 @@ Failed Resource Actions:
+       <resource_history id="smart-mon" orphan="false" migration-threshold="1000000">
+         <operation_history call="9" task="probe" rc="5" rc_text="not installed" exec-time="33ms" queue-time="0ms"/>
+       </resource_history>
++      <resource_history id="ping" orphan="false" migration-threshold="1000000">
++        <operation_history call="6" task="probe" rc="5" rc_text="not installed" exec-time="0ms" queue-time="0ms"/>
++      </resource_history>
+     </node>
+     <node name="cluster01">
+       <resource_history id="Fencing" orphan="false" migration-threshold="1000000">
+@@ -3612,6 +3616,7 @@ Failed Resource Actions:
+   </node_history>
+   <failures>
+     <failure op_key="smart-mon_monitor_0" node="cluster02" exitstatus="not installed" exitreason="" exitcode="5" call="9" status="complete" last-rc-change="2021-11-09 15:38:55 -05:00" queued="0" exec="33" interval="0" task="monitor"/>
++    <failure op_key="ping_monitor_0" node="cluster02" exitstatus="not installed" exitreason="" exitcode="5" call="6" status="complete" last-rc-change="2021-11-18 13:11:42 -05:00" queued="0" exec="0" interval="0" task="monitor"/>
+   </failures>
+   <status code="0" message="OK"/>
+ </pacemaker-result>
+@@ -3645,6 +3650,7 @@ Full List of Resources:
+ 
+ Failed Resource Actions:
+   * smart-mon probe on cluster02 returned 'not installed' at Tue Nov  9 15:38:55 2021 after 33ms
++  * ping probe on cluster02 returned 'not installed' at Thu Nov 18 13:11:42 2021
+ =#=#=#= End test: Text output of partially active resources, with inactive resources - OK (0) =#=#=#=
+ * Passed: crm_mon        - Text output of partially active resources, with inactive resources
+ =#=#=#= Begin test: Complete brief text output, with inactive resources =#=#=#=
+@@ -3693,6 +3699,8 @@ Operations:
+       * (2) start
+     * smart-mon: migration-threshold=1000000:
+       * (9) probe
++    * ping: migration-threshold=1000000:
++      * (6) probe
+   * Node: cluster01:
+     * Fencing: migration-threshold=1000000:
+       * (15) start
+@@ -3715,6 +3723,7 @@ Operations:
+ 
+ Failed Resource Actions:
+   * smart-mon probe on cluster02 returned 'not installed' at Tue Nov  9 15:38:55 2021 after 33ms
++  * ping probe on cluster02 returned 'not installed' at Thu Nov 18 13:11:42 2021
+ =#=#=#= End test: Complete brief text output, with inactive resources - OK (0) =#=#=#=
+ * Passed: crm_mon        - Complete brief text output, with inactive resources
+ =#=#=#= Begin test: Text output of partially active group =#=#=#=
+@@ -3849,6 +3858,8 @@ Operations:
+       * (2) start
+     * smart-mon: migration-threshold=1000000:
+       * (9) probe
++    * ping: migration-threshold=1000000:
++      * (6) probe
+   * Node: cluster01:
+     * Fencing: migration-threshold=1000000:
+       * (15) start
+@@ -3871,6 +3882,7 @@ Operations:
+ 
+ Failed Resource Actions:
+   * smart-mon probe on cluster02 returned 'not installed' at Tue Nov  9 15:38:55 2021 after 33ms
++  * ping probe on cluster02 returned 'not installed' at Thu Nov 18 13:11:42 2021
+ =#=#=#= End test: Complete brief text output grouped by node, with inactive resources - OK (0) =#=#=#=
+ * Passed: crm_mon        - Complete brief text output grouped by node, with inactive resources
+ =#=#=#= Begin test: Text output of partially active resources, with inactive resources, filtered by node =#=#=#=
+-- 
+2.27.0
+
+
+From 9408f08c07eb531ff84b07bf959f3d681ebf2b78 Mon Sep 17 00:00:00 2001
+From: Chris Lumens <clumens@redhat.com>
+Date: Fri, 19 Nov 2021 15:48:16 -0500
+Subject: [PATCH 08/21] Test: cts-cli: Change the resources in
+ partially-active-group.
+
+dummy-2 now shows as failed because its probe returned an
+'unimplemented feature' error.  I don't know what could possibly be
+unimplemented about a dummy resource, but it's not important.
+
+There is also a new dummy-3 resource that is disabled, as dummy-2
+previously was.  This preserves the check that output for inactive
+group members can still be displayed.
+
+There are no code changes yet to properly handle displaying these
+operations, so the results here just reflect the current handling.
+---
+ cts/cli/crm_mon-partial.xml    |  6 +++-
+ cts/cli/regression.crm_mon.exp | 62 +++++++++++++++++++++++-----------
+ 2 files changed, 47 insertions(+), 21 deletions(-)
+
+diff --git a/cts/cli/crm_mon-partial.xml b/cts/cli/crm_mon-partial.xml
+index 1f9dc156aa..1ce80ea58a 100644
+--- a/cts/cli/crm_mon-partial.xml
++++ b/cts/cli/crm_mon-partial.xml
+@@ -54,7 +54,8 @@
+       </bundle>
+       <group id="partially-active-group">
+         <primitive class="ocf" id="dummy-1" provider="pacemaker" type="Dummy"/>
+-        <primitive class="ocf" id="dummy-2" provider="pacemaker" type="Dummy">
++        <primitive class="ocf" id="dummy-2" provider="pacemaker" type="Dummy"/>
++        <primitive class="ocf" id="dummy-3" provider="pacemaker" type="Dummy">
+           <meta_attributes id="inactive-dummy-meta_attributes">
+             <nvpair id="inactive-dummy-meta_attributes-target-role" name="target-role" value="Stopped"/>
+           </meta_attributes>
+@@ -104,6 +105,9 @@
+           <lrm_resource id="dummy-1" class="ocf" provider="pacemaker" type="Dummy">
+             <lrm_rsc_op id="dummy-1_last_0" operation_key="dummy-1_start_0" operation="start" crm-debug-origin="crm_simulate" crm_feature_set="3.6.0" transition-key="2:-1:0:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" transition-magic="0:0;2:-1:0:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" exit-reason="" call-id="2" rc-code="0" op-status="0" interval="0" last-rc-change="1599063458" exec-time="0" queue-time="0" op-digest="f2317cad3d54cec5d7d7aa7d0bf35cf8"/>
+           </lrm_resource>
++          <lrm_resource id="dummy-2" class="ocf" provider="pacemaker" type="Dummy">
++            <lrm_rsc_op id="dummy-2_last_failure_0" operation_key="dummy-2_monitor_0" operation="monitor" crm-debug-origin="crm_simulate" crm_feature_set="3.11.0" transition-key="2:1:0:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" transition-magic="0:3;2:1:0:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" exit-reason="" on_node="cluster02" call-id="2" rc-code="3" op-status="0" interval="0" last-rc-change="1599063458" exec-time="33" queue-time="0" op-digest="f2317cad3d54cec5d7d7aa7d0bf35cf8"/>
++          </lrm_resource>
+           <lrm_resource id="smart-mon" type="HealthSMART" class="ocf" provider="pacemaker">
+             <lrm_rsc_op id="smart-mon_last_failure_0" operation_key="smart-mon_monitor_0" operation="monitor" crm-debug-origin="crm_simulate" crm_feature_set="3.11.0" transition-key="3:1:0:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" transition-magic="0:5;3:1:0:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" exit-reason="" on_node="cluster02" call-id="9" rc-code="5" op-status="0" interval="0" last-rc-change="1636490335" exec-time="33" queue-time="0" op-digest="b368e619fcd06788c996f6a2ef2efb6a"/>
+           </lrm_resource>
+diff --git a/cts/cli/regression.crm_mon.exp b/cts/cli/regression.crm_mon.exp
+index d093bd8106..8cf3a1215e 100644
+--- a/cts/cli/regression.crm_mon.exp
++++ b/cts/cli/regression.crm_mon.exp
+@@ -3470,7 +3470,7 @@ Cluster Summary:
+   * Last updated:
+   * Last change:
+   * 4 nodes configured
+-  * 14 resource instances configured (1 DISABLED)
++  * 15 resource instances configured (1 DISABLED)
+ 
+ Node List:
+   * Online: [ cluster01 cluster02 ]
+@@ -3485,8 +3485,10 @@ Active Resources:
+     * httpd-bundle-1 (192.168.122.132)	(ocf:heartbeat:apache):	 Stopped cluster01
+   * Resource Group: partially-active-group (1 member inactive):
+     * dummy-1	(ocf:pacemaker:Dummy):	 Started cluster02
++    * dummy-2	(ocf:pacemaker:Dummy):	 FAILED cluster02
+ 
+ Failed Resource Actions:
++  * dummy-2 probe on cluster02 returned 'unimplemented feature' at Wed Sep  2 12:17:38 2020 after 33ms
+   * smart-mon probe on cluster02 returned 'not installed' at Tue Nov  9 15:38:55 2021 after 33ms
+   * ping probe on cluster02 returned 'not installed' at Thu Nov 18 13:11:42 2021
+ =#=#=#= End test: Text output of partially active resources - OK (0) =#=#=#=
+@@ -3499,12 +3501,12 @@ Failed Resource Actions:
+     <last_update time=""/>
+     <last_change time=""/>
+     <nodes_configured number="4"/>
+-    <resources_configured number="14" disabled="1" blocked="0"/>
++    <resources_configured number="15" disabled="1" blocked="0"/>
+     <cluster_options stonith-enabled="true" symmetric-cluster="true" no-quorum-policy="stop" maintenance-mode="false" stop-all-resources="false" stonith-timeout-ms="60000" priority-fencing-delay-ms="0"/>
+   </summary>
+   <nodes>
+     <node name="cluster01" id="1" online="true" standby="false" standby_onfail="false" maintenance="false" pending="false" unclean="false" shutdown="false" expected_up="true" is_dc="false" resources_running="5" type="member"/>
+-    <node name="cluster02" id="2" online="true" standby="false" standby_onfail="false" maintenance="false" pending="false" unclean="false" shutdown="false" expected_up="true" is_dc="true" resources_running="4" type="member"/>
++    <node name="cluster02" id="2" online="true" standby="false" standby_onfail="false" maintenance="false" pending="false" unclean="false" shutdown="false" expected_up="true" is_dc="true" resources_running="5" type="member"/>
+     <node name="httpd-bundle-0" id="httpd-bundle-0" online="true" standby="false" standby_onfail="false" maintenance="false" pending="false" unclean="false" shutdown="false" expected_up="false" is_dc="false" resources_running="1" type="remote" id_as_resource="httpd-bundle-docker-0"/>
+     <node name="httpd-bundle-1" id="httpd-bundle-1" online="true" standby="false" standby_onfail="false" maintenance="false" pending="false" unclean="false" shutdown="false" expected_up="false" is_dc="false" resources_running="0" type="remote" id_as_resource="httpd-bundle-docker-1"/>
+   </nodes>
+@@ -3546,11 +3548,14 @@ Failed Resource Actions:
+         </resource>
+       </replica>
+     </bundle>
+-    <group id="partially-active-group" number_resources="2" managed="true" disabled="false">
++    <group id="partially-active-group" number_resources="3" managed="true" disabled="false">
+       <resource id="dummy-1" resource_agent="ocf:pacemaker:Dummy" role="Started" active="true" orphaned="false" blocked="false" managed="true" failed="false" failure_ignored="false" nodes_running_on="1">
+         <node name="cluster02" id="2" cached="true"/>
+       </resource>
+-      <resource id="dummy-2" resource_agent="ocf:pacemaker:Dummy" role="Stopped" target_role="Stopped" active="false" orphaned="false" blocked="false" managed="true" failed="false" failure_ignored="false" nodes_running_on="0"/>
++      <resource id="dummy-2" resource_agent="ocf:pacemaker:Dummy" role="Started" active="true" orphaned="false" blocked="false" managed="true" failed="true" failure_ignored="false" nodes_running_on="1">
++        <node name="cluster02" id="2" cached="true"/>
++      </resource>
++      <resource id="dummy-3" resource_agent="ocf:pacemaker:Dummy" role="Stopped" target_role="Stopped" active="false" orphaned="false" blocked="false" managed="true" failed="false" failure_ignored="false" nodes_running_on="0"/>
+     </group>
+     <resource id="smart-mon" resource_agent="ocf:pacemaker:HealthSMART" role="Stopped" active="false" orphaned="false" blocked="false" managed="true" failed="false" failure_ignored="false" nodes_running_on="0"/>
+   </resources>
+@@ -3579,6 +3584,9 @@ Failed Resource Actions:
+       <resource_history id="dummy-1" orphan="false" migration-threshold="1000000">
+         <operation_history call="2" task="start" rc="0" rc_text="ok" exec-time="0ms" queue-time="0ms"/>
+       </resource_history>
++      <resource_history id="dummy-2" orphan="false" migration-threshold="1000000">
++        <operation_history call="2" task="probe" rc="3" rc_text="unimplemented feature" exec-time="33ms" queue-time="0ms"/>
++      </resource_history>
+       <resource_history id="smart-mon" orphan="false" migration-threshold="1000000">
+         <operation_history call="9" task="probe" rc="5" rc_text="not installed" exec-time="33ms" queue-time="0ms"/>
+       </resource_history>
+@@ -3615,6 +3623,7 @@ Failed Resource Actions:
+     </node>
+   </node_history>
+   <failures>
++    <failure op_key="dummy-2_monitor_0" node="cluster02" exitstatus="unimplemented feature" exitreason="" exitcode="3" call="2" status="complete" last-rc-change="2020-09-02 12:17:38 -04:00" queued="0" exec="33" interval="0" task="monitor"/>
+     <failure op_key="smart-mon_monitor_0" node="cluster02" exitstatus="not installed" exitreason="" exitcode="5" call="9" status="complete" last-rc-change="2021-11-09 15:38:55 -05:00" queued="0" exec="33" interval="0" task="monitor"/>
+     <failure op_key="ping_monitor_0" node="cluster02" exitstatus="not installed" exitreason="" exitcode="5" call="6" status="complete" last-rc-change="2021-11-18 13:11:42 -05:00" queued="0" exec="0" interval="0" task="monitor"/>
+   </failures>
+@@ -3629,7 +3638,7 @@ Cluster Summary:
+   * Last updated:
+   * Last change:
+   * 4 nodes configured
+-  * 14 resource instances configured (1 DISABLED)
++  * 15 resource instances configured (1 DISABLED)
+ 
+ Node List:
+   * Online: [ cluster01 cluster02 ]
+@@ -3645,10 +3654,12 @@ Full List of Resources:
+     * httpd-bundle-1 (192.168.122.132)	(ocf:heartbeat:apache):	 Stopped cluster01
+   * Resource Group: partially-active-group:
+     * dummy-1	(ocf:pacemaker:Dummy):	 Started cluster02
+-    * dummy-2	(ocf:pacemaker:Dummy):	 Stopped (disabled)
++    * dummy-2	(ocf:pacemaker:Dummy):	 FAILED cluster02
++    * dummy-3	(ocf:pacemaker:Dummy):	 Stopped (disabled)
+   * smart-mon	(ocf:pacemaker:HealthSMART):	 Stopped
+ 
+ Failed Resource Actions:
++  * dummy-2 probe on cluster02 returned 'unimplemented feature' at Wed Sep  2 12:17:38 2020 after 33ms
+   * smart-mon probe on cluster02 returned 'not installed' at Tue Nov  9 15:38:55 2021 after 33ms
+   * ping probe on cluster02 returned 'not installed' at Thu Nov 18 13:11:42 2021
+ =#=#=#= End test: Text output of partially active resources, with inactive resources - OK (0) =#=#=#=
+@@ -3660,7 +3671,7 @@ Cluster Summary:
+   * Last updated:
+   * Last change:
+   * 4 nodes configured
+-  * 14 resource instances configured (1 DISABLED)
++  * 15 resource instances configured (1 DISABLED)
+ 
+ Node List:
+   * Online: [ cluster01 cluster02 ]
+@@ -3676,7 +3687,7 @@ Full List of Resources:
+     * httpd-bundle-0 (192.168.122.131)	(ocf:heartbeat:apache):	 Started cluster02
+     * httpd-bundle-1 (192.168.122.132)	(ocf:heartbeat:apache):	 Stopped cluster01
+   * Resource Group: partially-active-group:
+-    * 1/2	(ocf:pacemaker:Dummy):	Active cluster02
++    * 2/3	(ocf:pacemaker:Dummy):	Active cluster02
+ 
+ Node Attributes:
+   * Node: cluster01:
+@@ -3697,6 +3708,8 @@ Operations:
+       * (3) monitor: interval="30000ms"
+     * dummy-1: migration-threshold=1000000:
+       * (2) start
++    * dummy-2: migration-threshold=1000000:
++      * (2) probe
+     * smart-mon: migration-threshold=1000000:
+       * (9) probe
+     * ping: migration-threshold=1000000:
+@@ -3722,6 +3735,7 @@ Operations:
+       * (1) start
+ 
+ Failed Resource Actions:
++  * dummy-2 probe on cluster02 returned 'unimplemented feature' at Wed Sep  2 12:17:38 2020 after 33ms
+   * smart-mon probe on cluster02 returned 'not installed' at Tue Nov  9 15:38:55 2021 after 33ms
+   * ping probe on cluster02 returned 'not installed' at Thu Nov 18 13:11:42 2021
+ =#=#=#= End test: Complete brief text output, with inactive resources - OK (0) =#=#=#=
+@@ -3733,7 +3747,7 @@ Cluster Summary:
+   * Last updated:
+   * Last change:
+   * 4 nodes configured
+-  * 14 resource instances configured (1 DISABLED)
++  * 15 resource instances configured (1 DISABLED)
+ 
+ Node List:
+   * Online: [ cluster01 cluster02 ]
+@@ -3742,6 +3756,7 @@ Node List:
+ Active Resources:
+   * Resource Group: partially-active-group (1 member inactive):
+     * dummy-1	(ocf:pacemaker:Dummy):	 Started cluster02
++    * dummy-2	(ocf:pacemaker:Dummy):	 FAILED cluster02
+ =#=#=#= End test: Text output of partially active group - OK (0) =#=#=#=
+ * Passed: crm_mon        - Text output of partially active group
+ =#=#=#= Begin test: Text output of partially active group, with inactive resources =#=#=#=
+@@ -3751,7 +3766,7 @@ Cluster Summary:
+   * Last updated:
+   * Last change:
+   * 4 nodes configured
+-  * 14 resource instances configured (1 DISABLED)
++  * 15 resource instances configured (1 DISABLED)
+ 
+ Node List:
+   * Online: [ cluster01 cluster02 ]
+@@ -3760,7 +3775,8 @@ Node List:
+ Full List of Resources:
+   * Resource Group: partially-active-group:
+     * dummy-1	(ocf:pacemaker:Dummy):	 Started cluster02
+-    * dummy-2	(ocf:pacemaker:Dummy):	 Stopped (disabled)
++    * dummy-2	(ocf:pacemaker:Dummy):	 FAILED cluster02
++    * dummy-3	(ocf:pacemaker:Dummy):	 Stopped (disabled)
+ =#=#=#= End test: Text output of partially active group, with inactive resources - OK (0) =#=#=#=
+ * Passed: crm_mon        - Text output of partially active group, with inactive resources
+ =#=#=#= Begin test: Text output of active member of partially active group =#=#=#=
+@@ -3770,7 +3786,7 @@ Cluster Summary:
+   * Last updated:
+   * Last change:
+   * 4 nodes configured
+-  * 14 resource instances configured (1 DISABLED)
++  * 15 resource instances configured (1 DISABLED)
+ 
+ Node List:
+   * Online: [ cluster01 cluster02 ]
+@@ -3788,7 +3804,7 @@ Cluster Summary:
+   * Last updated:
+   * Last change:
+   * 4 nodes configured
+-  * 14 resource instances configured (1 DISABLED)
++  * 15 resource instances configured (1 DISABLED)
+ 
+ Node List:
+   * Online: [ cluster01 cluster02 ]
+@@ -3796,7 +3812,10 @@ Node List:
+ 
+ Active Resources:
+   * Resource Group: partially-active-group (1 member inactive):
+-    * dummy-2	(ocf:pacemaker:Dummy):	 Stopped (disabled)
++    * dummy-2	(ocf:pacemaker:Dummy):	 FAILED cluster02
++
++Failed Resource Actions:
++  * dummy-2 probe on cluster02 returned 'unimplemented feature' at Wed Sep  2 12:17:38 2020 after 33ms
+ =#=#=#= End test: Text output of inactive member of partially active group - OK (0) =#=#=#=
+ * Passed: crm_mon        - Text output of inactive member of partially active group
+ =#=#=#= Begin test: Complete brief text output grouped by node, with inactive resources =#=#=#=
+@@ -3806,7 +3825,7 @@ Cluster Summary:
+   * Last updated:
+   * Last change:
+   * 4 nodes configured
+-  * 14 resource instances configured (1 DISABLED)
++  * 15 resource instances configured (1 DISABLED)
+ 
+ Node List:
+   * Node cluster01: online:
+@@ -3820,7 +3839,7 @@ Node List:
+     * Resources:
+       * 1	(ocf:heartbeat:IPaddr2):	Active 
+       * 1	(ocf:heartbeat:docker):	Active 
+-      * 1	(ocf:pacemaker:Dummy):	Active 
++      * 2	(ocf:pacemaker:Dummy):	Active 
+       * 1	(ocf:pacemaker:remote):	Active 
+   * GuestNode httpd-bundle-0@cluster02: online:
+     * Resources:
+@@ -3834,7 +3853,7 @@ Inactive Resources:
+     * httpd-bundle-0 (192.168.122.131)	(ocf:heartbeat:apache):	 Started cluster02
+     * httpd-bundle-1 (192.168.122.132)	(ocf:heartbeat:apache):	 Stopped cluster01
+   * Resource Group: partially-active-group:
+-    * 1/2	(ocf:pacemaker:Dummy):	Active cluster02
++    * 2/3	(ocf:pacemaker:Dummy):	Active cluster02
+   * smart-mon	(ocf:pacemaker:HealthSMART):	 Stopped
+ 
+ Node Attributes:
+@@ -3856,6 +3875,8 @@ Operations:
+       * (3) monitor: interval="30000ms"
+     * dummy-1: migration-threshold=1000000:
+       * (2) start
++    * dummy-2: migration-threshold=1000000:
++      * (2) probe
+     * smart-mon: migration-threshold=1000000:
+       * (9) probe
+     * ping: migration-threshold=1000000:
+@@ -3881,6 +3902,7 @@ Operations:
+       * (1) start
+ 
+ Failed Resource Actions:
++  * dummy-2 probe on cluster02 returned 'unimplemented feature' at Wed Sep  2 12:17:38 2020 after 33ms
+   * smart-mon probe on cluster02 returned 'not installed' at Tue Nov  9 15:38:55 2021 after 33ms
+   * ping probe on cluster02 returned 'not installed' at Thu Nov 18 13:11:42 2021
+ =#=#=#= End test: Complete brief text output grouped by node, with inactive resources - OK (0) =#=#=#=
+@@ -3892,7 +3914,7 @@ Cluster Summary:
+   * Last updated:
+   * Last change:
+   * 4 nodes configured
+-  * 14 resource instances configured (1 DISABLED)
++  * 15 resource instances configured (1 DISABLED)
+ 
+ Node List:
+   * Online: [ cluster01 ]
+@@ -3914,7 +3936,7 @@ Full List of Resources:
+     <last_update time=""/>
+     <last_change time=""/>
+     <nodes_configured number="4"/>
+-    <resources_configured number="14" disabled="1" blocked="0"/>
++    <resources_configured number="15" disabled="1" blocked="0"/>
+     <cluster_options stonith-enabled="true" symmetric-cluster="true" no-quorum-policy="stop" maintenance-mode="false" stop-all-resources="false" stonith-timeout-ms="60000" priority-fencing-delay-ms="0"/>
+   </summary>
+   <nodes>
+-- 
+2.27.0
+
+
+From 85e76b8bdb4de261a9cb4858eeedd49fba0346a1 Mon Sep 17 00:00:00 2001
+From: Chris Lumens <clumens@redhat.com>
+Date: Fri, 19 Nov 2021 15:55:51 -0500
+Subject: [PATCH 09/21] Test: cts-cli: Add a failed probe on a new dummy-4
+ resource.
+
+This verifies that resources with failed probes are displayed properly
+when they are part of a group.  No code changes will be necessary,
+since a group is just a collection of other resources.
+
+There are no code changes yet to properly handle displaying these
+operations, so the results here just reflect the current handling.
+---
+ cts/cli/crm_mon-partial.xml    |  4 +++
+ cts/cli/regression.crm_mon.exp | 51 ++++++++++++++++++++++------------
+ 2 files changed, 37 insertions(+), 18 deletions(-)
+
+diff --git a/cts/cli/crm_mon-partial.xml b/cts/cli/crm_mon-partial.xml
+index 1ce80ea58a..d4d4a70848 100644
+--- a/cts/cli/crm_mon-partial.xml
++++ b/cts/cli/crm_mon-partial.xml
+@@ -60,6 +60,7 @@
+             <nvpair id="inactive-dummy-meta_attributes-target-role" name="target-role" value="Stopped"/>
+           </meta_attributes>
+         </primitive>
++        <primitive class="ocf" id="dummy-4" provider="pacemaker" type="Dummy"/>
+       </group>
+       <primitive class="ocf" id="smart-mon" provider="pacemaker" type="HealthSMART">
+         <operations>
+@@ -108,6 +109,9 @@
+           <lrm_resource id="dummy-2" class="ocf" provider="pacemaker" type="Dummy">
+             <lrm_rsc_op id="dummy-2_last_failure_0" operation_key="dummy-2_monitor_0" operation="monitor" crm-debug-origin="crm_simulate" crm_feature_set="3.11.0" transition-key="2:1:0:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" transition-magic="0:3;2:1:0:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" exit-reason="" on_node="cluster02" call-id="2" rc-code="3" op-status="0" interval="0" last-rc-change="1599063458" exec-time="33" queue-time="0" op-digest="f2317cad3d54cec5d7d7aa7d0bf35cf8"/>
+           </lrm_resource>
++          <lrm_resource id="dummy-4" class="ocf" provider="pacemaker" type="Dummy">
++            <lrm_rsc_op id="dummy-4_last_failure_0" operation_key="dummy-4_monitor_0" operation="monitor" crm-debug-origin="crm_simulate" crm_feature_set="3.11.0" transition-key="21:1:0:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" transition-magic="0:5;21:1:0:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" exit-reason="" on_node="cluster02" call-id="2" rc-code="5" op-status="0" interval="0" last-rc-change="1599063458" exec-time="0" queue-time="0" op-digest="f2317cad3d54cec5d7d7aa7d0bf35cf8"/>
++          </lrm_resource>
+           <lrm_resource id="smart-mon" type="HealthSMART" class="ocf" provider="pacemaker">
+             <lrm_rsc_op id="smart-mon_last_failure_0" operation_key="smart-mon_monitor_0" operation="monitor" crm-debug-origin="crm_simulate" crm_feature_set="3.11.0" transition-key="3:1:0:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" transition-magic="0:5;3:1:0:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" exit-reason="" on_node="cluster02" call-id="9" rc-code="5" op-status="0" interval="0" last-rc-change="1636490335" exec-time="33" queue-time="0" op-digest="b368e619fcd06788c996f6a2ef2efb6a"/>
+           </lrm_resource>
+diff --git a/cts/cli/regression.crm_mon.exp b/cts/cli/regression.crm_mon.exp
+index 8cf3a1215e..c524b199e3 100644
+--- a/cts/cli/regression.crm_mon.exp
++++ b/cts/cli/regression.crm_mon.exp
+@@ -3470,7 +3470,7 @@ Cluster Summary:
+   * Last updated:
+   * Last change:
+   * 4 nodes configured
+-  * 15 resource instances configured (1 DISABLED)
++  * 16 resource instances configured (1 DISABLED)
+ 
+ Node List:
+   * Online: [ cluster01 cluster02 ]
+@@ -3483,12 +3483,13 @@ Active Resources:
+   * Container bundle set: httpd-bundle [pcmk:http]:
+     * httpd-bundle-0 (192.168.122.131)	(ocf:heartbeat:apache):	 Started cluster02
+     * httpd-bundle-1 (192.168.122.132)	(ocf:heartbeat:apache):	 Stopped cluster01
+-  * Resource Group: partially-active-group (1 member inactive):
++  * Resource Group: partially-active-group (2 members inactive):
+     * dummy-1	(ocf:pacemaker:Dummy):	 Started cluster02
+     * dummy-2	(ocf:pacemaker:Dummy):	 FAILED cluster02
+ 
+ Failed Resource Actions:
+   * dummy-2 probe on cluster02 returned 'unimplemented feature' at Wed Sep  2 12:17:38 2020 after 33ms
++  * dummy-4 probe on cluster02 returned 'not installed' at Wed Sep  2 12:17:38 2020
+   * smart-mon probe on cluster02 returned 'not installed' at Tue Nov  9 15:38:55 2021 after 33ms
+   * ping probe on cluster02 returned 'not installed' at Thu Nov 18 13:11:42 2021
+ =#=#=#= End test: Text output of partially active resources - OK (0) =#=#=#=
+@@ -3501,7 +3502,7 @@ Failed Resource Actions:
+     <last_update time=""/>
+     <last_change time=""/>
+     <nodes_configured number="4"/>
+-    <resources_configured number="15" disabled="1" blocked="0"/>
++    <resources_configured number="16" disabled="1" blocked="0"/>
+     <cluster_options stonith-enabled="true" symmetric-cluster="true" no-quorum-policy="stop" maintenance-mode="false" stop-all-resources="false" stonith-timeout-ms="60000" priority-fencing-delay-ms="0"/>
+   </summary>
+   <nodes>
+@@ -3548,7 +3549,7 @@ Failed Resource Actions:
+         </resource>
+       </replica>
+     </bundle>
+-    <group id="partially-active-group" number_resources="3" managed="true" disabled="false">
++    <group id="partially-active-group" number_resources="4" managed="true" disabled="false">
+       <resource id="dummy-1" resource_agent="ocf:pacemaker:Dummy" role="Started" active="true" orphaned="false" blocked="false" managed="true" failed="false" failure_ignored="false" nodes_running_on="1">
+         <node name="cluster02" id="2" cached="true"/>
+       </resource>
+@@ -3556,6 +3557,7 @@ Failed Resource Actions:
+         <node name="cluster02" id="2" cached="true"/>
+       </resource>
+       <resource id="dummy-3" resource_agent="ocf:pacemaker:Dummy" role="Stopped" target_role="Stopped" active="false" orphaned="false" blocked="false" managed="true" failed="false" failure_ignored="false" nodes_running_on="0"/>
++      <resource id="dummy-4" resource_agent="ocf:pacemaker:Dummy" role="Stopped" active="false" orphaned="false" blocked="false" managed="true" failed="false" failure_ignored="false" nodes_running_on="0"/>
+     </group>
+     <resource id="smart-mon" resource_agent="ocf:pacemaker:HealthSMART" role="Stopped" active="false" orphaned="false" blocked="false" managed="true" failed="false" failure_ignored="false" nodes_running_on="0"/>
+   </resources>
+@@ -3587,6 +3589,9 @@ Failed Resource Actions:
+       <resource_history id="dummy-2" orphan="false" migration-threshold="1000000">
+         <operation_history call="2" task="probe" rc="3" rc_text="unimplemented feature" exec-time="33ms" queue-time="0ms"/>
+       </resource_history>
++      <resource_history id="dummy-4" orphan="false" migration-threshold="1000000">
++        <operation_history call="2" task="probe" rc="5" rc_text="not installed" exec-time="0ms" queue-time="0ms"/>
++      </resource_history>
+       <resource_history id="smart-mon" orphan="false" migration-threshold="1000000">
+         <operation_history call="9" task="probe" rc="5" rc_text="not installed" exec-time="33ms" queue-time="0ms"/>
+       </resource_history>
+@@ -3624,6 +3629,7 @@ Failed Resource Actions:
+   </node_history>
+   <failures>
+     <failure op_key="dummy-2_monitor_0" node="cluster02" exitstatus="unimplemented feature" exitreason="" exitcode="3" call="2" status="complete" last-rc-change="2020-09-02 12:17:38 -04:00" queued="0" exec="33" interval="0" task="monitor"/>
++    <failure op_key="dummy-4_monitor_0" node="cluster02" exitstatus="not installed" exitreason="" exitcode="5" call="2" status="complete" last-rc-change="2020-09-02 12:17:38 -04:00" queued="0" exec="0" interval="0" task="monitor"/>
+     <failure op_key="smart-mon_monitor_0" node="cluster02" exitstatus="not installed" exitreason="" exitcode="5" call="9" status="complete" last-rc-change="2021-11-09 15:38:55 -05:00" queued="0" exec="33" interval="0" task="monitor"/>
+     <failure op_key="ping_monitor_0" node="cluster02" exitstatus="not installed" exitreason="" exitcode="5" call="6" status="complete" last-rc-change="2021-11-18 13:11:42 -05:00" queued="0" exec="0" interval="0" task="monitor"/>
+   </failures>
+@@ -3638,7 +3644,7 @@ Cluster Summary:
+   * Last updated:
+   * Last change:
+   * 4 nodes configured
+-  * 15 resource instances configured (1 DISABLED)
++  * 16 resource instances configured (1 DISABLED)
+ 
+ Node List:
+   * Online: [ cluster01 cluster02 ]
+@@ -3656,10 +3662,12 @@ Full List of Resources:
+     * dummy-1	(ocf:pacemaker:Dummy):	 Started cluster02
+     * dummy-2	(ocf:pacemaker:Dummy):	 FAILED cluster02
+     * dummy-3	(ocf:pacemaker:Dummy):	 Stopped (disabled)
++    * dummy-4	(ocf:pacemaker:Dummy):	 Stopped
+   * smart-mon	(ocf:pacemaker:HealthSMART):	 Stopped
+ 
+ Failed Resource Actions:
+   * dummy-2 probe on cluster02 returned 'unimplemented feature' at Wed Sep  2 12:17:38 2020 after 33ms
++  * dummy-4 probe on cluster02 returned 'not installed' at Wed Sep  2 12:17:38 2020
+   * smart-mon probe on cluster02 returned 'not installed' at Tue Nov  9 15:38:55 2021 after 33ms
+   * ping probe on cluster02 returned 'not installed' at Thu Nov 18 13:11:42 2021
+ =#=#=#= End test: Text output of partially active resources, with inactive resources - OK (0) =#=#=#=
+@@ -3671,7 +3679,7 @@ Cluster Summary:
+   * Last updated:
+   * Last change:
+   * 4 nodes configured
+-  * 15 resource instances configured (1 DISABLED)
++  * 16 resource instances configured (1 DISABLED)
+ 
+ Node List:
+   * Online: [ cluster01 cluster02 ]
+@@ -3687,7 +3695,7 @@ Full List of Resources:
+     * httpd-bundle-0 (192.168.122.131)	(ocf:heartbeat:apache):	 Started cluster02
+     * httpd-bundle-1 (192.168.122.132)	(ocf:heartbeat:apache):	 Stopped cluster01
+   * Resource Group: partially-active-group:
+-    * 2/3	(ocf:pacemaker:Dummy):	Active cluster02
++    * 2/4	(ocf:pacemaker:Dummy):	Active cluster02
+ 
+ Node Attributes:
+   * Node: cluster01:
+@@ -3710,6 +3718,8 @@ Operations:
+       * (2) start
+     * dummy-2: migration-threshold=1000000:
+       * (2) probe
++    * dummy-4: migration-threshold=1000000:
++      * (2) probe
+     * smart-mon: migration-threshold=1000000:
+       * (9) probe
+     * ping: migration-threshold=1000000:
+@@ -3736,6 +3746,7 @@ Operations:
+ 
+ Failed Resource Actions:
+   * dummy-2 probe on cluster02 returned 'unimplemented feature' at Wed Sep  2 12:17:38 2020 after 33ms
++  * dummy-4 probe on cluster02 returned 'not installed' at Wed Sep  2 12:17:38 2020
+   * smart-mon probe on cluster02 returned 'not installed' at Tue Nov  9 15:38:55 2021 after 33ms
+   * ping probe on cluster02 returned 'not installed' at Thu Nov 18 13:11:42 2021
+ =#=#=#= End test: Complete brief text output, with inactive resources - OK (0) =#=#=#=
+@@ -3747,14 +3758,14 @@ Cluster Summary:
+   * Last updated:
+   * Last change:
+   * 4 nodes configured
+-  * 15 resource instances configured (1 DISABLED)
++  * 16 resource instances configured (1 DISABLED)
+ 
+ Node List:
+   * Online: [ cluster01 cluster02 ]
+   * GuestOnline: [ httpd-bundle-0@cluster02 httpd-bundle-1@cluster01 ]
+ 
+ Active Resources:
+-  * Resource Group: partially-active-group (1 member inactive):
++  * Resource Group: partially-active-group (2 members inactive):
+     * dummy-1	(ocf:pacemaker:Dummy):	 Started cluster02
+     * dummy-2	(ocf:pacemaker:Dummy):	 FAILED cluster02
+ =#=#=#= End test: Text output of partially active group - OK (0) =#=#=#=
+@@ -3766,7 +3777,7 @@ Cluster Summary:
+   * Last updated:
+   * Last change:
+   * 4 nodes configured
+-  * 15 resource instances configured (1 DISABLED)
++  * 16 resource instances configured (1 DISABLED)
+ 
+ Node List:
+   * Online: [ cluster01 cluster02 ]
+@@ -3777,6 +3788,7 @@ Full List of Resources:
+     * dummy-1	(ocf:pacemaker:Dummy):	 Started cluster02
+     * dummy-2	(ocf:pacemaker:Dummy):	 FAILED cluster02
+     * dummy-3	(ocf:pacemaker:Dummy):	 Stopped (disabled)
++    * dummy-4	(ocf:pacemaker:Dummy):	 Stopped
+ =#=#=#= End test: Text output of partially active group, with inactive resources - OK (0) =#=#=#=
+ * Passed: crm_mon        - Text output of partially active group, with inactive resources
+ =#=#=#= Begin test: Text output of active member of partially active group =#=#=#=
+@@ -3786,14 +3798,14 @@ Cluster Summary:
+   * Last updated:
+   * Last change:
+   * 4 nodes configured
+-  * 15 resource instances configured (1 DISABLED)
++  * 16 resource instances configured (1 DISABLED)
+ 
+ Node List:
+   * Online: [ cluster01 cluster02 ]
+   * GuestOnline: [ httpd-bundle-0@cluster02 httpd-bundle-1@cluster01 ]
+ 
+ Active Resources:
+-  * Resource Group: partially-active-group (1 member inactive):
++  * Resource Group: partially-active-group (2 members inactive):
+     * dummy-1	(ocf:pacemaker:Dummy):	 Started cluster02
+ =#=#=#= End test: Text output of active member of partially active group - OK (0) =#=#=#=
+ * Passed: crm_mon        - Text output of active member of partially active group
+@@ -3804,14 +3816,14 @@ Cluster Summary:
+   * Last updated:
+   * Last change:
+   * 4 nodes configured
+-  * 15 resource instances configured (1 DISABLED)
++  * 16 resource instances configured (1 DISABLED)
+ 
+ Node List:
+   * Online: [ cluster01 cluster02 ]
+   * GuestOnline: [ httpd-bundle-0@cluster02 httpd-bundle-1@cluster01 ]
+ 
+ Active Resources:
+-  * Resource Group: partially-active-group (1 member inactive):
++  * Resource Group: partially-active-group (2 members inactive):
+     * dummy-2	(ocf:pacemaker:Dummy):	 FAILED cluster02
+ 
+ Failed Resource Actions:
+@@ -3825,7 +3837,7 @@ Cluster Summary:
+   * Last updated:
+   * Last change:
+   * 4 nodes configured
+-  * 15 resource instances configured (1 DISABLED)
++  * 16 resource instances configured (1 DISABLED)
+ 
+ Node List:
+   * Node cluster01: online:
+@@ -3853,7 +3865,7 @@ Inactive Resources:
+     * httpd-bundle-0 (192.168.122.131)	(ocf:heartbeat:apache):	 Started cluster02
+     * httpd-bundle-1 (192.168.122.132)	(ocf:heartbeat:apache):	 Stopped cluster01
+   * Resource Group: partially-active-group:
+-    * 2/3	(ocf:pacemaker:Dummy):	Active cluster02
++    * 2/4	(ocf:pacemaker:Dummy):	Active cluster02
+   * smart-mon	(ocf:pacemaker:HealthSMART):	 Stopped
+ 
+ Node Attributes:
+@@ -3877,6 +3889,8 @@ Operations:
+       * (2) start
+     * dummy-2: migration-threshold=1000000:
+       * (2) probe
++    * dummy-4: migration-threshold=1000000:
++      * (2) probe
+     * smart-mon: migration-threshold=1000000:
+       * (9) probe
+     * ping: migration-threshold=1000000:
+@@ -3903,6 +3917,7 @@ Operations:
+ 
+ Failed Resource Actions:
+   * dummy-2 probe on cluster02 returned 'unimplemented feature' at Wed Sep  2 12:17:38 2020 after 33ms
++  * dummy-4 probe on cluster02 returned 'not installed' at Wed Sep  2 12:17:38 2020
+   * smart-mon probe on cluster02 returned 'not installed' at Tue Nov  9 15:38:55 2021 after 33ms
+   * ping probe on cluster02 returned 'not installed' at Thu Nov 18 13:11:42 2021
+ =#=#=#= End test: Complete brief text output grouped by node, with inactive resources - OK (0) =#=#=#=
+@@ -3914,7 +3929,7 @@ Cluster Summary:
+   * Last updated:
+   * Last change:
+   * 4 nodes configured
+-  * 15 resource instances configured (1 DISABLED)
++  * 16 resource instances configured (1 DISABLED)
+ 
+ Node List:
+   * Online: [ cluster01 ]
+@@ -3936,7 +3951,7 @@ Full List of Resources:
+     <last_update time=""/>
+     <last_change time=""/>
+     <nodes_configured number="4"/>
+-    <resources_configured number="15" disabled="1" blocked="0"/>
++    <resources_configured number="16" disabled="1" blocked="0"/>
+     <cluster_options stonith-enabled="true" symmetric-cluster="true" no-quorum-policy="stop" maintenance-mode="false" stop-all-resources="false" stonith-timeout-ms="60000" priority-fencing-delay-ms="0"/>
+   </summary>
+   <nodes>
+-- 
+2.27.0
+
+
+From 206d733b6ce8e0ffcad243d282e8baa8c3ff72b4 Mon Sep 17 00:00:00 2001
+From: Chris Lumens <clumens@redhat.com>
+Date: Tue, 23 Nov 2021 14:33:47 -0500
+Subject: [PATCH 10/21] Test: cts-cli: Add test output for a bundle resource
+ with a failed probe op.
+
+This just changes the existing failed bundle resource from simply not
+starting to failing its probe with 'invalid parameter'.
+
+There are no code changes yet to properly handle displaying these
+operations, so the results here just reflect the current handling.
+---
+ cts/cli/crm_mon-partial.xml    |  9 ++++++++
+ cts/cli/regression.crm_mon.exp | 40 +++++++++++++++++++++++++---------
+ 2 files changed, 39 insertions(+), 10 deletions(-)
+
+diff --git a/cts/cli/crm_mon-partial.xml b/cts/cli/crm_mon-partial.xml
+index d4d4a70848..5981fc653c 100644
+--- a/cts/cli/crm_mon-partial.xml
++++ b/cts/cli/crm_mon-partial.xml
+@@ -178,5 +178,14 @@
+         </lrm_resources>
+       </lrm>
+     </node_state>
++    <node_state id="httpd-bundle-1" uname="httpd-bundle-1">
++      <lrm id="httpd-bundle-1">
++        <lrm_resources>
++          <lrm_resource id="httpd" class="ocf" provider="heartbeat" type="apache">
++            <lrm_rsc_op id="httpd_last_failure_0" operation_key="httpd_monitor_0" operation="monitor" crm-debug-origin="crm_simulate" crm_feature_set="3.3.0" transition-key="1:1:0:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" transition-magic="0:2;1:1:0:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" exit-reason="" call-id="1" rc-code="2" op-status="0" interval="0" last-rc-change="1590608589" exec-time="0" queue-time="0" op-digest="f2317cad3d54cec5d7d7aa7d0bf35cf8"/>
++          </lrm_resource>
++        </lrm_resources>
++      </lrm>
++    </node_state>
+   </status>
+ </cib>
+diff --git a/cts/cli/regression.crm_mon.exp b/cts/cli/regression.crm_mon.exp
+index c524b199e3..b690a26fb6 100644
+--- a/cts/cli/regression.crm_mon.exp
++++ b/cts/cli/regression.crm_mon.exp
+@@ -3482,7 +3482,7 @@ Active Resources:
+   * Fencing	(stonith:fence_xvm):	 Started cluster01
+   * Container bundle set: httpd-bundle [pcmk:http]:
+     * httpd-bundle-0 (192.168.122.131)	(ocf:heartbeat:apache):	 Started cluster02
+-    * httpd-bundle-1 (192.168.122.132)	(ocf:heartbeat:apache):	 Stopped cluster01
++    * httpd-bundle-1 (192.168.122.132)	(ocf:heartbeat:apache):	 FAILED cluster01
+   * Resource Group: partially-active-group (2 members inactive):
+     * dummy-1	(ocf:pacemaker:Dummy):	 Started cluster02
+     * dummy-2	(ocf:pacemaker:Dummy):	 FAILED cluster02
+@@ -3492,6 +3492,7 @@ Failed Resource Actions:
+   * dummy-4 probe on cluster02 returned 'not installed' at Wed Sep  2 12:17:38 2020
+   * smart-mon probe on cluster02 returned 'not installed' at Tue Nov  9 15:38:55 2021 after 33ms
+   * ping probe on cluster02 returned 'not installed' at Thu Nov 18 13:11:42 2021
++  * httpd probe on httpd-bundle-1 returned 'invalid parameter' at Wed May 27 15:43:09 2020
+ =#=#=#= End test: Text output of partially active resources - OK (0) =#=#=#=
+ * Passed: crm_mon        - Text output of partially active resources
+ =#=#=#= Begin test: XML output of partially active resources =#=#=#=
+@@ -3509,7 +3510,7 @@ Failed Resource Actions:
+     <node name="cluster01" id="1" online="true" standby="false" standby_onfail="false" maintenance="false" pending="false" unclean="false" shutdown="false" expected_up="true" is_dc="false" resources_running="5" type="member"/>
+     <node name="cluster02" id="2" online="true" standby="false" standby_onfail="false" maintenance="false" pending="false" unclean="false" shutdown="false" expected_up="true" is_dc="true" resources_running="5" type="member"/>
+     <node name="httpd-bundle-0" id="httpd-bundle-0" online="true" standby="false" standby_onfail="false" maintenance="false" pending="false" unclean="false" shutdown="false" expected_up="false" is_dc="false" resources_running="1" type="remote" id_as_resource="httpd-bundle-docker-0"/>
+-    <node name="httpd-bundle-1" id="httpd-bundle-1" online="true" standby="false" standby_onfail="false" maintenance="false" pending="false" unclean="false" shutdown="false" expected_up="false" is_dc="false" resources_running="0" type="remote" id_as_resource="httpd-bundle-docker-1"/>
++    <node name="httpd-bundle-1" id="httpd-bundle-1" online="true" standby="false" standby_onfail="false" maintenance="false" pending="false" unclean="false" shutdown="false" expected_up="false" is_dc="false" resources_running="1" type="remote" id_as_resource="httpd-bundle-docker-1"/>
+   </nodes>
+   <resources>
+     <clone id="ping-clone" multi_state="false" unique="false" managed="true" disabled="false" failed="false" failure_ignored="false">
+@@ -3540,7 +3541,9 @@ Failed Resource Actions:
+         <resource id="httpd-bundle-ip-192.168.122.132" resource_agent="ocf:heartbeat:IPaddr2" role="Started" target_role="Started" active="true" orphaned="false" blocked="false" managed="true" failed="false" failure_ignored="false" nodes_running_on="1">
+           <node name="cluster01" id="1" cached="true"/>
+         </resource>
+-        <resource id="httpd" resource_agent="ocf:heartbeat:apache" role="Stopped" target_role="Started" active="false" orphaned="false" blocked="false" managed="true" failed="false" failure_ignored="false" nodes_running_on="0"/>
++        <resource id="httpd" resource_agent="ocf:heartbeat:apache" role="Started" target_role="Started" active="true" orphaned="false" blocked="false" managed="true" failed="true" failure_ignored="false" nodes_running_on="1">
++          <node name="httpd-bundle-1" id="httpd-bundle-1" cached="true"/>
++        </resource>
+         <resource id="httpd-bundle-docker-1" resource_agent="ocf:heartbeat:docker" role="Started" target_role="Started" active="true" orphaned="false" blocked="false" managed="true" failed="false" failure_ignored="false" nodes_running_on="1">
+           <node name="cluster01" id="1" cached="true"/>
+         </resource>
+@@ -3626,12 +3629,18 @@ Failed Resource Actions:
+         <operation_history call="1" task="start" rc="0" rc_text="ok" exec-time="0ms" queue-time="0ms"/>
+       </resource_history>
+     </node>
++    <node name="httpd-bundle-1">
++      <resource_history id="httpd" orphan="false" migration-threshold="1000000">
++        <operation_history call="1" task="probe" rc="2" rc_text="invalid parameter" exec-time="0ms" queue-time="0ms"/>
++      </resource_history>
++    </node>
+   </node_history>
+   <failures>
+     <failure op_key="dummy-2_monitor_0" node="cluster02" exitstatus="unimplemented feature" exitreason="" exitcode="3" call="2" status="complete" last-rc-change="2020-09-02 12:17:38 -04:00" queued="0" exec="33" interval="0" task="monitor"/>
+     <failure op_key="dummy-4_monitor_0" node="cluster02" exitstatus="not installed" exitreason="" exitcode="5" call="2" status="complete" last-rc-change="2020-09-02 12:17:38 -04:00" queued="0" exec="0" interval="0" task="monitor"/>
+     <failure op_key="smart-mon_monitor_0" node="cluster02" exitstatus="not installed" exitreason="" exitcode="5" call="9" status="complete" last-rc-change="2021-11-09 15:38:55 -05:00" queued="0" exec="33" interval="0" task="monitor"/>
+     <failure op_key="ping_monitor_0" node="cluster02" exitstatus="not installed" exitreason="" exitcode="5" call="6" status="complete" last-rc-change="2021-11-18 13:11:42 -05:00" queued="0" exec="0" interval="0" task="monitor"/>
++    <failure op_key="httpd_monitor_0" node="httpd-bundle-1" exitstatus="invalid parameter" exitreason="" exitcode="2" call="1" status="complete" last-rc-change="2020-05-27 15:43:09 -04:00" queued="0" exec="0" interval="0" task="monitor"/>
+   </failures>
+   <status code="0" message="OK"/>
+ </pacemaker-result>
+@@ -3657,7 +3666,7 @@ Full List of Resources:
+   * Fencing	(stonith:fence_xvm):	 Started cluster01
+   * Container bundle set: httpd-bundle [pcmk:http]:
+     * httpd-bundle-0 (192.168.122.131)	(ocf:heartbeat:apache):	 Started cluster02
+-    * httpd-bundle-1 (192.168.122.132)	(ocf:heartbeat:apache):	 Stopped cluster01
++    * httpd-bundle-1 (192.168.122.132)	(ocf:heartbeat:apache):	 FAILED cluster01
+   * Resource Group: partially-active-group:
+     * dummy-1	(ocf:pacemaker:Dummy):	 Started cluster02
+     * dummy-2	(ocf:pacemaker:Dummy):	 FAILED cluster02
+@@ -3670,6 +3679,7 @@ Failed Resource Actions:
+   * dummy-4 probe on cluster02 returned 'not installed' at Wed Sep  2 12:17:38 2020
+   * smart-mon probe on cluster02 returned 'not installed' at Tue Nov  9 15:38:55 2021 after 33ms
+   * ping probe on cluster02 returned 'not installed' at Thu Nov 18 13:11:42 2021
++  * httpd probe on httpd-bundle-1 returned 'invalid parameter' at Wed May 27 15:43:09 2020
+ =#=#=#= End test: Text output of partially active resources, with inactive resources - OK (0) =#=#=#=
+ * Passed: crm_mon        - Text output of partially active resources, with inactive resources
+ =#=#=#= Begin test: Complete brief text output, with inactive resources =#=#=#=
+@@ -3693,7 +3703,7 @@ Full List of Resources:
+     * Stopped: [ cluster02 ]
+   * Container bundle set: httpd-bundle [pcmk:http]:
+     * httpd-bundle-0 (192.168.122.131)	(ocf:heartbeat:apache):	 Started cluster02
+-    * httpd-bundle-1 (192.168.122.132)	(ocf:heartbeat:apache):	 Stopped cluster01
++    * httpd-bundle-1 (192.168.122.132)	(ocf:heartbeat:apache):	 FAILED cluster01
+   * Resource Group: partially-active-group:
+     * 2/4	(ocf:pacemaker:Dummy):	Active cluster02
+ 
+@@ -3743,12 +3753,16 @@ Operations:
+   * Node: httpd-bundle-0@cluster02:
+     * httpd: migration-threshold=1000000:
+       * (1) start
++  * Node: httpd-bundle-1@cluster01:
++    * httpd: migration-threshold=1000000:
++      * (1) probe
+ 
+ Failed Resource Actions:
+   * dummy-2 probe on cluster02 returned 'unimplemented feature' at Wed Sep  2 12:17:38 2020 after 33ms
+   * dummy-4 probe on cluster02 returned 'not installed' at Wed Sep  2 12:17:38 2020
+   * smart-mon probe on cluster02 returned 'not installed' at Tue Nov  9 15:38:55 2021 after 33ms
+   * ping probe on cluster02 returned 'not installed' at Thu Nov 18 13:11:42 2021
++  * httpd probe on httpd-bundle-1 returned 'invalid parameter' at Wed May 27 15:43:09 2020
+ =#=#=#= End test: Complete brief text output, with inactive resources - OK (0) =#=#=#=
+ * Passed: crm_mon        - Complete brief text output, with inactive resources
+ =#=#=#= Begin test: Text output of partially active group =#=#=#=
+@@ -3856,14 +3870,14 @@ Node List:
+   * GuestNode httpd-bundle-0@cluster02: online:
+     * Resources:
+       * 1	(ocf:heartbeat:apache):	Active 
++  * GuestNode httpd-bundle-1@cluster01: online:
++    * Resources:
++      * 1	(ocf:heartbeat:apache):	Active 
+ 
+ Inactive Resources:
+   * Clone Set: ping-clone [ping]:
+     * Started: [ cluster01 ]
+     * Stopped: [ cluster02 ]
+-  * Container bundle set: httpd-bundle [pcmk:http]:
+-    * httpd-bundle-0 (192.168.122.131)	(ocf:heartbeat:apache):	 Started cluster02
+-    * httpd-bundle-1 (192.168.122.132)	(ocf:heartbeat:apache):	 Stopped cluster01
+   * Resource Group: partially-active-group:
+     * 2/4	(ocf:pacemaker:Dummy):	Active cluster02
+   * smart-mon	(ocf:pacemaker:HealthSMART):	 Stopped
+@@ -3914,12 +3928,16 @@ Operations:
+   * Node: httpd-bundle-0@cluster02:
+     * httpd: migration-threshold=1000000:
+       * (1) start
++  * Node: httpd-bundle-1@cluster01:
++    * httpd: migration-threshold=1000000:
++      * (1) probe
+ 
+ Failed Resource Actions:
+   * dummy-2 probe on cluster02 returned 'unimplemented feature' at Wed Sep  2 12:17:38 2020 after 33ms
+   * dummy-4 probe on cluster02 returned 'not installed' at Wed Sep  2 12:17:38 2020
+   * smart-mon probe on cluster02 returned 'not installed' at Tue Nov  9 15:38:55 2021 after 33ms
+   * ping probe on cluster02 returned 'not installed' at Thu Nov 18 13:11:42 2021
++  * httpd probe on httpd-bundle-1 returned 'invalid parameter' at Wed May 27 15:43:09 2020
+ =#=#=#= End test: Complete brief text output grouped by node, with inactive resources - OK (0) =#=#=#=
+ * Passed: crm_mon        - Complete brief text output grouped by node, with inactive resources
+ =#=#=#= Begin test: Text output of partially active resources, with inactive resources, filtered by node =#=#=#=
+@@ -3939,7 +3957,7 @@ Full List of Resources:
+     * Started: [ cluster01 ]
+   * Fencing	(stonith:fence_xvm):	 Started cluster01
+   * Container bundle set: httpd-bundle [pcmk:http]:
+-    * httpd-bundle-1 (192.168.122.132)	(ocf:heartbeat:apache):	 Stopped cluster01
++    * httpd-bundle-1 (192.168.122.132)	(ocf:heartbeat:apache):	 FAILED cluster01
+   * smart-mon	(ocf:pacemaker:HealthSMART):	 Stopped
+ =#=#=#= End test: Text output of partially active resources, with inactive resources, filtered by node - OK (0) =#=#=#=
+ * Passed: crm_mon        - Text output of partially active resources, with inactive resources, filtered by node
+@@ -3972,7 +3990,9 @@ Full List of Resources:
+         <resource id="httpd-bundle-ip-192.168.122.132" resource_agent="ocf:heartbeat:IPaddr2" role="Started" target_role="Started" active="true" orphaned="false" blocked="false" managed="true" failed="false" failure_ignored="false" nodes_running_on="1">
+           <node name="cluster01" id="1" cached="true"/>
+         </resource>
+-        <resource id="httpd" resource_agent="ocf:heartbeat:apache" role="Stopped" target_role="Started" active="false" orphaned="false" blocked="false" managed="true" failed="false" failure_ignored="false" nodes_running_on="0"/>
++        <resource id="httpd" resource_agent="ocf:heartbeat:apache" role="Started" target_role="Started" active="true" orphaned="false" blocked="false" managed="true" failed="true" failure_ignored="false" nodes_running_on="1">
++          <node name="httpd-bundle-1" id="httpd-bundle-1" cached="true"/>
++        </resource>
+         <resource id="httpd-bundle-docker-1" resource_agent="ocf:heartbeat:docker" role="Started" target_role="Started" active="true" orphaned="false" blocked="false" managed="true" failed="false" failure_ignored="false" nodes_running_on="1">
+           <node name="cluster01" id="1" cached="true"/>
+         </resource>
+-- 
+2.27.0
+
+
+From 6240a28d36c0349e3b1d7f52c36106580c53bb01 Mon Sep 17 00:00:00 2001
+From: Chris Lumens <clumens@redhat.com>
+Date: Mon, 22 Nov 2021 10:59:10 -0500
+Subject: [PATCH 11/21] Test: cts: Add --show-detail to a couple of the crm_mon
+ tests.
+
+This straightens out a couple of differences in output between running
+tests locally (where --enable-compat-2.0 is not given) and running
+tests under mock (where that option is given, which automatically adds
+--show-detail).
+
+Note that this only really matters for failed resource actions, which
+were not previously output as part of any crm_mon regression test.
+Only the patches in this series have introduced that output, and with
+it this difference.
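+
+As a rough illustration (the exact invocations come from the
+cts-cli.in changes below; the annotations paraphrase the expected
+output updated in this commit):
+
+    crm_mon -1                 # local default: no node IDs, compact
+                               # clone/bundle and failed-action lines
+    crm_mon -1 --show-detail   # mock/compat-2.0 behavior: node IDs,
+                               # per-replica bundle detail, verbose
+                               # failed-action lines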
+---
+ cts/cli/regression.crm_mon.exp | 131 ++++++++++++++++++++-------------
+ cts/cts-cli.in                 |  10 +--
+ 2 files changed, 83 insertions(+), 58 deletions(-)
+
+diff --git a/cts/cli/regression.crm_mon.exp b/cts/cli/regression.crm_mon.exp
+index b690a26fb6..d7b9d98e2c 100644
+--- a/cts/cli/regression.crm_mon.exp
++++ b/cts/cli/regression.crm_mon.exp
+@@ -3466,33 +3466,42 @@ Operations:
+ =#=#=#= Begin test: Text output of partially active resources =#=#=#=
+ Cluster Summary:
+   * Stack: corosync
+-  * Current DC: cluster02 (version) - partition with quorum
++  * Current DC: cluster02 (2) (version) - partition with quorum
+   * Last updated:
+   * Last change:
+   * 4 nodes configured
+   * 16 resource instances configured (1 DISABLED)
+ 
+ Node List:
+-  * Online: [ cluster01 cluster02 ]
++  * Online: [ cluster01 (1) cluster02 (2) ]
+   * GuestOnline: [ httpd-bundle-0@cluster02 httpd-bundle-1@cluster01 ]
+ 
+ Active Resources:
+   * Clone Set: ping-clone [ping]:
+-    * Started: [ cluster01 ]
++    * ping	(ocf:pacemaker:ping):	 Started cluster01
++    * ping	(ocf:pacemaker:ping):	 Stopped
+   * Fencing	(stonith:fence_xvm):	 Started cluster01
+   * Container bundle set: httpd-bundle [pcmk:http]:
+-    * httpd-bundle-0 (192.168.122.131)	(ocf:heartbeat:apache):	 Started cluster02
+-    * httpd-bundle-1 (192.168.122.132)	(ocf:heartbeat:apache):	 FAILED cluster01
++    * Replica[0]
++      * httpd-bundle-ip-192.168.122.131	(ocf:heartbeat:IPaddr2):	 Started cluster02
++      * httpd	(ocf:heartbeat:apache):	 Started httpd-bundle-0
++      * httpd-bundle-docker-0	(ocf:heartbeat:docker):	 Started cluster02
++      * httpd-bundle-0	(ocf:pacemaker:remote):	 Started cluster02
++    * Replica[1]
++      * httpd-bundle-ip-192.168.122.132	(ocf:heartbeat:IPaddr2):	 Started cluster01
++      * httpd	(ocf:heartbeat:apache):	 FAILED httpd-bundle-1
++      * httpd-bundle-docker-1	(ocf:heartbeat:docker):	 Started cluster01
++      * httpd-bundle-1	(ocf:pacemaker:remote):	 Started cluster01
+   * Resource Group: partially-active-group (2 members inactive):
+     * dummy-1	(ocf:pacemaker:Dummy):	 Started cluster02
+     * dummy-2	(ocf:pacemaker:Dummy):	 FAILED cluster02
+ 
+ Failed Resource Actions:
+-  * dummy-2 probe on cluster02 returned 'unimplemented feature' at Wed Sep  2 12:17:38 2020 after 33ms
+-  * dummy-4 probe on cluster02 returned 'not installed' at Wed Sep  2 12:17:38 2020
+-  * smart-mon probe on cluster02 returned 'not installed' at Tue Nov  9 15:38:55 2021 after 33ms
+-  * ping probe on cluster02 returned 'not installed' at Thu Nov 18 13:11:42 2021
+-  * httpd probe on httpd-bundle-1 returned 'invalid parameter' at Wed May 27 15:43:09 2020
++  * dummy-2_monitor_0 on cluster02 'unimplemented feature' (3): call=2, status='complete', last-rc-change='Wed Sep  2 12:17:38 2020', queued=0ms, exec=33ms
++  * dummy-4_monitor_0 on cluster02 'not installed' (5): call=2, status='complete', last-rc-change='Wed Sep  2 12:17:38 2020', queued=0ms, exec=0ms
++  * smart-mon_monitor_0 on cluster02 'not installed' (5): call=9, status='complete', last-rc-change='Tue Nov  9 15:38:55 2021', queued=0ms, exec=33ms
++  * ping_monitor_0 on cluster02 'not installed' (5): call=6, status='complete', last-rc-change='Thu Nov 18 13:11:42 2021', queued=0ms, exec=0ms
++  * httpd_monitor_0 on httpd-bundle-1 'invalid parameter' (2): call=1, status='complete', last-rc-change='Wed May 27 15:43:09 2020', queued=0ms, exec=0ms
+ =#=#=#= End test: Text output of partially active resources - OK (0) =#=#=#=
+ * Passed: crm_mon        - Text output of partially active resources
+ =#=#=#= Begin test: XML output of partially active resources =#=#=#=
+@@ -3649,24 +3658,32 @@ Failed Resource Actions:
+ =#=#=#= Begin test: Text output of partially active resources, with inactive resources =#=#=#=
+ Cluster Summary:
+   * Stack: corosync
+-  * Current DC: cluster02 (version) - partition with quorum
++  * Current DC: cluster02 (2) (version) - partition with quorum
+   * Last updated:
+   * Last change:
+   * 4 nodes configured
+   * 16 resource instances configured (1 DISABLED)
+ 
+ Node List:
+-  * Online: [ cluster01 cluster02 ]
++  * Online: [ cluster01 (1) cluster02 (2) ]
+   * GuestOnline: [ httpd-bundle-0@cluster02 httpd-bundle-1@cluster01 ]
+ 
+ Full List of Resources:
+   * Clone Set: ping-clone [ping]:
+-    * Started: [ cluster01 ]
+-    * Stopped: [ cluster02 ]
++    * ping	(ocf:pacemaker:ping):	 Started cluster01
++    * ping	(ocf:pacemaker:ping):	 Stopped
+   * Fencing	(stonith:fence_xvm):	 Started cluster01
+   * Container bundle set: httpd-bundle [pcmk:http]:
+-    * httpd-bundle-0 (192.168.122.131)	(ocf:heartbeat:apache):	 Started cluster02
+-    * httpd-bundle-1 (192.168.122.132)	(ocf:heartbeat:apache):	 FAILED cluster01
++    * Replica[0]
++      * httpd-bundle-ip-192.168.122.131	(ocf:heartbeat:IPaddr2):	 Started cluster02
++      * httpd	(ocf:heartbeat:apache):	 Started httpd-bundle-0
++      * httpd-bundle-docker-0	(ocf:heartbeat:docker):	 Started cluster02
++      * httpd-bundle-0	(ocf:pacemaker:remote):	 Started cluster02
++    * Replica[1]
++      * httpd-bundle-ip-192.168.122.132	(ocf:heartbeat:IPaddr2):	 Started cluster01
++      * httpd	(ocf:heartbeat:apache):	 FAILED httpd-bundle-1
++      * httpd-bundle-docker-1	(ocf:heartbeat:docker):	 Started cluster01
++      * httpd-bundle-1	(ocf:pacemaker:remote):	 Started cluster01
+   * Resource Group: partially-active-group:
+     * dummy-1	(ocf:pacemaker:Dummy):	 Started cluster02
+     * dummy-2	(ocf:pacemaker:Dummy):	 FAILED cluster02
+@@ -3675,46 +3692,54 @@ Full List of Resources:
+   * smart-mon	(ocf:pacemaker:HealthSMART):	 Stopped
+ 
+ Failed Resource Actions:
+-  * dummy-2 probe on cluster02 returned 'unimplemented feature' at Wed Sep  2 12:17:38 2020 after 33ms
+-  * dummy-4 probe on cluster02 returned 'not installed' at Wed Sep  2 12:17:38 2020
+-  * smart-mon probe on cluster02 returned 'not installed' at Tue Nov  9 15:38:55 2021 after 33ms
+-  * ping probe on cluster02 returned 'not installed' at Thu Nov 18 13:11:42 2021
+-  * httpd probe on httpd-bundle-1 returned 'invalid parameter' at Wed May 27 15:43:09 2020
++  * dummy-2_monitor_0 on cluster02 'unimplemented feature' (3): call=2, status='complete', last-rc-change='Wed Sep  2 12:17:38 2020', queued=0ms, exec=33ms
++  * dummy-4_monitor_0 on cluster02 'not installed' (5): call=2, status='complete', last-rc-change='Wed Sep  2 12:17:38 2020', queued=0ms, exec=0ms
++  * smart-mon_monitor_0 on cluster02 'not installed' (5): call=9, status='complete', last-rc-change='Tue Nov  9 15:38:55 2021', queued=0ms, exec=33ms
++  * ping_monitor_0 on cluster02 'not installed' (5): call=6, status='complete', last-rc-change='Thu Nov 18 13:11:42 2021', queued=0ms, exec=0ms
++  * httpd_monitor_0 on httpd-bundle-1 'invalid parameter' (2): call=1, status='complete', last-rc-change='Wed May 27 15:43:09 2020', queued=0ms, exec=0ms
+ =#=#=#= End test: Text output of partially active resources, with inactive resources - OK (0) =#=#=#=
+ * Passed: crm_mon        - Text output of partially active resources, with inactive resources
+ =#=#=#= Begin test: Complete brief text output, with inactive resources =#=#=#=
+ Cluster Summary:
+   * Stack: corosync
+-  * Current DC: cluster02 (version) - partition with quorum
++  * Current DC: cluster02 (2) (version) - partition with quorum
+   * Last updated:
+   * Last change:
+   * 4 nodes configured
+   * 16 resource instances configured (1 DISABLED)
+ 
+ Node List:
+-  * Online: [ cluster01 cluster02 ]
++  * Online: [ cluster01 (1) cluster02 (2) ]
+   * GuestOnline: [ httpd-bundle-0@cluster02 httpd-bundle-1@cluster01 ]
+ 
+ Full List of Resources:
+   * 0/1	(ocf:pacemaker:HealthSMART):	Active
+   * 1/1	(stonith:fence_xvm):	Active cluster01
+   * Clone Set: ping-clone [ping]:
+-    * Started: [ cluster01 ]
+-    * Stopped: [ cluster02 ]
++    * ping	(ocf:pacemaker:ping):	 Started cluster01
++    * ping	(ocf:pacemaker:ping):	 Stopped
+   * Container bundle set: httpd-bundle [pcmk:http]:
+-    * httpd-bundle-0 (192.168.122.131)	(ocf:heartbeat:apache):	 Started cluster02
+-    * httpd-bundle-1 (192.168.122.132)	(ocf:heartbeat:apache):	 FAILED cluster01
++    * Replica[0]
++      * httpd-bundle-ip-192.168.122.131	(ocf:heartbeat:IPaddr2):	 Started cluster02
++      * httpd	(ocf:heartbeat:apache):	 Started httpd-bundle-0
++      * httpd-bundle-docker-0	(ocf:heartbeat:docker):	 Started cluster02
++      * httpd-bundle-0	(ocf:pacemaker:remote):	 Started cluster02
++    * Replica[1]
++      * httpd-bundle-ip-192.168.122.132	(ocf:heartbeat:IPaddr2):	 Started cluster01
++      * httpd	(ocf:heartbeat:apache):	 FAILED httpd-bundle-1
++      * httpd-bundle-docker-1	(ocf:heartbeat:docker):	 Started cluster01
++      * httpd-bundle-1	(ocf:pacemaker:remote):	 Started cluster01
+   * Resource Group: partially-active-group:
+     * 2/4	(ocf:pacemaker:Dummy):	Active cluster02
+ 
+ Node Attributes:
+-  * Node: cluster01:
++  * Node: cluster01 (1):
+     * pingd                           	: 1000      
+-  * Node: cluster02:
++  * Node: cluster02 (2):
+     * pingd                           	: 1000      
+ 
+ Operations:
+-  * Node: cluster02:
++  * Node: cluster02 (2):
+     * httpd-bundle-ip-192.168.122.131: migration-threshold=1000000:
+       * (2) start
+       * (3) monitor: interval="60000ms"
+@@ -3734,7 +3759,7 @@ Operations:
+       * (9) probe
+     * ping: migration-threshold=1000000:
+       * (6) probe
+-  * Node: cluster01:
++  * Node: cluster01 (1):
+     * Fencing: migration-threshold=1000000:
+       * (15) start
+       * (20) monitor: interval="60000ms"
+@@ -3758,11 +3783,11 @@ Operations:
+       * (1) probe
+ 
+ Failed Resource Actions:
+-  * dummy-2 probe on cluster02 returned 'unimplemented feature' at Wed Sep  2 12:17:38 2020 after 33ms
+-  * dummy-4 probe on cluster02 returned 'not installed' at Wed Sep  2 12:17:38 2020
+-  * smart-mon probe on cluster02 returned 'not installed' at Tue Nov  9 15:38:55 2021 after 33ms
+-  * ping probe on cluster02 returned 'not installed' at Thu Nov 18 13:11:42 2021
+-  * httpd probe on httpd-bundle-1 returned 'invalid parameter' at Wed May 27 15:43:09 2020
++  * dummy-2_monitor_0 on cluster02 'unimplemented feature' (3): call=2, status='complete', last-rc-change='Wed Sep  2 12:17:38 2020', queued=0ms, exec=33ms
++  * dummy-4_monitor_0 on cluster02 'not installed' (5): call=2, status='complete', last-rc-change='Wed Sep  2 12:17:38 2020', queued=0ms, exec=0ms
++  * smart-mon_monitor_0 on cluster02 'not installed' (5): call=9, status='complete', last-rc-change='Tue Nov  9 15:38:55 2021', queued=0ms, exec=33ms
++  * ping_monitor_0 on cluster02 'not installed' (5): call=6, status='complete', last-rc-change='Thu Nov 18 13:11:42 2021', queued=0ms, exec=0ms
++  * httpd_monitor_0 on httpd-bundle-1 'invalid parameter' (2): call=1, status='complete', last-rc-change='Wed May 27 15:43:09 2020', queued=0ms, exec=0ms
+ =#=#=#= End test: Complete brief text output, with inactive resources - OK (0) =#=#=#=
+ * Passed: crm_mon        - Complete brief text output, with inactive resources
+ =#=#=#= Begin test: Text output of partially active group =#=#=#=
+@@ -3826,14 +3851,14 @@ Active Resources:
+ =#=#=#= Begin test: Text output of inactive member of partially active group =#=#=#=
+ Cluster Summary:
+   * Stack: corosync
+-  * Current DC: cluster02 (version) - partition with quorum
++  * Current DC: cluster02 (2) (version) - partition with quorum
+   * Last updated:
+   * Last change:
+   * 4 nodes configured
+   * 16 resource instances configured (1 DISABLED)
+ 
+ Node List:
+-  * Online: [ cluster01 cluster02 ]
++  * Online: [ cluster01 (1) cluster02 (2) ]
+   * GuestOnline: [ httpd-bundle-0@cluster02 httpd-bundle-1@cluster01 ]
+ 
+ Active Resources:
+@@ -3841,27 +3866,27 @@ Active Resources:
+     * dummy-2	(ocf:pacemaker:Dummy):	 FAILED cluster02
+ 
+ Failed Resource Actions:
+-  * dummy-2 probe on cluster02 returned 'unimplemented feature' at Wed Sep  2 12:17:38 2020 after 33ms
++  * dummy-2_monitor_0 on cluster02 'unimplemented feature' (3): call=2, status='complete', last-rc-change='Wed Sep  2 12:17:38 2020', queued=0ms, exec=33ms
+ =#=#=#= End test: Text output of inactive member of partially active group - OK (0) =#=#=#=
+ * Passed: crm_mon        - Text output of inactive member of partially active group
+ =#=#=#= Begin test: Complete brief text output grouped by node, with inactive resources =#=#=#=
+ Cluster Summary:
+   * Stack: corosync
+-  * Current DC: cluster02 (version) - partition with quorum
++  * Current DC: cluster02 (2) (version) - partition with quorum
+   * Last updated:
+   * Last change:
+   * 4 nodes configured
+   * 16 resource instances configured (1 DISABLED)
+ 
+ Node List:
+-  * Node cluster01: online:
++  * Node cluster01 (1): online:
+     * Resources:
+       * 1	(ocf:heartbeat:IPaddr2):	Active 
+       * 1	(ocf:heartbeat:docker):	Active 
+       * 1	(ocf:pacemaker:ping):	Active 
+       * 1	(ocf:pacemaker:remote):	Active 
+       * 1	(stonith:fence_xvm):	Active 
+-  * Node cluster02: online:
++  * Node cluster02 (2): online:
+     * Resources:
+       * 1	(ocf:heartbeat:IPaddr2):	Active 
+       * 1	(ocf:heartbeat:docker):	Active 
+@@ -3876,20 +3901,20 @@ Node List:
+ 
+ Inactive Resources:
+   * Clone Set: ping-clone [ping]:
+-    * Started: [ cluster01 ]
+-    * Stopped: [ cluster02 ]
++    * ping	(ocf:pacemaker:ping):	 Started cluster01
++    * ping	(ocf:pacemaker:ping):	 Stopped
+   * Resource Group: partially-active-group:
+     * 2/4	(ocf:pacemaker:Dummy):	Active cluster02
+   * smart-mon	(ocf:pacemaker:HealthSMART):	 Stopped
+ 
+ Node Attributes:
+-  * Node: cluster01:
++  * Node: cluster01 (1):
+     * pingd                           	: 1000      
+-  * Node: cluster02:
++  * Node: cluster02 (2):
+     * pingd                           	: 1000      
+ 
+ Operations:
+-  * Node: cluster02:
++  * Node: cluster02 (2):
+     * httpd-bundle-ip-192.168.122.131: migration-threshold=1000000:
+       * (2) start
+       * (3) monitor: interval="60000ms"
+@@ -3909,7 +3934,7 @@ Operations:
+       * (9) probe
+     * ping: migration-threshold=1000000:
+       * (6) probe
+-  * Node: cluster01:
++  * Node: cluster01 (1):
+     * Fencing: migration-threshold=1000000:
+       * (15) start
+       * (20) monitor: interval="60000ms"
+@@ -3933,11 +3958,11 @@ Operations:
+       * (1) probe
+ 
+ Failed Resource Actions:
+-  * dummy-2 probe on cluster02 returned 'unimplemented feature' at Wed Sep  2 12:17:38 2020 after 33ms
+-  * dummy-4 probe on cluster02 returned 'not installed' at Wed Sep  2 12:17:38 2020
+-  * smart-mon probe on cluster02 returned 'not installed' at Tue Nov  9 15:38:55 2021 after 33ms
+-  * ping probe on cluster02 returned 'not installed' at Thu Nov 18 13:11:42 2021
+-  * httpd probe on httpd-bundle-1 returned 'invalid parameter' at Wed May 27 15:43:09 2020
++  * dummy-2_monitor_0 on cluster02 'unimplemented feature' (3): call=2, status='complete', last-rc-change='Wed Sep  2 12:17:38 2020', queued=0ms, exec=33ms
++  * dummy-4_monitor_0 on cluster02 'not installed' (5): call=2, status='complete', last-rc-change='Wed Sep  2 12:17:38 2020', queued=0ms, exec=0ms
++  * smart-mon_monitor_0 on cluster02 'not installed' (5): call=9, status='complete', last-rc-change='Tue Nov  9 15:38:55 2021', queued=0ms, exec=33ms
++  * ping_monitor_0 on cluster02 'not installed' (5): call=6, status='complete', last-rc-change='Thu Nov 18 13:11:42 2021', queued=0ms, exec=0ms
++  * httpd_monitor_0 on httpd-bundle-1 'invalid parameter' (2): call=1, status='complete', last-rc-change='Wed May 27 15:43:09 2020', queued=0ms, exec=0ms
+ =#=#=#= End test: Complete brief text output grouped by node, with inactive resources - OK (0) =#=#=#=
+ * Passed: crm_mon        - Complete brief text output grouped by node, with inactive resources
+ =#=#=#= Begin test: Text output of partially active resources, with inactive resources, filtered by node =#=#=#=
+diff --git a/cts/cts-cli.in b/cts/cts-cli.in
+index d32bfb7ed1..457816afab 100755
+--- a/cts/cts-cli.in
++++ b/cts/cts-cli.in
+@@ -420,7 +420,7 @@ function test_crm_mon() {
+     export CIB_file="$test_home/cli/crm_mon-partial.xml"
+ 
+     desc="Text output of partially active resources"
+-    cmd="crm_mon -1"
++    cmd="crm_mon -1 --show-detail"
+     test_assert $CRM_EX_OK 0
+ 
+     desc="XML output of partially active resources"
+@@ -428,13 +428,13 @@ function test_crm_mon() {
+     test_assert_validate $CRM_EX_OK 0
+ 
+     desc="Text output of partially active resources, with inactive resources"
+-    cmd="crm_mon -1 -r"
++    cmd="crm_mon -1 -r --show-detail"
+     test_assert $CRM_EX_OK 0
+ 
+     # XML already includes inactive resources
+ 
+     desc="Complete brief text output, with inactive resources"
+-    cmd="crm_mon -1 -r --include=all --brief"
++    cmd="crm_mon -1 -r --include=all --brief --show-detail"
+     test_assert $CRM_EX_OK 0
+ 
+     # XML does not have a brief output option
+@@ -452,11 +452,11 @@ function test_crm_mon() {
+     test_assert $CRM_EX_OK 0
+ 
+     desc="Text output of inactive member of partially active group"
+-    cmd="crm_mon -1 --resource=dummy-2"
++    cmd="crm_mon -1 --resource=dummy-2 --show-detail"
+     test_assert $CRM_EX_OK 0
+ 
+     desc="Complete brief text output grouped by node, with inactive resources"
+-    cmd="crm_mon -1 -r --include=all --group-by-node --brief"
++    cmd="crm_mon -1 -r --include=all --group-by-node --brief --show-detail"
+     test_assert $CRM_EX_OK 0
+ 
+     desc="Text output of partially active resources, with inactive resources, filtered by node"
+-- 
+2.27.0
+
+
+From da14053e5957d84ed0647688d37733adc2f988a3 Mon Sep 17 00:00:00 2001
+From: Chris Lumens <clumens@redhat.com>
+Date: Mon, 29 Nov 2021 15:05:42 -0500
+Subject: [PATCH 12/21] Test: scheduler: Add tests for failed probe operations.
+
+This adds identical sets of tests for primitive resources and cloned
+resources.  For the moment, the output reflects the current state of the
+code.  No changes have been made to properly handle these operations
+yet.
+
+Each set has three resources, and each is set up with a slightly
+different configuration of probe failures (see the example CIB
+encoding after this list):
+
+(1) - Maskable probe failure on each node.
+(2) - Maskable probe failure on one node, successful "not running" probe
+      on the other node.  The resource should be started on the node
+      where "not running" was returned.
+(3) - Maskable probe failure on one node, non-maskable probe failure on
+      the other node.  The resource should not be running anywhere, and
+      should be stopped on the node with the non-maskable failure.
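+
+In the CIB status sections added below, each configuration is encoded
+through the probe result's rc-code: in these tests, 5 ("not
+installed") is the maskable failure, 7 ("not running") is the clean
+probe result, and 4 ("insufficient privileges") is the non-maskable
+failure.  For example (attributes trimmed for brevity):
+
+    <lrm_rsc_op id="ping-3_last_failure_0" operation="monitor"
+                on_node="cluster01" rc-code="4" op-status="0" ... />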
+---
+ cts/cts-scheduler.in                          |   2 +
+ cts/scheduler/dot/failed-probe-clone.dot      |  30 ++++
+ cts/scheduler/dot/failed-probe-primitive.dot  |   4 +
+ cts/scheduler/exp/failed-probe-clone.exp      | 141 ++++++++++++++++++
+ cts/scheduler/exp/failed-probe-primitive.exp  |  20 +++
+ .../scores/failed-probe-clone.scores          |  33 ++++
+ .../scores/failed-probe-primitive.scores      |   9 ++
+ .../summary/failed-probe-clone.summary        |  46 ++++++
+ .../summary/failed-probe-primitive.summary    |  27 ++++
+ cts/scheduler/xml/failed-probe-clone.xml      | 110 ++++++++++++++
+ cts/scheduler/xml/failed-probe-primitive.xml  |  71 +++++++++
+ 11 files changed, 493 insertions(+)
+ create mode 100644 cts/scheduler/dot/failed-probe-clone.dot
+ create mode 100644 cts/scheduler/dot/failed-probe-primitive.dot
+ create mode 100644 cts/scheduler/exp/failed-probe-clone.exp
+ create mode 100644 cts/scheduler/exp/failed-probe-primitive.exp
+ create mode 100644 cts/scheduler/scores/failed-probe-clone.scores
+ create mode 100644 cts/scheduler/scores/failed-probe-primitive.scores
+ create mode 100644 cts/scheduler/summary/failed-probe-clone.summary
+ create mode 100644 cts/scheduler/summary/failed-probe-primitive.summary
+ create mode 100644 cts/scheduler/xml/failed-probe-clone.xml
+ create mode 100644 cts/scheduler/xml/failed-probe-primitive.xml
+
+diff --git a/cts/cts-scheduler.in b/cts/cts-scheduler.in
+index 17fd6cefdf..3abcbc6c9d 100644
+--- a/cts/cts-scheduler.in
++++ b/cts/cts-scheduler.in
+@@ -113,6 +113,8 @@ TESTS = [
+         [ "probe-3", "Probe (pending node)" ],
+         [ "probe-4", "Probe (pending node + stopped resource)" ],
+         [ "probe-pending-node", "Probe (pending node + unmanaged resource)" ],
++        [ "failed-probe-primitive", "Maskable vs. unmaskable probe failures on primitive resources" ],
++        [ "failed-probe-clone", "Maskable vs. unmaskable probe failures on cloned resources" ],
+         [ "standby", "Standby" ],
+         [ "comments", "Comments" ],
+     ],
+diff --git a/cts/scheduler/dot/failed-probe-clone.dot b/cts/scheduler/dot/failed-probe-clone.dot
+new file mode 100644
+index 0000000000..90536b46ed
+--- /dev/null
++++ b/cts/scheduler/dot/failed-probe-clone.dot
+@@ -0,0 +1,30 @@
++ digraph "g" {
++"ping-1_clear_failcount_0 cluster01" [ style=bold color="green" fontcolor="black"]
++"ping-1_clear_failcount_0 cluster02" [ style=bold color="green" fontcolor="black"]
++"ping-2-clone_running_0" [ style=bold color="green" fontcolor="orange"]
++"ping-2-clone_start_0" -> "ping-2-clone_running_0" [ style = bold]
++"ping-2-clone_start_0" -> "ping-2_start_0 cluster02" [ style = bold]
++"ping-2-clone_start_0" [ style=bold color="green" fontcolor="orange"]
++"ping-2_clear_failcount_0 cluster01" [ style=bold color="green" fontcolor="black"]
++"ping-2_clear_failcount_0 cluster02" [ style=bold color="green" fontcolor="black"]
++"ping-2_monitor_10000 cluster02" [ style=bold color="green" fontcolor="black"]
++"ping-2_start_0 cluster02" -> "ping-2-clone_running_0" [ style = bold]
++"ping-2_start_0 cluster02" -> "ping-2_monitor_10000 cluster02" [ style = bold]
++"ping-2_start_0 cluster02" [ style=bold color="green" fontcolor="black"]
++"ping-3-clone_running_0" [ style=dashed color="red" fontcolor="orange"]
++"ping-3-clone_start_0" -> "ping-3-clone_running_0" [ style = dashed]
++"ping-3-clone_start_0" -> "ping-3_start_0 <none>" [ style = dashed]
++"ping-3-clone_start_0" [ style=dashed color="red" fontcolor="orange"]
++"ping-3-clone_stop_0" -> "ping-3-clone_stopped_0" [ style = bold]
++"ping-3-clone_stop_0" -> "ping-3_stop_0 cluster01" [ style = bold]
++"ping-3-clone_stop_0" [ style=bold color="green" fontcolor="orange"]
++"ping-3-clone_stopped_0" -> "ping-3-clone_start_0" [ style = dashed]
++"ping-3-clone_stopped_0" [ style=bold color="green" fontcolor="orange"]
++"ping-3_clear_failcount_0 cluster01" [ style=bold color="green" fontcolor="black"]
++"ping-3_clear_failcount_0 cluster02" [ style=bold color="green" fontcolor="black"]
++"ping-3_start_0 <none>" -> "ping-3-clone_running_0" [ style = dashed]
++"ping-3_start_0 <none>" [ style=dashed color="red" fontcolor="black"]
++"ping-3_stop_0 cluster01" -> "ping-3-clone_stopped_0" [ style = bold]
++"ping-3_stop_0 cluster01" -> "ping-3_start_0 <none>" [ style = dashed]
++"ping-3_stop_0 cluster01" [ style=bold color="green" fontcolor="black"]
++}
+diff --git a/cts/scheduler/dot/failed-probe-primitive.dot b/cts/scheduler/dot/failed-probe-primitive.dot
+new file mode 100644
+index 0000000000..6e0c83216a
+--- /dev/null
++++ b/cts/scheduler/dot/failed-probe-primitive.dot
+@@ -0,0 +1,4 @@
++ digraph "g" {
++"dummy-2_start_0 cluster02" [ style=bold color="green" fontcolor="black"]
++"dummy-3_stop_0 cluster01" [ style=bold color="green" fontcolor="black"]
++}
+diff --git a/cts/scheduler/exp/failed-probe-clone.exp b/cts/scheduler/exp/failed-probe-clone.exp
+new file mode 100644
+index 0000000000..6be18935bf
+--- /dev/null
++++ b/cts/scheduler/exp/failed-probe-clone.exp
+@@ -0,0 +1,141 @@
++<transition_graph cluster-delay="60s" stonith-timeout="60s" failed-stop-offset="INFINITY" failed-start-offset="INFINITY"  transition_id="0">
++  <synapse id="0">
++    <action_set>
++      <crm_event id="6" operation="clear_failcount" operation_key="ping-1_clear_failcount_0" internal_operation_key="ping-1:0_clear_failcount_0" on_node="cluster02" on_node_uuid="2">
++        <primitive id="ping-1" long-id="ping-1:0" class="ocf" provider="pacemaker" type="ping"/>
++        <attributes CRM_meta_clone="0" CRM_meta_clone_max="2" CRM_meta_clone_node_max="1" CRM_meta_globally_unique="false" CRM_meta_notify="false" CRM_meta_on_node="cluster02" CRM_meta_on_node_uuid="2" CRM_meta_op_no_wait="true" CRM_meta_timeout="20000"  dampen="5s" host_list="192.168.122.1" multiplier="1000"/>
++      </crm_event>
++    </action_set>
++    <inputs/>
++  </synapse>
++  <synapse id="1">
++    <action_set>
++      <crm_event id="2" operation="clear_failcount" operation_key="ping-1_clear_failcount_0" internal_operation_key="ping-1:0_clear_failcount_0" on_node="cluster01" on_node_uuid="1">
++        <primitive id="ping-1" long-id="ping-1:0" class="ocf" provider="pacemaker" type="ping"/>
++        <attributes CRM_meta_clone="0" CRM_meta_clone_max="2" CRM_meta_clone_node_max="1" CRM_meta_globally_unique="false" CRM_meta_notify="false" CRM_meta_on_node="cluster01" CRM_meta_on_node_uuid="1" CRM_meta_op_no_wait="true" CRM_meta_timeout="20000"  dampen="5s" host_list="192.168.122.1" multiplier="1000"/>
++      </crm_event>
++    </action_set>
++    <inputs/>
++  </synapse>
++  <synapse id="2">
++    <action_set>
++      <rsc_op id="17" operation="monitor" operation_key="ping-2_monitor_10000" internal_operation_key="ping-2:0_monitor_10000" on_node="cluster02" on_node_uuid="2">
++        <primitive id="ping-2" long-id="ping-2:0" class="ocf" provider="pacemaker" type="ping"/>
++        <attributes CRM_meta_clone="0" CRM_meta_clone_max="2" CRM_meta_clone_node_max="1" CRM_meta_globally_unique="false" CRM_meta_interval="10000" CRM_meta_name="monitor" CRM_meta_notify="false" CRM_meta_on_node="cluster02" CRM_meta_on_node_uuid="2" CRM_meta_timeout="60000"  dampen="5s" host_list="192.168.122.2" multiplier="1000"/>
++      </rsc_op>
++    </action_set>
++    <inputs>
++      <trigger>
++        <rsc_op id="16" operation="start" operation_key="ping-2_start_0" internal_operation_key="ping-2:0_start_0" on_node="cluster02" on_node_uuid="2"/>
++      </trigger>
++    </inputs>
++  </synapse>
++  <synapse id="3">
++    <action_set>
++      <rsc_op id="16" operation="start" operation_key="ping-2_start_0" internal_operation_key="ping-2:0_start_0" on_node="cluster02" on_node_uuid="2">
++        <primitive id="ping-2" long-id="ping-2:0" class="ocf" provider="pacemaker" type="ping"/>
++        <attributes CRM_meta_clone="0" CRM_meta_clone_max="2" CRM_meta_clone_node_max="1" CRM_meta_globally_unique="false" CRM_meta_name="start" CRM_meta_notify="false" CRM_meta_on_node="cluster02" CRM_meta_on_node_uuid="2" CRM_meta_timeout="60000"  dampen="5s" host_list="192.168.122.2" multiplier="1000"/>
++      </rsc_op>
++    </action_set>
++    <inputs>
++      <trigger>
++        <pseudo_event id="18" operation="start" operation_key="ping-2-clone_start_0"/>
++      </trigger>
++    </inputs>
++  </synapse>
++  <synapse id="4">
++    <action_set>
++      <crm_event id="7" operation="clear_failcount" operation_key="ping-2_clear_failcount_0" internal_operation_key="ping-2:0_clear_failcount_0" on_node="cluster02" on_node_uuid="2">
++        <primitive id="ping-2" long-id="ping-2:0" class="ocf" provider="pacemaker" type="ping"/>
++        <attributes CRM_meta_clone="0" CRM_meta_clone_max="2" CRM_meta_clone_node_max="1" CRM_meta_globally_unique="false" CRM_meta_notify="false" CRM_meta_on_node="cluster02" CRM_meta_on_node_uuid="2" CRM_meta_op_no_wait="true" CRM_meta_timeout="20000"  dampen="5s" host_list="192.168.122.2" multiplier="1000"/>
++      </crm_event>
++    </action_set>
++    <inputs/>
++  </synapse>
++  <synapse id="5">
++    <action_set>
++      <crm_event id="3" operation="clear_failcount" operation_key="ping-2_clear_failcount_0" internal_operation_key="ping-2:0_clear_failcount_0" on_node="cluster01" on_node_uuid="1">
++        <primitive id="ping-2" long-id="ping-2:0" class="ocf" provider="pacemaker" type="ping"/>
++        <attributes CRM_meta_clone="0" CRM_meta_clone_max="2" CRM_meta_clone_node_max="1" CRM_meta_globally_unique="false" CRM_meta_notify="false" CRM_meta_on_node="cluster01" CRM_meta_on_node_uuid="1" CRM_meta_op_no_wait="true" CRM_meta_timeout="20000"  dampen="5s" host_list="192.168.122.2" multiplier="1000"/>
++      </crm_event>
++    </action_set>
++    <inputs/>
++  </synapse>
++  <synapse id="6" priority="1000000">
++    <action_set>
++      <pseudo_event id="19" operation="running" operation_key="ping-2-clone_running_0">
++        <attributes CRM_meta_clone_max="2" CRM_meta_clone_node_max="1" CRM_meta_globally_unique="false" CRM_meta_notify="false" CRM_meta_timeout="20000" />
++      </pseudo_event>
++    </action_set>
++    <inputs>
++      <trigger>
++        <rsc_op id="16" operation="start" operation_key="ping-2_start_0" internal_operation_key="ping-2:0_start_0" on_node="cluster02" on_node_uuid="2"/>
++      </trigger>
++      <trigger>
++        <pseudo_event id="18" operation="start" operation_key="ping-2-clone_start_0"/>
++      </trigger>
++    </inputs>
++  </synapse>
++  <synapse id="7">
++    <action_set>
++      <pseudo_event id="18" operation="start" operation_key="ping-2-clone_start_0">
++        <attributes CRM_meta_clone_max="2" CRM_meta_clone_node_max="1" CRM_meta_globally_unique="false" CRM_meta_notify="false" CRM_meta_timeout="20000" />
++      </pseudo_event>
++    </action_set>
++    <inputs/>
++  </synapse>
++  <synapse id="8">
++    <action_set>
++      <rsc_op id="5" operation="stop" operation_key="ping-3_stop_0" internal_operation_key="ping-3:0_stop_0" on_node="cluster01" on_node_uuid="1">
++        <primitive id="ping-3" long-id="ping-3:0" class="ocf" provider="pacemaker" type="ping"/>
++        <attributes CRM_meta_clone="0" CRM_meta_clone_max="2" CRM_meta_clone_node_max="1" CRM_meta_globally_unique="false" CRM_meta_name="stop" CRM_meta_notify="false" CRM_meta_on_node="cluster01" CRM_meta_on_node_uuid="1" CRM_meta_timeout="20000"  dampen="5s" host_list="192.168.122.3" multiplier="1000"/>
++      </rsc_op>
++    </action_set>
++    <inputs>
++      <trigger>
++        <pseudo_event id="24" operation="stop" operation_key="ping-3-clone_stop_0"/>
++      </trigger>
++    </inputs>
++  </synapse>
++  <synapse id="9">
++    <action_set>
++      <crm_event id="4" operation="clear_failcount" operation_key="ping-3_clear_failcount_0" internal_operation_key="ping-3:0_clear_failcount_0" on_node="cluster01" on_node_uuid="1">
++        <primitive id="ping-3" long-id="ping-3:0" class="ocf" provider="pacemaker" type="ping"/>
++        <attributes CRM_meta_clone="0" CRM_meta_clone_max="2" CRM_meta_clone_node_max="1" CRM_meta_globally_unique="false" CRM_meta_notify="false" CRM_meta_on_node="cluster01" CRM_meta_on_node_uuid="1" CRM_meta_op_no_wait="true" CRM_meta_timeout="20000"  dampen="5s" host_list="192.168.122.3" multiplier="1000"/>
++      </crm_event>
++    </action_set>
++    <inputs/>
++  </synapse>
++  <synapse id="10">
++    <action_set>
++      <crm_event id="8" operation="clear_failcount" operation_key="ping-3_clear_failcount_0" internal_operation_key="ping-3:1_clear_failcount_0" on_node="cluster02" on_node_uuid="2">
++        <primitive id="ping-3" long-id="ping-3:1" class="ocf" provider="pacemaker" type="ping"/>
++        <attributes CRM_meta_clone="1" CRM_meta_clone_max="2" CRM_meta_clone_node_max="1" CRM_meta_globally_unique="false" CRM_meta_notify="false" CRM_meta_on_node="cluster02" CRM_meta_on_node_uuid="2" CRM_meta_op_no_wait="true" CRM_meta_timeout="20000"  dampen="5s" host_list="192.168.122.3" multiplier="1000"/>
++      </crm_event>
++    </action_set>
++    <inputs/>
++  </synapse>
++  <synapse id="11" priority="1000000">
++    <action_set>
++      <pseudo_event id="25" operation="stopped" operation_key="ping-3-clone_stopped_0">
++        <attributes CRM_meta_clone_max="2" CRM_meta_clone_node_max="1" CRM_meta_globally_unique="false" CRM_meta_notify="false" CRM_meta_timeout="20000" />
++      </pseudo_event>
++    </action_set>
++    <inputs>
++      <trigger>
++        <rsc_op id="5" operation="stop" operation_key="ping-3_stop_0" internal_operation_key="ping-3:0_stop_0" on_node="cluster01" on_node_uuid="1"/>
++      </trigger>
++      <trigger>
++        <pseudo_event id="24" operation="stop" operation_key="ping-3-clone_stop_0"/>
++      </trigger>
++    </inputs>
++  </synapse>
++  <synapse id="12">
++    <action_set>
++      <pseudo_event id="24" operation="stop" operation_key="ping-3-clone_stop_0">
++        <attributes CRM_meta_clone_max="2" CRM_meta_clone_node_max="1" CRM_meta_globally_unique="false" CRM_meta_notify="false" CRM_meta_timeout="20000" />
++      </pseudo_event>
++    </action_set>
++    <inputs/>
++  </synapse>
++</transition_graph>
+diff --git a/cts/scheduler/exp/failed-probe-primitive.exp b/cts/scheduler/exp/failed-probe-primitive.exp
+new file mode 100644
+index 0000000000..d0d8aa44dc
+--- /dev/null
++++ b/cts/scheduler/exp/failed-probe-primitive.exp
+@@ -0,0 +1,20 @@
++<transition_graph cluster-delay="60s" stonith-timeout="60s" failed-stop-offset="INFINITY" failed-start-offset="INFINITY"  transition_id="0">
++  <synapse id="0">
++    <action_set>
++      <rsc_op id="5" operation="start" operation_key="dummy-2_start_0" on_node="cluster02" on_node_uuid="2">
++        <primitive id="dummy-2" class="ocf" provider="pacemaker" type="Dummy"/>
++        <attributes CRM_meta_on_node="cluster02" CRM_meta_on_node_uuid="2" CRM_meta_timeout="20000" />
++      </rsc_op>
++    </action_set>
++    <inputs/>
++  </synapse>
++  <synapse id="1">
++    <action_set>
++      <rsc_op id="2" operation="stop" operation_key="dummy-3_stop_0" on_node="cluster01" on_node_uuid="1">
++        <primitive id="dummy-3" class="ocf" provider="pacemaker" type="Dummy"/>
++        <attributes CRM_meta_on_node="cluster01" CRM_meta_on_node_uuid="1" CRM_meta_timeout="20000" />
++      </rsc_op>
++    </action_set>
++    <inputs/>
++  </synapse>
++</transition_graph>
+diff --git a/cts/scheduler/scores/failed-probe-clone.scores b/cts/scheduler/scores/failed-probe-clone.scores
+new file mode 100644
+index 0000000000..7418b7f153
+--- /dev/null
++++ b/cts/scheduler/scores/failed-probe-clone.scores
+@@ -0,0 +1,33 @@
++
++pcmk__clone_allocate: ping-1-clone allocation score on cluster01: -INFINITY
++pcmk__clone_allocate: ping-1-clone allocation score on cluster02: -INFINITY
++pcmk__clone_allocate: ping-1:0 allocation score on cluster01: -INFINITY
++pcmk__clone_allocate: ping-1:0 allocation score on cluster02: -INFINITY
++pcmk__clone_allocate: ping-1:1 allocation score on cluster01: -INFINITY
++pcmk__clone_allocate: ping-1:1 allocation score on cluster02: -INFINITY
++pcmk__clone_allocate: ping-2-clone allocation score on cluster01: -INFINITY
++pcmk__clone_allocate: ping-2-clone allocation score on cluster02: 0
++pcmk__clone_allocate: ping-2:0 allocation score on cluster01: -INFINITY
++pcmk__clone_allocate: ping-2:0 allocation score on cluster02: 0
++pcmk__clone_allocate: ping-2:1 allocation score on cluster01: -INFINITY
++pcmk__clone_allocate: ping-2:1 allocation score on cluster02: 0
++pcmk__clone_allocate: ping-3-clone allocation score on cluster01: -INFINITY
++pcmk__clone_allocate: ping-3-clone allocation score on cluster02: -INFINITY
++pcmk__clone_allocate: ping-3:0 allocation score on cluster01: -INFINITY
++pcmk__clone_allocate: ping-3:0 allocation score on cluster02: -INFINITY
++pcmk__clone_allocate: ping-3:1 allocation score on cluster01: -INFINITY
++pcmk__clone_allocate: ping-3:1 allocation score on cluster02: -INFINITY
++pcmk__native_allocate: Fencing allocation score on cluster01: 0
++pcmk__native_allocate: Fencing allocation score on cluster02: 0
++pcmk__native_allocate: ping-1:0 allocation score on cluster01: -INFINITY
++pcmk__native_allocate: ping-1:0 allocation score on cluster02: -INFINITY
++pcmk__native_allocate: ping-1:1 allocation score on cluster01: -INFINITY
++pcmk__native_allocate: ping-1:1 allocation score on cluster02: -INFINITY
++pcmk__native_allocate: ping-2:0 allocation score on cluster01: -INFINITY
++pcmk__native_allocate: ping-2:0 allocation score on cluster02: 0
++pcmk__native_allocate: ping-2:1 allocation score on cluster01: -INFINITY
++pcmk__native_allocate: ping-2:1 allocation score on cluster02: -INFINITY
++pcmk__native_allocate: ping-3:0 allocation score on cluster01: -INFINITY
++pcmk__native_allocate: ping-3:0 allocation score on cluster02: -INFINITY
++pcmk__native_allocate: ping-3:1 allocation score on cluster01: -INFINITY
++pcmk__native_allocate: ping-3:1 allocation score on cluster02: -INFINITY
+diff --git a/cts/scheduler/scores/failed-probe-primitive.scores b/cts/scheduler/scores/failed-probe-primitive.scores
+new file mode 100644
+index 0000000000..f313029451
+--- /dev/null
++++ b/cts/scheduler/scores/failed-probe-primitive.scores
+@@ -0,0 +1,9 @@
++
++pcmk__native_allocate: Fencing allocation score on cluster01: 0
++pcmk__native_allocate: Fencing allocation score on cluster02: 0
++pcmk__native_allocate: dummy-1 allocation score on cluster01: -INFINITY
++pcmk__native_allocate: dummy-1 allocation score on cluster02: -INFINITY
++pcmk__native_allocate: dummy-2 allocation score on cluster01: -INFINITY
++pcmk__native_allocate: dummy-2 allocation score on cluster02: 0
++pcmk__native_allocate: dummy-3 allocation score on cluster01: -INFINITY
++pcmk__native_allocate: dummy-3 allocation score on cluster02: -INFINITY
+diff --git a/cts/scheduler/summary/failed-probe-clone.summary b/cts/scheduler/summary/failed-probe-clone.summary
+new file mode 100644
+index 0000000000..ca15c302aa
+--- /dev/null
++++ b/cts/scheduler/summary/failed-probe-clone.summary
+@@ -0,0 +1,46 @@
++Current cluster status:
++  * Node List:
++    * Online: [ cluster01 cluster02 ]
++
++  * Full List of Resources:
++    * Fencing	(stonith:fence_xvm):	 Started cluster01
++    * Clone Set: ping-1-clone [ping-1]:
++      * Stopped: [ cluster01 cluster02 ]
++    * Clone Set: ping-2-clone [ping-2]:
++      * Stopped: [ cluster01 cluster02 ]
++    * Clone Set: ping-3-clone [ping-3]:
++      * ping-3	(ocf:pacemaker:ping):	 FAILED cluster01
++      * Stopped: [ cluster02 ]
++
++Transition Summary:
++  * Start      ping-2:0     ( cluster02 )
++  * Stop       ping-3:0     ( cluster01 )  due to node availability
++
++Executing Cluster Transition:
++  * Cluster action:  clear_failcount for ping-1 on cluster02
++  * Cluster action:  clear_failcount for ping-1 on cluster01
++  * Cluster action:  clear_failcount for ping-2 on cluster02
++  * Cluster action:  clear_failcount for ping-2 on cluster01
++  * Pseudo action:   ping-2-clone_start_0
++  * Cluster action:  clear_failcount for ping-3 on cluster01
++  * Cluster action:  clear_failcount for ping-3 on cluster02
++  * Pseudo action:   ping-3-clone_stop_0
++  * Resource action: ping-2          start on cluster02
++  * Pseudo action:   ping-2-clone_running_0
++  * Resource action: ping-3          stop on cluster01
++  * Pseudo action:   ping-3-clone_stopped_0
++  * Resource action: ping-2          monitor=10000 on cluster02
++
++Revised Cluster Status:
++  * Node List:
++    * Online: [ cluster01 cluster02 ]
++
++  * Full List of Resources:
++    * Fencing	(stonith:fence_xvm):	 Started cluster01
++    * Clone Set: ping-1-clone [ping-1]:
++      * Stopped: [ cluster01 cluster02 ]
++    * Clone Set: ping-2-clone [ping-2]:
++      * Started: [ cluster02 ]
++      * Stopped: [ cluster01 ]
++    * Clone Set: ping-3-clone [ping-3]:
++      * Stopped: [ cluster01 cluster02 ]
+diff --git a/cts/scheduler/summary/failed-probe-primitive.summary b/cts/scheduler/summary/failed-probe-primitive.summary
+new file mode 100644
+index 0000000000..a634e7f00b
+--- /dev/null
++++ b/cts/scheduler/summary/failed-probe-primitive.summary
+@@ -0,0 +1,27 @@
++Current cluster status:
++  * Node List:
++    * Online: [ cluster01 cluster02 ]
++
++  * Full List of Resources:
++    * Fencing	(stonith:fence_xvm):	 Started cluster01
++    * dummy-1	(ocf:pacemaker:Dummy):	 Stopped
++    * dummy-2	(ocf:pacemaker:Dummy):	 Stopped
++    * dummy-3	(ocf:pacemaker:Dummy):	 FAILED cluster01
++
++Transition Summary:
++  * Start      dummy-2     ( cluster02 )
++  * Stop       dummy-3     ( cluster01 )  due to node availability
++
++Executing Cluster Transition:
++  * Resource action: dummy-2         start on cluster02
++  * Resource action: dummy-3         stop on cluster01
++
++Revised Cluster Status:
++  * Node List:
++    * Online: [ cluster01 cluster02 ]
++
++  * Full List of Resources:
++    * Fencing	(stonith:fence_xvm):	 Started cluster01
++    * dummy-1	(ocf:pacemaker:Dummy):	 Stopped
++    * dummy-2	(ocf:pacemaker:Dummy):	 Started cluster02
++    * dummy-3	(ocf:pacemaker:Dummy):	 Stopped
+diff --git a/cts/scheduler/xml/failed-probe-clone.xml b/cts/scheduler/xml/failed-probe-clone.xml
+new file mode 100644
+index 0000000000..f677585bab
+--- /dev/null
++++ b/cts/scheduler/xml/failed-probe-clone.xml
+@@ -0,0 +1,110 @@
++<cib crm_feature_set="3.3.0" validate-with="pacemaker-3.3" epoch="1" num_updates="37" admin_epoch="1" cib-last-written="Tue May  5 12:04:36 2020" update-origin="cluster01" update-client="crmd" update-user="hacluster" have-quorum="1" dc-uuid="2">
++  <configuration>
++    <crm_config>
++      <cluster_property_set id="cib-bootstrap-options">
++        <nvpair id="cib-bootstrap-options-have-watchdog" name="have-watchdog" value="false"/>
++        <nvpair id="cib-bootstrap-options-dc-version" name="dc-version" value="2.0.4-1.e97f9675f.git.el7-e97f9675f"/>
++        <nvpair id="cib-bootstrap-options-cluster-infrastructure" name="cluster-infrastructure" value="corosync"/>
++        <nvpair id="cib-bootstrap-options-cluster-name" name="cluster-name" value="test-cluster"/>
++        <nvpair id="cib-bootstrap-options-stonith-enabled" name="stonith-enabled" value="true"/>
++        <nvpair id="cib-bootstrap-options-maintenance-mode" name="maintenance-mode" value="false"/>
++      </cluster_property_set>
++    </crm_config>
++    <nodes>
++      <node id="1" uname="cluster01"/>
++      <node id="2" uname="cluster02"/>
++    </nodes>
++    <resources>
++      <primitive class="stonith" id="Fencing" type="fence_xvm">
++        <instance_attributes id="Fencing-instance_attributes">
++          <nvpair id="Fencing-instance_attributes-ip_family" name="ip_family" value="ipv4"/>
++        </instance_attributes>
++        <operations>
++          <op id="Fencing-monitor-interval-60s" interval="60s" name="monitor"/>
++        </operations>
++      </primitive>
++      <clone id="ping-1-clone">
++        <primitive class="ocf" id="ping-1" provider="pacemaker" type="ping">
++          <instance_attributes id="ping-1-instance_attributes">
++            <nvpair id="ping-1-instance_attributes-dampen" name="dampen" value="5s"/>
++            <nvpair id="ping-1-instance_attributes-host_list" name="host_list" value="192.168.122.1"/>
++            <nvpair id="ping-1-instance_attributes-multiplier" name="multiplier" value="1000"/>
++          </instance_attributes>
++          <operations>
++            <op id="ping-1-monitor-interval-10s" interval="10s" name="monitor" timeout="60s"/>
++            <op id="ping-1-start-interval-0s" interval="0s" name="start" timeout="60s"/>
++            <op id="ping-1-stop-interval-0s" interval="0s" name="stop" timeout="20s"/>
++          </operations>
++        </primitive>
++      </clone>
++      <clone id="ping-2-clone">
++        <primitive class="ocf" id="ping-2" provider="pacemaker" type="ping">
++          <instance_attributes id="ping-2-instance_attributes">
++            <nvpair id="ping-2-instance_attributes-dampen" name="dampen" value="5s"/>
++            <nvpair id="ping-2-instance_attributes-host_list" name="host_list" value="192.168.122.2"/>
++            <nvpair id="ping-2-instance_attributes-multiplier" name="multiplier" value="1000"/>
++          </instance_attributes>
++          <operations>
++            <op id="ping-2-monitor-interval-10s" interval="10s" name="monitor" timeout="60s"/>
++            <op id="ping-2-start-interval-0s" interval="0s" name="start" timeout="60s"/>
++            <op id="ping-2-stop-interval-0s" interval="0s" name="stop" timeout="20s"/>
++          </operations>
++        </primitive>
++      </clone>
++      <clone id="ping-3-clone">
++        <primitive class="ocf" id="ping-3" provider="pacemaker" type="ping">
++          <instance_attributes id="ping-3-instance_attributes">
++            <nvpair id="ping-3-instance_attributes-dampen" name="dampen" value="5s"/>
++            <nvpair id="ping-3-instance_attributes-host_list" name="host_list" value="192.168.122.3"/>
++            <nvpair id="ping-3-instance_attributes-multiplier" name="multiplier" value="1000"/>
++          </instance_attributes>
++          <operations>
++            <op id="ping-3-monitor-interval-10s" interval="10s" name="monitor" timeout="60s"/>
++            <op id="ping-3-start-interval-0s" interval="0s" name="start" timeout="60s"/>
++            <op id="ping-3-stop-interval-0s" interval="0s" name="stop" timeout="20s"/>
++          </operations>
++        </primitive>
++      </clone>
++    </resources>
++    <constraints/>
++  </configuration>
++  <status>
++    <node_state id="1" uname="cluster01" in_ccm="true" crmd="online" crm-debug-origin="do_update_resource" join="member" expected="member">
++      <lrm id="1">
++        <lrm_resources>
++          <lrm_resource id="Fencing" type="fence_xvm" class="stonith">
++            <lrm_rsc_op id="Fencing_last_0" operation_key="Fencing_start_0" operation="start" crm-debug-origin="do_update_resource" crm_feature_set="3.3.0" transition-key="3:1:0:4a9e64d6-e1dd-4395-917c-1596312eafe4" transition-magic="0:0;3:1:0:4a9e64d6-e1dd-4395-917c-1596312eafe4" exit-reason="" on_node="cluster01" call-id="3" rc-code="0" op-status="0" interval="0" last-rc-change="1588951272" exec-time="36" queue-time="0" op-digest="7da16842ab2328e41f737cab5e5fc89c"/>
++            <lrm_rsc_op id="Fencing_monitor_60000" operation_key="Fencing_monitor_60000" operation="monitor" crm-debug-origin="crm_simulate" crm_feature_set="3.3.0" transition-key="4:-1:0:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" transition-magic="0:0;4:-1:0:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" exit-reason="" on_node="cluster01" call-id="4" rc-code="0" op-status="0" interval="60000" last-rc-change="1590608589" exec-time="0" queue-time="0" op-digest="a88218bb6c7dc47e6586fc75fc2a8d69"/>
++          </lrm_resource>
++          <lrm_resource id="ping-1" class="ocf" provider="pacemaker" type="ping">
++            <lrm_rsc_op id="ping-1_last_failure_0" operation_key="ping-1_monitor_0" operation="monitor" crm-debug-origin="crm_simulate" crm_feature_set="3.3.0" transition-key="5:1:0:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" transition-magic="0:5;5:1:0:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" exit-reason="" on_node="cluster01" call-id="5" rc-code="5" op-status="0" interval="0" last-rc-change="1599063458" exec-time="33" queue-time="0" op-digest="f2317cad3d54cec5d7d7aa7d0bf35cf8"/>
++          </lrm_resource>
++          <lrm_resource id="ping-2" class="ocf" provider="pacemaker" type="ping">
++            <lrm_rsc_op id="ping-2_last_failure_0" operation_key="ping-2_monitor_0" operation="monitor" crm-debug-origin="crm_simulate" crm_feature_set="3.3.0" transition-key="6:1:0:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" transition-magic="0:5;6:1:0:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" exit-reason="" on_node="cluster01" call-id="6" rc-code="5" op-status="0" interval="0" last-rc-change="1599063458" exec-time="33" queue-time="0" op-digest="f2317cad3d54cec5d7d7aa7d0bf35cf8"/>
++          </lrm_resource>
++          <lrm_resource id="ping-3" class="ocf" provider="pacemaker" type="ping">
++            <lrm_rsc_op id="ping-3_last_failure_0" operation_key="ping-3_monitor_0" operation="monitor" crm-debug-origin="crm_simulate" crm_feature_set="3.3.0" transition-key="9:1:0:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" transition-magic="0:4;9:1:0:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" exit-reason="" on_node="cluster01" call-id="9" rc-code="4" op-status="0" interval="0" last-rc-change="1599063458" exec-time="33" queue-time="0" op-digest="f2317cad3d54cec5d7d7aa7d0bf35cf8"/>
++          </lrm_resource>
++        </lrm_resources>
++      </lrm>
++    </node_state>
++    <node_state id="2" uname="cluster02" in_ccm="true" crmd="online" crm-debug-origin="do_update_resource" join="member" expected="member">
++      <lrm id="2">
++        <lrm_resources>
++          <lrm_resource id="Fencing" type="fence_xvm" class="stonith">
++            <lrm_rsc_op id="Fencing_last_0" operation_key="Fencing_monitor_0" operation="monitor" crm-debug-origin="do_update_resource" crm_feature_set="3.3.0" transition-key="1:0:7:4a9e64d6-e1dd-4395-917c-1596312eafe4" transition-magic="0:7;1:0:7:4a9e64d6-e1dd-4395-917c-1596312eafe4" exit-reason="" on_node="cluster02" call-id="1" rc-code="7" op-status="0" interval="0" last-rc-change="1588951263" exec-time="3" queue-time="0" op-digest="7da16842ab2328e41f737cab5e5fc89c"/>
++          </lrm_resource>
++          <lrm_resource id="ping-1" class="ocf" provider="pacemaker" type="ping">
++            <lrm_rsc_op id="ping-1_last_failure_0" operation_key="ping-1_monitor_0" operation="monitor" crm-debug-origin="crm_simulate" crm_feature_set="3.3.0" transition-key="2:1:0:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" transition-magic="0:5;2:1:0:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" exit-reason="" on_node="cluster02" call-id="2" rc-code="5" op-status="0" interval="0" last-rc-change="1599063458" exec-time="33" queue-time="0" op-digest="f2317cad3d54cec5d7d7aa7d0bf35cf8"/>
++          </lrm_resource>
++          <lrm_resource id="ping-2" class="ocf" provider="pacemaker" type="ping">
++            <lrm_rsc_op id="ping-2_last_failure_0" operation_key="ping-2_monitor_0" operation="monitor" crm-debug-origin="crm_simulate" crm_feature_set="3.3.0" transition-key="7:1:0:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" transition-magic="0:7;7:1:0:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" exit-reason="" on_node="cluster02" call-id="7" rc-code="7" op-status="0" interval="0" last-rc-change="1599063458" exec-time="33" queue-time="0" op-digest="f2317cad3d54cec5d7d7aa7d0bf35cf8"/>
++          </lrm_resource>
++          <lrm_resource id="ping-3" class="ocf" provider="pacemaker" type="ping">
++            <lrm_rsc_op id="ping-3_last_failure_0" operation_key="ping-3_monitor_0" operation="monitor" crm-debug-origin="crm_simulate" crm_feature_set="3.3.0" transition-key="8:1:0:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" transition-magic="0:5;8:1:0:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" exit-reason="" on_node="cluster02" call-id="8" rc-code="5" op-status="0" interval="0" last-rc-change="1599063458" exec-time="33" queue-time="0" op-digest="f2317cad3d54cec5d7d7aa7d0bf35cf8"/>
++          </lrm_resource>
++        </lrm_resources>
++      </lrm>
++    </node_state>
++  </status>
++</cib>
+diff --git a/cts/scheduler/xml/failed-probe-primitive.xml b/cts/scheduler/xml/failed-probe-primitive.xml
+new file mode 100644
+index 0000000000..0c2f6416f5
+--- /dev/null
++++ b/cts/scheduler/xml/failed-probe-primitive.xml
+@@ -0,0 +1,71 @@
++<cib crm_feature_set="3.3.0" validate-with="pacemaker-3.3" epoch="1" num_updates="37" admin_epoch="1" cib-last-written="Tue May  5 12:04:36 2020" update-origin="cluster01" update-client="crmd" update-user="hacluster" have-quorum="1" dc-uuid="2">
++  <configuration>
++    <crm_config>
++      <cluster_property_set id="cib-bootstrap-options">
++        <nvpair id="cib-bootstrap-options-have-watchdog" name="have-watchdog" value="false"/>
++        <nvpair id="cib-bootstrap-options-dc-version" name="dc-version" value="2.0.4-1.e97f9675f.git.el7-e97f9675f"/>
++        <nvpair id="cib-bootstrap-options-cluster-infrastructure" name="cluster-infrastructure" value="corosync"/>
++        <nvpair id="cib-bootstrap-options-cluster-name" name="cluster-name" value="test-cluster"/>
++        <nvpair id="cib-bootstrap-options-stonith-enabled" name="stonith-enabled" value="true"/>
++        <nvpair id="cib-bootstrap-options-maintenance-mode" name="maintenance-mode" value="false"/>
++      </cluster_property_set>
++    </crm_config>
++    <nodes>
++      <node id="1" uname="cluster01"/>
++      <node id="2" uname="cluster02"/>
++    </nodes>
++    <resources>
++      <primitive class="stonith" id="Fencing" type="fence_xvm">
++        <instance_attributes id="Fencing-instance_attributes">
++          <nvpair id="Fencing-instance_attributes-ip_family" name="ip_family" value="ipv4"/>
++        </instance_attributes>
++        <operations>
++          <op id="Fencing-monitor-interval-60s" interval="60s" name="monitor"/>
++        </operations>
++      </primitive>
++      <primitive class="ocf" id="dummy-1" provider="pacemaker" type="Dummy"/>
++      <primitive class="ocf" id="dummy-2" provider="pacemaker" type="Dummy"/>
++      <primitive class="ocf" id="dummy-3" provider="pacemaker" type="Dummy"/>
++    </resources>
++    <constraints/>
++  </configuration>
++  <status>
++    <node_state id="1" uname="cluster01" in_ccm="true" crmd="online" crm-debug-origin="do_update_resource" join="member" expected="member">
++      <lrm id="1">
++        <lrm_resources>
++          <lrm_resource id="Fencing" type="fence_xvm" class="stonith">
++            <lrm_rsc_op id="Fencing_last_0" operation_key="Fencing_start_0" operation="start" crm-debug-origin="do_update_resource" crm_feature_set="3.3.0" transition-key="3:1:0:4a9e64d6-e1dd-4395-917c-1596312eafe4" transition-magic="0:0;3:1:0:4a9e64d6-e1dd-4395-917c-1596312eafe4" exit-reason="" on_node="cluster01" call-id="3" rc-code="0" op-status="0" interval="0" last-rc-change="1588951272" exec-time="36" queue-time="0" op-digest="7da16842ab2328e41f737cab5e5fc89c"/>
++            <lrm_rsc_op id="Fencing_monitor_60000" operation_key="Fencing_monitor_60000" operation="monitor" crm-debug-origin="crm_simulate" crm_feature_set="3.3.0" transition-key="4:-1:0:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" transition-magic="0:0;4:-1:0:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" exit-reason="" on_node="cluster01" call-id="4" rc-code="0" op-status="0" interval="60000" last-rc-change="1590608589" exec-time="0" queue-time="0" op-digest="a88218bb6c7dc47e6586fc75fc2a8d69"/>
++          </lrm_resource>
++          <lrm_resource id="dummy-1" class="ocf" provider="pacemaker" type="Dummy">
++            <lrm_rsc_op id="dummy-1_last_failure_0" operation_key="dummy-1_monitor_0" operation="monitor" crm-debug-origin="crm_simulate" crm_feature_set="3.3.0" transition-key="5:1:0:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" transition-magic="0:5;5:1:0:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" exit-reason="" on_node="cluster01" call-id="5" rc-code="5" op-status="0" interval="0" last-rc-change="1599063458" exec-time="33" queue-time="0" op-digest="f2317cad3d54cec5d7d7aa7d0bf35cf8"/>
++          </lrm_resource>
++          <lrm_resource id="dummy-2" class="ocf" provider="pacemaker" type="Dummy">
++            <lrm_rsc_op id="dummy-2_last_failure_0" operation_key="dummy-2_monitor_0" operation="monitor" crm-debug-origin="crm_simulate" crm_feature_set="3.3.0" transition-key="6:1:0:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" transition-magic="0:5;6:1:0:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" exit-reason="" on_node="cluster01" call-id="6" rc-code="5" op-status="0" interval="0" last-rc-change="1599063458" exec-time="33" queue-time="0" op-digest="f2317cad3d54cec5d7d7aa7d0bf35cf8"/>
++          </lrm_resource>
++          <lrm_resource id="dummy-3" class="ocf" provider="pacemaker" type="Dummy">
++            <lrm_rsc_op id="dummy-3_last_failure_0" operation_key="dummy-3_monitor_0" operation="monitor" crm-debug-origin="crm_simulate" crm_feature_set="3.3.0" transition-key="9:1:0:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" transition-magic="0:4;9:1:0:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" exit-reason="" on_node="cluster01" call-id="9" rc-code="4" op-status="0" interval="0" last-rc-change="1599063458" exec-time="33" queue-time="0" op-digest="f2317cad3d54cec5d7d7aa7d0bf35cf8"/>
++          </lrm_resource>
++        </lrm_resources>
++      </lrm>
++    </node_state>
++    <node_state id="2" uname="cluster02" in_ccm="true" crmd="online" crm-debug-origin="do_update_resource" join="member" expected="member">
++      <lrm id="2">
++        <lrm_resources>
++          <lrm_resource id="Fencing" type="fence_xvm" class="stonith">
++            <lrm_rsc_op id="Fencing_last_0" operation_key="Fencing_monitor_0" operation="monitor" crm-debug-origin="do_update_resource" crm_feature_set="3.3.0" transition-key="1:0:7:4a9e64d6-e1dd-4395-917c-1596312eafe4" transition-magic="0:7;1:0:7:4a9e64d6-e1dd-4395-917c-1596312eafe4" exit-reason="" on_node="cluster02" call-id="1" rc-code="7" op-status="0" interval="0" last-rc-change="1588951263" exec-time="3" queue-time="0" op-digest="7da16842ab2328e41f737cab5e5fc89c"/>
++          </lrm_resource>
++          <lrm_resource id="dummy-1" class="ocf" provider="pacemaker" type="Dummy">
++            <lrm_rsc_op id="dummy-1_last_failure_0" operation_key="dummy-1_monitor_0" operation="monitor" crm-debug-origin="crm_simulate" crm_feature_set="3.3.0" transition-key="2:1:0:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" transition-magic="0:5;2:1:0:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" exit-reason="" on_node="cluster02" call-id="2" rc-code="5" op-status="0" interval="0" last-rc-change="1599063458" exec-time="33" queue-time="0" op-digest="f2317cad3d54cec5d7d7aa7d0bf35cf8"/>
++          </lrm_resource>
++          <lrm_resource id="dummy-2" class="ocf" provider="pacemaker" type="Dummy">
++            <lrm_rsc_op id="dummy-2_last_failure_0" operation_key="dummy-2_monitor_0" operation="monitor" crm-debug-origin="crm_simulate" crm_feature_set="3.3.0" transition-key="7:1:0:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" transition-magic="0:7;7:1:0:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" exit-reason="" on_node="cluster02" call-id="7" rc-code="7" op-status="0" interval="0" last-rc-change="1599063458" exec-time="33" queue-time="0" op-digest="f2317cad3d54cec5d7d7aa7d0bf35cf8"/>
++          </lrm_resource>
++          <lrm_resource id="dummy-3" class="ocf" provider="pacemaker" type="Dummy">
++            <lrm_rsc_op id="dummy-3_last_failure_0" operation_key="dummy-3_monitor_0" operation="monitor" crm-debug-origin="crm_simulate" crm_feature_set="3.3.0" transition-key="8:1:0:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" transition-magic="0:5;8:1:0:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" exit-reason="" on_node="cluster02" call-id="8" rc-code="5" op-status="0" interval="0" last-rc-change="1599063458" exec-time="33" queue-time="0" op-digest="f2317cad3d54cec5d7d7aa7d0bf35cf8"/>
++          </lrm_resource>
++        </lrm_resources>
++      </lrm>
++    </node_state>
++  </status>
++</cib>
+-- 
+2.27.0
+
+
+From 271d50e7d6b0ee5ef670b571c6d7aae9272b75ad Mon Sep 17 00:00:00 2001
+From: Chris Lumens <clumens@redhat.com>
+Date: Thu, 11 Nov 2021 13:57:05 -0500
+Subject: [PATCH 13/21] Feature: scheduler: Don't output failed resource
+ probes...
+
+in the crm_mon "Failed Resource Actions" section.  It is expected that
+these one-off probes will fail, in which case displaying them in that
+section can just come across as confusing to the user.
+
+And update the crm_mon test output to account for these changes.
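+
+The filter itself is a short check added to failed_action_list(),
+sketched here with explanatory comments (the hunk below is
+authoritative):
+
+    if (pcmk_xe_mask_probe_failure(xml_op)) {
+        /* An expected "resource not present" probe answer, not a real
+         * failure, so keep it out of the failure list.
+         */
+        continue;
+    }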
+
+See: rhbz#1506372
+---
+ cts/cli/regression.crm_mon.exp | 20 --------------------
+ lib/pengine/pe_output.c        |  4 ++++
+ 2 files changed, 4 insertions(+), 20 deletions(-)
+
+diff --git a/cts/cli/regression.crm_mon.exp b/cts/cli/regression.crm_mon.exp
+index d7b9d98e2c..b1643f8b29 100644
+--- a/cts/cli/regression.crm_mon.exp
++++ b/cts/cli/regression.crm_mon.exp
+@@ -3498,10 +3498,6 @@ Active Resources:
+ 
+ Failed Resource Actions:
+   * dummy-2_monitor_0 on cluster02 'unimplemented feature' (3): call=2, status='complete', last-rc-change='Wed Sep  2 12:17:38 2020', queued=0ms, exec=33ms
+-  * dummy-4_monitor_0 on cluster02 'not installed' (5): call=2, status='complete', last-rc-change='Wed Sep  2 12:17:38 2020', queued=0ms, exec=0ms
+-  * smart-mon_monitor_0 on cluster02 'not installed' (5): call=9, status='complete', last-rc-change='Tue Nov  9 15:38:55 2021', queued=0ms, exec=33ms
+-  * ping_monitor_0 on cluster02 'not installed' (5): call=6, status='complete', last-rc-change='Thu Nov 18 13:11:42 2021', queued=0ms, exec=0ms
+-  * httpd_monitor_0 on httpd-bundle-1 'invalid parameter' (2): call=1, status='complete', last-rc-change='Wed May 27 15:43:09 2020', queued=0ms, exec=0ms
+ =#=#=#= End test: Text output of partially active resources - OK (0) =#=#=#=
+ * Passed: crm_mon        - Text output of partially active resources
+ =#=#=#= Begin test: XML output of partially active resources =#=#=#=
+@@ -3646,10 +3642,6 @@ Failed Resource Actions:
+   </node_history>
+   <failures>
+     <failure op_key="dummy-2_monitor_0" node="cluster02" exitstatus="unimplemented feature" exitreason="" exitcode="3" call="2" status="complete" last-rc-change="2020-09-02 12:17:38 -04:00" queued="0" exec="33" interval="0" task="monitor"/>
+-    <failure op_key="dummy-4_monitor_0" node="cluster02" exitstatus="not installed" exitreason="" exitcode="5" call="2" status="complete" last-rc-change="2020-09-02 12:17:38 -04:00" queued="0" exec="0" interval="0" task="monitor"/>
+-    <failure op_key="smart-mon_monitor_0" node="cluster02" exitstatus="not installed" exitreason="" exitcode="5" call="9" status="complete" last-rc-change="2021-11-09 15:38:55 -05:00" queued="0" exec="33" interval="0" task="monitor"/>
+-    <failure op_key="ping_monitor_0" node="cluster02" exitstatus="not installed" exitreason="" exitcode="5" call="6" status="complete" last-rc-change="2021-11-18 13:11:42 -05:00" queued="0" exec="0" interval="0" task="monitor"/>
+-    <failure op_key="httpd_monitor_0" node="httpd-bundle-1" exitstatus="invalid parameter" exitreason="" exitcode="2" call="1" status="complete" last-rc-change="2020-05-27 15:43:09 -04:00" queued="0" exec="0" interval="0" task="monitor"/>
+   </failures>
+   <status code="0" message="OK"/>
+ </pacemaker-result>
+@@ -3693,10 +3685,6 @@ Full List of Resources:
+ 
+ Failed Resource Actions:
+   * dummy-2_monitor_0 on cluster02 'unimplemented feature' (3): call=2, status='complete', last-rc-change='Wed Sep  2 12:17:38 2020', queued=0ms, exec=33ms
+-  * dummy-4_monitor_0 on cluster02 'not installed' (5): call=2, status='complete', last-rc-change='Wed Sep  2 12:17:38 2020', queued=0ms, exec=0ms
+-  * smart-mon_monitor_0 on cluster02 'not installed' (5): call=9, status='complete', last-rc-change='Tue Nov  9 15:38:55 2021', queued=0ms, exec=33ms
+-  * ping_monitor_0 on cluster02 'not installed' (5): call=6, status='complete', last-rc-change='Thu Nov 18 13:11:42 2021', queued=0ms, exec=0ms
+-  * httpd_monitor_0 on httpd-bundle-1 'invalid parameter' (2): call=1, status='complete', last-rc-change='Wed May 27 15:43:09 2020', queued=0ms, exec=0ms
+ =#=#=#= End test: Text output of partially active resources, with inactive resources - OK (0) =#=#=#=
+ * Passed: crm_mon        - Text output of partially active resources, with inactive resources
+ =#=#=#= Begin test: Complete brief text output, with inactive resources =#=#=#=
+@@ -3784,10 +3772,6 @@ Operations:
+ 
+ Failed Resource Actions:
+   * dummy-2_monitor_0 on cluster02 'unimplemented feature' (3): call=2, status='complete', last-rc-change='Wed Sep  2 12:17:38 2020', queued=0ms, exec=33ms
+-  * dummy-4_monitor_0 on cluster02 'not installed' (5): call=2, status='complete', last-rc-change='Wed Sep  2 12:17:38 2020', queued=0ms, exec=0ms
+-  * smart-mon_monitor_0 on cluster02 'not installed' (5): call=9, status='complete', last-rc-change='Tue Nov  9 15:38:55 2021', queued=0ms, exec=33ms
+-  * ping_monitor_0 on cluster02 'not installed' (5): call=6, status='complete', last-rc-change='Thu Nov 18 13:11:42 2021', queued=0ms, exec=0ms
+-  * httpd_monitor_0 on httpd-bundle-1 'invalid parameter' (2): call=1, status='complete', last-rc-change='Wed May 27 15:43:09 2020', queued=0ms, exec=0ms
+ =#=#=#= End test: Complete brief text output, with inactive resources - OK (0) =#=#=#=
+ * Passed: crm_mon        - Complete brief text output, with inactive resources
+ =#=#=#= Begin test: Text output of partially active group =#=#=#=
+@@ -3959,10 +3943,6 @@ Operations:
+ 
+ Failed Resource Actions:
+   * dummy-2_monitor_0 on cluster02 'unimplemented feature' (3): call=2, status='complete', last-rc-change='Wed Sep  2 12:17:38 2020', queued=0ms, exec=33ms
+-  * dummy-4_monitor_0 on cluster02 'not installed' (5): call=2, status='complete', last-rc-change='Wed Sep  2 12:17:38 2020', queued=0ms, exec=0ms
+-  * smart-mon_monitor_0 on cluster02 'not installed' (5): call=9, status='complete', last-rc-change='Tue Nov  9 15:38:55 2021', queued=0ms, exec=33ms
+-  * ping_monitor_0 on cluster02 'not installed' (5): call=6, status='complete', last-rc-change='Thu Nov 18 13:11:42 2021', queued=0ms, exec=0ms
+-  * httpd_monitor_0 on httpd-bundle-1 'invalid parameter' (2): call=1, status='complete', last-rc-change='Wed May 27 15:43:09 2020', queued=0ms, exec=0ms
+ =#=#=#= End test: Complete brief text output grouped by node, with inactive resources - OK (0) =#=#=#=
+ * Passed: crm_mon        - Complete brief text output grouped by node, with inactive resources
+ =#=#=#= Begin test: Text output of partially active resources, with inactive resources, filtered by node =#=#=#=
+diff --git a/lib/pengine/pe_output.c b/lib/pengine/pe_output.c
+index 715e001d51..84684598dd 100644
+--- a/lib/pengine/pe_output.c
++++ b/lib/pengine/pe_output.c
+@@ -1370,6 +1370,10 @@ failed_action_list(pcmk__output_t *out, va_list args) {
+             continue;
+         }
+ 
++        if (pcmk_xe_mask_probe_failure(xml_op)) {
++            continue;
++        }
++
+         id = crm_element_value(xml_op, XML_LRM_ATTR_TASK_KEY);
+         if (parse_op_key(id ? id : ID(xml_op), &rsc, NULL, NULL) == FALSE) {
+             continue;
+-- 
+2.27.0
+
+
+From 90f641b9223c64701d494297ce3dd3382365acb8 Mon Sep 17 00:00:00 2001
+From: Chris Lumens <clumens@redhat.com>
+Date: Tue, 9 Nov 2021 10:11:19 -0500
+Subject: [PATCH 14/21] Feature: scheduler: Add a function for finding a failed
+ probe action...
+
+for a given resource ID.  Optionally, a node name can also be given to
+restrict the search to a failed probe action that ran on that node.
+Otherwise, the first failed probe action found for the resource ID is
+returned.
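+
+A hedged usage sketch (the node name argument is illustrative; the
+actual signature and the rc-scanning idiom appear in the patches
+below):
+
+    xmlNode *probe_op = pe__failed_probe_for_rsc(rsc, "cluster01");
+
+    if (probe_op != NULL) {
+        int rc = 0;
+
+        /* Recover the exit code recorded for the failed probe */
+        pcmk__scan_min_int(crm_element_value(probe_op, XML_LRM_ATTR_RC),
+                           &rc, 0);
+    }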
+
+See: rhbz#1506372
+---
+ include/crm/pengine/internal.h |  2 ++
+ lib/pengine/utils.c            | 42 ++++++++++++++++++++++++++++++++++
+ 2 files changed, 44 insertions(+)
+
+diff --git a/include/crm/pengine/internal.h b/include/crm/pengine/internal.h
+index 8c8fbaca90..58dd2e8727 100644
+--- a/include/crm/pengine/internal.h
++++ b/include/crm/pengine/internal.h
+@@ -574,4 +574,6 @@ gboolean pe__clone_is_filtered(pe_resource_t *rsc, GList *only_rsc, gboolean che
+ gboolean pe__group_is_filtered(pe_resource_t *rsc, GList *only_rsc, gboolean check_parent);
+ gboolean pe__native_is_filtered(pe_resource_t *rsc, GList *only_rsc, gboolean check_parent);
+ 
++xmlNode *pe__failed_probe_for_rsc(pe_resource_t *rsc, const char *name);
++
+ #endif
+diff --git a/lib/pengine/utils.c b/lib/pengine/utils.c
+index 07753e173a..3151f0120b 100644
+--- a/lib/pengine/utils.c
++++ b/lib/pengine/utils.c
+@@ -2569,3 +2569,45 @@ pe__build_rsc_list(pe_working_set_t *data_set, const char *s) {
+ 
+     return resources;
+ }
++
++xmlNode *
++pe__failed_probe_for_rsc(pe_resource_t *rsc, const char *name)
++{
++    const char *rsc_id = rsc->id;
++
++    for (xmlNode *xml_op = pcmk__xml_first_child(rsc->cluster->failed); xml_op != NULL;
++         xml_op = pcmk__xml_next(xml_op)) {
++        const char *value = NULL;
++        char *op_id = NULL;
++
++        /* This resource operation is not a failed probe. */
++        if (!pcmk_xe_mask_probe_failure(xml_op)) {
++            continue;
++        }
++
++        /* This resource operation was not run on the given node.  Note that if name is
++         * NULL, this will always succeed.
++         */
++        value = crm_element_value(xml_op, XML_LRM_ATTR_TARGET);
++        if (value == NULL || !pcmk__str_eq(value, name, pcmk__str_casei|pcmk__str_null_matches)) {
++            continue;
++        }
++
++        /* This resource operation has no operation_key. */
++        value = crm_element_value(xml_op, XML_LRM_ATTR_TASK_KEY);
++        if (!parse_op_key(value ? value : ID(xml_op), &op_id, NULL, NULL)) {
++            continue;
++        }
++
++        /* This resource operation's ID does not match the rsc_id we are looking for. */
++        if (!pcmk__str_eq(op_id, rsc_id, pcmk__str_none)) {
++            free(op_id);
++            continue;
++        }
++
++        free(op_id);
++        return xml_op;
++    }
++
++    return NULL;
++}
+-- 
+2.27.0
+
+
+From 2ad9774fe994554243078b131799fed0d1a6dffd Mon Sep 17 00:00:00 2001
+From: Chris Lumens <clumens@redhat.com>
+Date: Tue, 9 Nov 2021 15:43:24 -0500
+Subject: [PATCH 15/21] Feature: scheduler: Display the reason why a native rsc
+ probe failed.
+
+If inactive resources are being shown, append a short note to any
+stopped resource that has a failed probe action, indicating why the
+probe failed.
+
+Also add a new primitive resource with a failed probe operation to
+crm_mon-partial.xml and update the expected test output.
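+
+For example, a probe recorded with rc-code 5 maps to the string
+"not installed" via services_ocf_exitcode_str(), so such a resource is
+rendered roughly as:
+
+    * dummy-4  (ocf:pacemaker:Dummy):   Stopped (not installed)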
+
+See: rhbz#1506372
+---
+ cts/cli/regression.crm_mon.exp                        | 10 +++++-----
+ cts/scheduler/summary/failed-probe-primitive.summary  |  8 ++++----
+ cts/scheduler/summary/multiply-active-stonith.summary |  2 +-
+ lib/pengine/native.c                                  | 11 +++++++++++
+ 4 files changed, 21 insertions(+), 10 deletions(-)
+
+diff --git a/cts/cli/regression.crm_mon.exp b/cts/cli/regression.crm_mon.exp
+index b1643f8b29..4333caa11c 100644
+--- a/cts/cli/regression.crm_mon.exp
++++ b/cts/cli/regression.crm_mon.exp
+@@ -3680,8 +3680,8 @@ Full List of Resources:
+     * dummy-1	(ocf:pacemaker:Dummy):	 Started cluster02
+     * dummy-2	(ocf:pacemaker:Dummy):	 FAILED cluster02
+     * dummy-3	(ocf:pacemaker:Dummy):	 Stopped (disabled)
+-    * dummy-4	(ocf:pacemaker:Dummy):	 Stopped
+-  * smart-mon	(ocf:pacemaker:HealthSMART):	 Stopped
++    * dummy-4	(ocf:pacemaker:Dummy):	 Stopped (not installed) 
++  * smart-mon	(ocf:pacemaker:HealthSMART):	 Stopped (not installed) 
+ 
+ Failed Resource Actions:
+   * dummy-2_monitor_0 on cluster02 'unimplemented feature' (3): call=2, status='complete', last-rc-change='Wed Sep  2 12:17:38 2020', queued=0ms, exec=33ms
+@@ -3811,7 +3811,7 @@ Full List of Resources:
+     * dummy-1	(ocf:pacemaker:Dummy):	 Started cluster02
+     * dummy-2	(ocf:pacemaker:Dummy):	 FAILED cluster02
+     * dummy-3	(ocf:pacemaker:Dummy):	 Stopped (disabled)
+-    * dummy-4	(ocf:pacemaker:Dummy):	 Stopped
++    * dummy-4	(ocf:pacemaker:Dummy):	 Stopped (not installed) 
+ =#=#=#= End test: Text output of partially active group, with inactive resources - OK (0) =#=#=#=
+ * Passed: crm_mon        - Text output of partially active group, with inactive resources
+ =#=#=#= Begin test: Text output of active member of partially active group =#=#=#=
+@@ -3889,7 +3889,7 @@ Inactive Resources:
+     * ping	(ocf:pacemaker:ping):	 Stopped
+   * Resource Group: partially-active-group:
+     * 2/4	(ocf:pacemaker:Dummy):	Active cluster02
+-  * smart-mon	(ocf:pacemaker:HealthSMART):	 Stopped
++  * smart-mon	(ocf:pacemaker:HealthSMART):	 Stopped (not installed) 
+ 
+ Node Attributes:
+   * Node: cluster01 (1):
+@@ -3963,7 +3963,7 @@ Full List of Resources:
+   * Fencing	(stonith:fence_xvm):	 Started cluster01
+   * Container bundle set: httpd-bundle [pcmk:http]:
+     * httpd-bundle-1 (192.168.122.132)	(ocf:heartbeat:apache):	 FAILED cluster01
+-  * smart-mon	(ocf:pacemaker:HealthSMART):	 Stopped
++  * smart-mon	(ocf:pacemaker:HealthSMART):	 Stopped (not installed) 
+ =#=#=#= End test: Text output of partially active resources, with inactive resources, filtered by node - OK (0) =#=#=#=
+ * Passed: crm_mon        - Text output of partially active resources, with inactive resources, filtered by node
+ =#=#=#= Begin test: Text output of partially active resources, filtered by node =#=#=#=
+diff --git a/cts/scheduler/summary/failed-probe-primitive.summary b/cts/scheduler/summary/failed-probe-primitive.summary
+index a634e7f00b..ea8edae494 100644
+--- a/cts/scheduler/summary/failed-probe-primitive.summary
++++ b/cts/scheduler/summary/failed-probe-primitive.summary
+@@ -4,8 +4,8 @@ Current cluster status:
+ 
+   * Full List of Resources:
+     * Fencing	(stonith:fence_xvm):	 Started cluster01
+-    * dummy-1	(ocf:pacemaker:Dummy):	 Stopped
+-    * dummy-2	(ocf:pacemaker:Dummy):	 Stopped
++    * dummy-1	(ocf:pacemaker:Dummy):	 Stopped (not installed) 
++    * dummy-2	(ocf:pacemaker:Dummy):	 Stopped (not installed) 
+     * dummy-3	(ocf:pacemaker:Dummy):	 FAILED cluster01
+ 
+ Transition Summary:
+@@ -22,6 +22,6 @@ Revised Cluster Status:
+ 
+   * Full List of Resources:
+     * Fencing	(stonith:fence_xvm):	 Started cluster01
+-    * dummy-1	(ocf:pacemaker:Dummy):	 Stopped
++    * dummy-1	(ocf:pacemaker:Dummy):	 Stopped (not installed) 
+     * dummy-2	(ocf:pacemaker:Dummy):	 Started cluster02
+-    * dummy-3	(ocf:pacemaker:Dummy):	 Stopped
++    * dummy-3	(ocf:pacemaker:Dummy):	 Stopped (not installed) 
+diff --git a/cts/scheduler/summary/multiply-active-stonith.summary b/cts/scheduler/summary/multiply-active-stonith.summary
+index 8ce21d68ee..ec37de03b0 100644
+--- a/cts/scheduler/summary/multiply-active-stonith.summary
++++ b/cts/scheduler/summary/multiply-active-stonith.summary
+@@ -25,4 +25,4 @@ Revised Cluster Status:
+ 
+   * Full List of Resources:
+     * fencer	(stonith:fence_ipmilan):	 Started node3
+-    * rsc1	(lsb:rsc1):	 Stopped
++    * rsc1	(lsb:rsc1):	 Stopped (not installed) 
+diff --git a/lib/pengine/native.c b/lib/pengine/native.c
+index 36121c527f..a95c90c09a 100644
+--- a/lib/pengine/native.c
++++ b/lib/pengine/native.c
+@@ -599,6 +599,17 @@ pcmk__native_output_string(pe_resource_t *rsc, const char *name, pe_node_t *node
+         g_string_append_printf(outstr, " %s", node->details->uname);
+     }
+ 
++    // Failed probe operation
++    if (native_displayable_role(rsc) == RSC_ROLE_STOPPED) {
++        xmlNode *probe_op = pe__failed_probe_for_rsc(rsc, node ? node->details->uname : NULL);
++        if (probe_op != NULL) {
++            int rc;
++
++            pcmk__scan_min_int(crm_element_value(probe_op, XML_LRM_ATTR_RC), &rc, 0);
++            g_string_append_printf(outstr, " (%s) ", services_ocf_exitcode_str(rc));
++        }
++    }
++
+     // Flags, as: (<flag> [...])
+     if (node && !(node->details->online) && node->details->unclean) {
+         have_flags = add_output_flag(outstr, "UNCLEAN", have_flags);
+-- 
+2.27.0
+
+
+From b9ca2e834ee01b35c03f153438ef8828b609fb38 Mon Sep 17 00:00:00 2001
+From: Chris Lumens <clumens@redhat.com>
+Date: Thu, 18 Nov 2021 10:41:42 -0500
+Subject: [PATCH 16/21] Refactor: scheduler: Rearrange pe__clone_default.
+
+Instead of the single stopped list, maintain a hash table where the keys
+are nodes and the values are each node's status.  For now, the status is
+just "Stopped" or "Stopped (disabled)", but it will later be expanded to
+cover failed probe operations.
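+
+A sketch of the intended table contents (node names illustrative):
+
+    "cluster01" -> "Stopped"
+    "cluster02" -> "Stopped (disabled)"
+
+Grouping the keys by value then yields one output line per distinct
+status, e.g. "Stopped: [ cluster01 ]".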
+---
+ lib/pengine/clone.c | 103 +++++++++++++++++++++++++++++++++++---------
+ 1 file changed, 82 insertions(+), 21 deletions(-)
+
+diff --git a/lib/pengine/clone.c b/lib/pengine/clone.c
+index 5569c6b6e9..58fb24d24e 100644
+--- a/lib/pengine/clone.c
++++ b/lib/pengine/clone.c
+@@ -28,6 +28,55 @@
+ #define UNPROMOTED_INSTANCES RSC_ROLE_UNPROMOTED_S
+ #endif
+ 
++static GList *
++sorted_hash_table_values(GHashTable *table)
++{
++    GList *retval = NULL;
++    GHashTableIter iter;
++    gpointer key, value;
++
++    g_hash_table_iter_init(&iter, table);
++    while (g_hash_table_iter_next(&iter, &key, &value)) {
++        if (!g_list_find_custom(retval, value, (GCompareFunc) strcmp)) {
++            retval = g_list_prepend(retval, (char *) value);
++        }
++    }
++
++    retval = g_list_sort(retval, (GCompareFunc) strcmp);
++    return retval;
++}
++
++static GList *
++nodes_with_status(GHashTable *table, const char *status)
++{
++    GList *retval = NULL;
++    GHashTableIter iter;
++    gpointer key, value;
++
++    g_hash_table_iter_init(&iter, table);
++    while (g_hash_table_iter_next(&iter, &key, &value)) {
++        if (!strcmp((char *) value, status)) {
++            retval = g_list_prepend(retval, key);
++        }
++    }
++
++    retval = g_list_sort(retval, (GCompareFunc) pcmk__numeric_strcasecmp);
++    return retval;
++}
++
++static char *
++node_list_to_str(GList *list)
++{
++    char *retval = NULL;
++    size_t len = 0;
++
++    for (GList *iter = list; iter != NULL; iter = iter->next) {
++        pcmk__add_word(&retval, &len, (char *) iter->data);
++    }
++
++    return retval;
++}
++
+ static void
+ clone_header(pcmk__output_t *out, int *rc, pe_resource_t *rsc, clone_variant_data_t *clone_data)
+ {
+@@ -710,10 +759,10 @@ pe__clone_default(pcmk__output_t *out, va_list args)
+     GList *only_node = va_arg(args, GList *);
+     GList *only_rsc = va_arg(args, GList *);
+ 
++    GHashTable *stopped = pcmk__strkey_table(free, free);
++
+     char *list_text = NULL;
+-    char *stopped_list = NULL;
+     size_t list_text_len = 0;
+-    size_t stopped_list_len = 0;
+ 
+     GList *promoted_list = NULL;
+     GList *started_list = NULL;
+@@ -768,7 +817,7 @@ pe__clone_default(pcmk__output_t *out, va_list args)
+             // List stopped instances when requested (except orphans)
+             if (!pcmk_is_set(child_rsc->flags, pe_rsc_orphan)
+                 && pcmk_is_set(show_opts, pcmk_show_inactive_rscs)) {
+-                pcmk__add_word(&stopped_list, &stopped_list_len, child_rsc->id);
++                g_hash_table_insert(stopped, strdup(child_rsc->id), strdup("Stopped"));
+             }
+ 
+         } else if (is_set_recursive(child_rsc, pe_rsc_orphan, TRUE)
+@@ -822,7 +871,7 @@ pe__clone_default(pcmk__output_t *out, va_list args)
+     }
+ 
+     if (pcmk_is_set(show_opts, pcmk_show_clone_detail)) {
+-        free(stopped_list);
++        g_hash_table_destroy(stopped);
+         PCMK__OUTPUT_LIST_FOOTER(out, rc);
+         return pcmk_rc_ok;
+     }
+@@ -890,23 +939,15 @@ pe__clone_default(pcmk__output_t *out, va_list args)
+     }
+ 
+     if (pcmk_is_set(show_opts, pcmk_show_inactive_rscs)) {
+-        const char *state = "Stopped";
+-        enum rsc_role_e role = configured_role(rsc);
+-
+-        if (role == RSC_ROLE_STOPPED) {
+-            state = "Stopped (disabled)";
+-        }
+-
+         if (!pcmk_is_set(rsc->flags, pe_rsc_unique)
+             && (clone_data->clone_max > active_instances)) {
+ 
+             GList *nIter;
+             GList *list = g_hash_table_get_values(rsc->allowed_nodes);
+ 
+-            /* Custom stopped list for non-unique clones */
+-            free(stopped_list);
+-            stopped_list = NULL;
+-            stopped_list_len = 0;
++            /* Custom stopped table for non-unique clones */
++            g_hash_table_destroy(stopped);
++            stopped = pcmk__strkey_table(free, free);
+ 
+             if (list == NULL) {
+                 /* Clusters with symmetrical=false haven't calculated allowed_nodes yet
+@@ -922,19 +963,39 @@ pe__clone_default(pcmk__output_t *out, va_list args)
+                 if (pe_find_node(rsc->running_on, node->details->uname) == NULL &&
+                     pcmk__str_in_list(node->details->uname, only_node,
+                                       pcmk__str_star_matches|pcmk__str_casei)) {
+-                    pcmk__add_word(&stopped_list, &stopped_list_len,
+-                                   node->details->uname);
++                    const char *state = "Stopped";
++
++                    if (configured_role(rsc) == RSC_ROLE_STOPPED) {
++                        state = "Stopped (disabled)";
++                    }
++
++                    g_hash_table_insert(stopped, strdup(node->details->uname),
++                                        strdup(state));
+                 }
+             }
+             g_list_free(list);
+         }
+ 
+-        if (stopped_list != NULL) {
++        if (g_hash_table_size(stopped) > 0) {
++            GList *list = sorted_hash_table_values(stopped);
++
+             clone_header(out, &rc, rsc, clone_data);
+ 
+-            out->list_item(out, NULL, "%s: [ %s ]", state, stopped_list);
+-            free(stopped_list);
+-            stopped_list_len = 0;
++            for (GList *status_iter = list; status_iter != NULL; status_iter = status_iter->next) {
++                const char *status = status_iter->data;
++                GList *nodes = nodes_with_status(stopped, status);
++                char *str = node_list_to_str(nodes);
++
++                if (str != NULL) {
++                    out->list_item(out, NULL, "%s: [ %s ]", status, str);
++                    free(str);
++                }
++
++                g_list_free(nodes);
++            }
++
++            g_list_free(list);
++            g_hash_table_destroy(stopped);
+ 
+         /* If there are no instances of this clone (perhaps because there are no
+          * nodes configured), simply output the clone header by itself.  This can
+-- 
+2.27.0
+
+
+From 0228a64cea412936fb8ee91b0f83f9800048d3ba Mon Sep 17 00:00:00 2001
+From: Chris Lumens <clumens@redhat.com>
+Date: Fri, 19 Nov 2021 10:06:18 -0500
+Subject: [PATCH 17/21] Feature: scheduler: Display the reason why a clone rsc
+ probe failed.
+
+This is similar to the previous commit that adds reasons for primitive
+resources.
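+
+For example, a clone instance whose probe failed with "not installed"
+on cluster01 is now summarized as:
+
+    * Stopped (not installed): [ cluster01 ]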
+
+See: rhbz#1506372
+---
+ cts/cli/regression.crm_mon.exp                |  8 +++----
+ .../summary/failed-probe-clone.summary        | 14 +++++++------
+ include/crm/pengine/internal.h                |  2 ++
+ lib/pengine/clone.c                           | 21 +++++++++++++++++--
+ lib/pengine/utils.c                           |  7 +++++++
+ 5 files changed, 40 insertions(+), 12 deletions(-)
+
+diff --git a/cts/cli/regression.crm_mon.exp b/cts/cli/regression.crm_mon.exp
+index 4333caa11c..5688500ce5 100644
+--- a/cts/cli/regression.crm_mon.exp
++++ b/cts/cli/regression.crm_mon.exp
+@@ -3479,7 +3479,7 @@ Node List:
+ Active Resources:
+   * Clone Set: ping-clone [ping]:
+     * ping	(ocf:pacemaker:ping):	 Started cluster01
+-    * ping	(ocf:pacemaker:ping):	 Stopped
++    * ping	(ocf:pacemaker:ping):	 Stopped (not installed) 
+   * Fencing	(stonith:fence_xvm):	 Started cluster01
+   * Container bundle set: httpd-bundle [pcmk:http]:
+     * Replica[0]
+@@ -3663,7 +3663,7 @@ Node List:
+ Full List of Resources:
+   * Clone Set: ping-clone [ping]:
+     * ping	(ocf:pacemaker:ping):	 Started cluster01
+-    * ping	(ocf:pacemaker:ping):	 Stopped
++    * ping	(ocf:pacemaker:ping):	 Stopped (not installed) 
+   * Fencing	(stonith:fence_xvm):	 Started cluster01
+   * Container bundle set: httpd-bundle [pcmk:http]:
+     * Replica[0]
+@@ -3705,7 +3705,7 @@ Full List of Resources:
+   * 1/1	(stonith:fence_xvm):	Active cluster01
+   * Clone Set: ping-clone [ping]:
+     * ping	(ocf:pacemaker:ping):	 Started cluster01
+-    * ping	(ocf:pacemaker:ping):	 Stopped
++    * ping	(ocf:pacemaker:ping):	 Stopped (not installed) 
+   * Container bundle set: httpd-bundle [pcmk:http]:
+     * Replica[0]
+       * httpd-bundle-ip-192.168.122.131	(ocf:heartbeat:IPaddr2):	 Started cluster02
+@@ -3886,7 +3886,7 @@ Node List:
+ Inactive Resources:
+   * Clone Set: ping-clone [ping]:
+     * ping	(ocf:pacemaker:ping):	 Started cluster01
+-    * ping	(ocf:pacemaker:ping):	 Stopped
++    * ping	(ocf:pacemaker:ping):	 Stopped (not installed) 
+   * Resource Group: partially-active-group:
+     * 2/4	(ocf:pacemaker:Dummy):	Active cluster02
+   * smart-mon	(ocf:pacemaker:HealthSMART):	 Stopped (not installed) 
+diff --git a/cts/scheduler/summary/failed-probe-clone.summary b/cts/scheduler/summary/failed-probe-clone.summary
+index ca15c302aa..febee14400 100644
+--- a/cts/scheduler/summary/failed-probe-clone.summary
++++ b/cts/scheduler/summary/failed-probe-clone.summary
+@@ -5,12 +5,13 @@ Current cluster status:
+   * Full List of Resources:
+     * Fencing	(stonith:fence_xvm):	 Started cluster01
+     * Clone Set: ping-1-clone [ping-1]:
+-      * Stopped: [ cluster01 cluster02 ]
++      * Stopped (not installed): [ cluster01 cluster02 ]
+     * Clone Set: ping-2-clone [ping-2]:
+-      * Stopped: [ cluster01 cluster02 ]
++      * Stopped: [ cluster02 ]
++      * Stopped (not installed): [ cluster01 ]
+     * Clone Set: ping-3-clone [ping-3]:
+       * ping-3	(ocf:pacemaker:ping):	 FAILED cluster01
+-      * Stopped: [ cluster02 ]
++      * Stopped (not installed): [ cluster02 ]
+ 
+ Transition Summary:
+   * Start      ping-2:0     ( cluster02 )
+@@ -38,9 +39,10 @@ Revised Cluster Status:
+   * Full List of Resources:
+     * Fencing	(stonith:fence_xvm):	 Started cluster01
+     * Clone Set: ping-1-clone [ping-1]:
+-      * Stopped: [ cluster01 cluster02 ]
++      * Stopped (not installed): [ cluster01 cluster02 ]
+     * Clone Set: ping-2-clone [ping-2]:
+       * Started: [ cluster02 ]
+-      * Stopped: [ cluster01 ]
++      * Stopped (not installed): [ cluster01 ]
+     * Clone Set: ping-3-clone [ping-3]:
+-      * Stopped: [ cluster01 cluster02 ]
++      * Stopped: [ cluster01 ]
++      * Stopped (not installed): [ cluster02 ]
+diff --git a/include/crm/pengine/internal.h b/include/crm/pengine/internal.h
+index 58dd2e8727..2b20da6e5f 100644
+--- a/include/crm/pengine/internal.h
++++ b/include/crm/pengine/internal.h
+@@ -576,4 +576,6 @@ gboolean pe__native_is_filtered(pe_resource_t *rsc, GList *only_rsc, gboolean ch
+ 
+ xmlNode *pe__failed_probe_for_rsc(pe_resource_t *rsc, const char *name);
+ 
++const char *pe__clone_child_id(pe_resource_t *rsc);
++
+ #endif
+diff --git a/lib/pengine/clone.c b/lib/pengine/clone.c
+index 58fb24d24e..ef4bdc0edf 100644
+--- a/lib/pengine/clone.c
++++ b/lib/pengine/clone.c
+@@ -963,14 +963,23 @@ pe__clone_default(pcmk__output_t *out, va_list args)
+                 if (pe_find_node(rsc->running_on, node->details->uname) == NULL &&
+                     pcmk__str_in_list(node->details->uname, only_node,
+                                       pcmk__str_star_matches|pcmk__str_casei)) {
++                    xmlNode *probe_op = pe__failed_probe_for_rsc(rsc, node->details->uname);
+                     const char *state = "Stopped";
+ 
+                     if (configured_role(rsc) == RSC_ROLE_STOPPED) {
+                         state = "Stopped (disabled)";
+                     }
+ 
+-                    g_hash_table_insert(stopped, strdup(node->details->uname),
+-                                        strdup(state));
++                    if (probe_op != NULL) {
++                        int rc;
++
++                        pcmk__scan_min_int(crm_element_value(probe_op, XML_LRM_ATTR_RC), &rc, 0);
++                        g_hash_table_insert(stopped, strdup(node->details->uname),
++                                            crm_strdup_printf("Stopped (%s)", services_ocf_exitcode_str(rc)));
++                    } else {
++                        g_hash_table_insert(stopped, strdup(node->details->uname),
++                                            strdup(state));
++                    }
+                 }
+             }
+             g_list_free(list);
+@@ -1113,3 +1122,11 @@ pe__clone_is_filtered(pe_resource_t *rsc, GList *only_rsc, gboolean check_parent
+ 
+     return !passes;
+ }
++
++const char *
++pe__clone_child_id(pe_resource_t *rsc)
++{
++    clone_variant_data_t *clone_data = NULL;
++    get_clone_variant_data(clone_data, rsc);
++    return ID(clone_data->xml_obj_child);
++}
+diff --git a/lib/pengine/utils.c b/lib/pengine/utils.c
+index 3151f0120b..6c4f3b6971 100644
+--- a/lib/pengine/utils.c
++++ b/lib/pengine/utils.c
+@@ -2573,8 +2573,15 @@ pe__build_rsc_list(pe_working_set_t *data_set, const char *s) {
+ xmlNode *
+ pe__failed_probe_for_rsc(pe_resource_t *rsc, const char *name)
+ {
++    pe_resource_t *parent = uber_parent(rsc);
+     const char *rsc_id = rsc->id;
+ 
++    if (rsc->variant == pe_clone) {
++        rsc_id = pe__clone_child_id(rsc);
++    } else if (parent->variant == pe_clone) {
++        rsc_id = pe__clone_child_id(parent);
++    }
++
+     for (xmlNode *xml_op = pcmk__xml_first_child(rsc->cluster->failed); xml_op != NULL;
+          xml_op = pcmk__xml_next(xml_op)) {
+         const char *value = NULL;
+-- 
+2.27.0
+
+
+From cf8b01da93fce87526617fefdcee6eb9f6ecdbd1 Mon Sep 17 00:00:00 2001
+From: Chris Lumens <clumens@redhat.com>
+Date: Wed, 24 Nov 2021 10:57:05 -0500
+Subject: [PATCH 18/21] Test: cts-cli: Update the last-rc-change sed
+ expression.
+
+The last-rc-change field can now occur in both the XML output (where it
+is wrapped in double quotes) and the text output (where it is wrapped
+in single quotes and followed by a comma).  In addition, a plus or
+minus can occur in the time string.
+
+The "{0,1}" syntax takes the place of a "?" for marking the optional
+comma.  In FreeBSD sed, "?" doesn't mean anything special.
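+
+Both of these forms (taken from the test output below) are now
+stripped:
+
+    last-rc-change="2020-09-02 12:17:38 -04:00"
+    last-rc-change='Wed Sep  2 12:17:38 2020',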
+---
+ cts/cli/regression.crm_mon.exp | 12 ++++++------
+ cts/cts-cli.in                 |  2 +-
+ 2 files changed, 7 insertions(+), 7 deletions(-)
+
+diff --git a/cts/cli/regression.crm_mon.exp b/cts/cli/regression.crm_mon.exp
+index 5688500ce5..957758832d 100644
+--- a/cts/cli/regression.crm_mon.exp
++++ b/cts/cli/regression.crm_mon.exp
+@@ -3497,7 +3497,7 @@ Active Resources:
+     * dummy-2	(ocf:pacemaker:Dummy):	 FAILED cluster02
+ 
+ Failed Resource Actions:
+-  * dummy-2_monitor_0 on cluster02 'unimplemented feature' (3): call=2, status='complete', last-rc-change='Wed Sep  2 12:17:38 2020', queued=0ms, exec=33ms
++  * dummy-2_monitor_0 on cluster02 'unimplemented feature' (3): call=2, status='complete', queued=0ms, exec=33ms
+ =#=#=#= End test: Text output of partially active resources - OK (0) =#=#=#=
+ * Passed: crm_mon        - Text output of partially active resources
+ =#=#=#= Begin test: XML output of partially active resources =#=#=#=
+@@ -3641,7 +3641,7 @@ Failed Resource Actions:
+     </node>
+   </node_history>
+   <failures>
+-    <failure op_key="dummy-2_monitor_0" node="cluster02" exitstatus="unimplemented feature" exitreason="" exitcode="3" call="2" status="complete" last-rc-change="2020-09-02 12:17:38 -04:00" queued="0" exec="33" interval="0" task="monitor"/>
++    <failure op_key="dummy-2_monitor_0" node="cluster02" exitstatus="unimplemented feature" exitreason="" exitcode="3" call="2" status="complete" queued="0" exec="33" interval="0" task="monitor"/>
+   </failures>
+   <status code="0" message="OK"/>
+ </pacemaker-result>
+@@ -3684,7 +3684,7 @@ Full List of Resources:
+   * smart-mon	(ocf:pacemaker:HealthSMART):	 Stopped (not installed) 
+ 
+ Failed Resource Actions:
+-  * dummy-2_monitor_0 on cluster02 'unimplemented feature' (3): call=2, status='complete', last-rc-change='Wed Sep  2 12:17:38 2020', queued=0ms, exec=33ms
++  * dummy-2_monitor_0 on cluster02 'unimplemented feature' (3): call=2, status='complete', queued=0ms, exec=33ms
+ =#=#=#= End test: Text output of partially active resources, with inactive resources - OK (0) =#=#=#=
+ * Passed: crm_mon        - Text output of partially active resources, with inactive resources
+ =#=#=#= Begin test: Complete brief text output, with inactive resources =#=#=#=
+@@ -3771,7 +3771,7 @@ Operations:
+       * (1) probe
+ 
+ Failed Resource Actions:
+-  * dummy-2_monitor_0 on cluster02 'unimplemented feature' (3): call=2, status='complete', last-rc-change='Wed Sep  2 12:17:38 2020', queued=0ms, exec=33ms
++  * dummy-2_monitor_0 on cluster02 'unimplemented feature' (3): call=2, status='complete', queued=0ms, exec=33ms
+ =#=#=#= End test: Complete brief text output, with inactive resources - OK (0) =#=#=#=
+ * Passed: crm_mon        - Complete brief text output, with inactive resources
+ =#=#=#= Begin test: Text output of partially active group =#=#=#=
+@@ -3850,7 +3850,7 @@ Active Resources:
+     * dummy-2	(ocf:pacemaker:Dummy):	 FAILED cluster02
+ 
+ Failed Resource Actions:
+-  * dummy-2_monitor_0 on cluster02 'unimplemented feature' (3): call=2, status='complete', last-rc-change='Wed Sep  2 12:17:38 2020', queued=0ms, exec=33ms
++  * dummy-2_monitor_0 on cluster02 'unimplemented feature' (3): call=2, status='complete', queued=0ms, exec=33ms
+ =#=#=#= End test: Text output of inactive member of partially active group - OK (0) =#=#=#=
+ * Passed: crm_mon        - Text output of inactive member of partially active group
+ =#=#=#= Begin test: Complete brief text output grouped by node, with inactive resources =#=#=#=
+@@ -3942,7 +3942,7 @@ Operations:
+       * (1) probe
+ 
+ Failed Resource Actions:
+-  * dummy-2_monitor_0 on cluster02 'unimplemented feature' (3): call=2, status='complete', last-rc-change='Wed Sep  2 12:17:38 2020', queued=0ms, exec=33ms
++  * dummy-2_monitor_0 on cluster02 'unimplemented feature' (3): call=2, status='complete', queued=0ms, exec=33ms
+ =#=#=#= End test: Complete brief text output grouped by node, with inactive resources - OK (0) =#=#=#=
+ * Passed: crm_mon        - Complete brief text output grouped by node, with inactive resources
+ =#=#=#= Begin test: Text output of partially active resources, with inactive resources, filtered by node =#=#=#=
+diff --git a/cts/cts-cli.in b/cts/cts-cli.in
+index 457816afab..72e9a1e912 100755
+--- a/cts/cts-cli.in
++++ b/cts/cts-cli.in
+@@ -1870,7 +1870,7 @@ for t in $tests; do
+         -e 's/.*\(unpack_.*\)@.*\.c:[0-9][0-9]*)/\1/g' \
+         -e 's/.*\(update_validation\)@.*\.c:[0-9][0-9]*)/\1/g' \
+         -e 's/.*\(apply_upgrade\)@.*\.c:[0-9][0-9]*)/\1/g' \
+-        -e 's/ last-rc-change=\"[A-Za-z0-9: ]*\"//'\
++        -e "s/ last-rc-change=['\"][-+A-Za-z0-9: ]*['\"],\{0,1\}//" \
+         -e 's|^/tmp/cts-cli\.validity\.bad.xml\.[^:]*:|validity.bad.xml:|'\
+         -e 's/^Entity: line [0-9][0-9]*: //'\
+         -e 's/\(validation ([0-9][0-9]* of \)[0-9][0-9]*\().*\)/\1X\2/' \
+-- 
+2.27.0
+
+
+From dea61f1b6507fbc978e040c1555384d8d7ffa9f3 Mon Sep 17 00:00:00 2001
+From: Chris Lumens <clumens@redhat.com>
+Date: Wed, 1 Dec 2021 16:23:14 -0500
+Subject: [PATCH 19/21] Fix: include: Bump feature set to 3.12.0.
+
+This reflects the change in scheduler handling of maskable probe
+failures.
+
+See: rhbz#1506372.
+---
+ include/crm/crm.h | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/include/crm/crm.h b/include/crm/crm.h
+index 04d2324d75..16b35e9c55 100644
+--- a/include/crm/crm.h
++++ b/include/crm/crm.h
+@@ -66,7 +66,7 @@ extern "C" {
+  * >=3.0.13: Fail counts include operation name and interval
+  * >=3.2.0:  DC supports PCMK_EXEC_INVALID and PCMK_EXEC_NOT_CONNECTED
+  */
+-#  define CRM_FEATURE_SET		"3.11.0"
++#  define CRM_FEATURE_SET		"3.12.0"
+ 
+ /* Pacemaker's CPG protocols use fixed-width binary fields for the sender and
+  * recipient of a CPG message. This imposes an arbitrary limit on cluster node
+-- 
+2.27.0
+
+
+From fef2c61ef462c221809dc91467ea1e96d5478c74 Mon Sep 17 00:00:00 2001
+From: Chris Lumens <clumens@redhat.com>
+Date: Mon, 6 Dec 2021 16:42:15 -0500
+Subject: [PATCH 20/21] Feature: scheduler: Handle masked probes in the
+ scheduler.
+
+These probe operations get their rc/status codes remapped to not
+running/done, but we still ensure they end up in the list of failed
+operations so that tool output continues to display them properly.
+
+Note that failures on bundled resources do not get masked.
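+
+The remapping itself is the small addition to remap_operation() below,
+shown here with explanatory comments:
+
+    if (!pe_rsc_is_bundled(rsc) && pcmk_xe_mask_probe_failure(xml_op)) {
+        *status = PCMK_EXEC_DONE;      /* treat the probe as completed */
+        *rc = PCMK_OCF_NOT_RUNNING;    /* ... and the resource as stopped */
+    }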
+
+There are no test case changes for this patch.
+
+See: rhbz#1506372.
+---
+ lib/pengine/unpack.c | 42 +++++++++++++++++++++++++++++++++++++-----
+ 1 file changed, 37 insertions(+), 5 deletions(-)
+
+diff --git a/lib/pengine/unpack.c b/lib/pengine/unpack.c
+index b659f319fb..f3583e97d8 100644
+--- a/lib/pengine/unpack.c
++++ b/lib/pengine/unpack.c
+@@ -3169,6 +3169,11 @@ remap_operation(xmlNode *xml_op, pe_resource_t *rsc, pe_node_t *node,
+         }
+     }
+ 
++    if (!pe_rsc_is_bundled(rsc) && pcmk_xe_mask_probe_failure(xml_op)) {
++        *status = PCMK_EXEC_DONE;
++        *rc = PCMK_OCF_NOT_RUNNING;
++    }
++
+     /* If the executor reported an operation status of anything but done or
+      * error, consider that final. But for done or error, we know better whether
+      * it should be treated as a failure or not, because we know the expected
+@@ -3567,12 +3572,12 @@ update_resource_state(pe_resource_t * rsc, pe_node_t * node, xmlNode * xml_op, c
+     CRM_ASSERT(rsc);
+     CRM_ASSERT(xml_op);
+ 
+-    if (rc == PCMK_OCF_NOT_RUNNING) {
+-        clear_past_failure = TRUE;
+-
+-    } else if (rc == PCMK_OCF_NOT_INSTALLED) {
++    if (rc == PCMK_OCF_NOT_INSTALLED || (!pe_rsc_is_bundled(rsc) && pcmk_xe_mask_probe_failure(xml_op))) {
+         rsc->role = RSC_ROLE_STOPPED;
+ 
++    } else if (rc == PCMK_OCF_NOT_RUNNING) {
++        clear_past_failure = TRUE;
++
+     } else if (pcmk__str_eq(task, CRMD_ACTION_STATUS, pcmk__str_casei)) {
+         if (last_failure) {
+             const char *op_key = get_op_key(xml_op);
+@@ -3661,8 +3666,10 @@ unpack_rsc_op(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op,
+               pe_working_set_t *data_set)
+ {
+     int rc = 0;
++    int old_rc = 0;
+     int task_id = 0;
+     int target_rc = 0;
++    int old_target_rc = 0;
+     int status = PCMK_EXEC_UNKNOWN;
+     guint interval_ms = 0;
+     const char *task = NULL;
+@@ -3671,6 +3678,7 @@ unpack_rsc_op(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op,
+     bool expired = false;
+     pe_resource_t *parent = rsc;
+     enum action_fail_response failure_strategy = action_fail_recover;
++    bool maskable_probe_failure = false;
+ 
+     CRM_CHECK(rsc && node && xml_op, return);
+ 
+@@ -3727,10 +3735,22 @@ unpack_rsc_op(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op,
+         expired = true;
+     }
+ 
++    old_rc = rc;
++    old_target_rc = target_rc;
++
+     remap_operation(xml_op, rsc, node, data_set, on_fail, target_rc,
+                     &rc, &status);
+ 
+-    if (expired && (rc != target_rc)) {
++    maskable_probe_failure = !pe_rsc_is_bundled(rsc) && pcmk_xe_mask_probe_failure(xml_op);
++
++    if (expired && maskable_probe_failure && old_rc != old_target_rc) {
++        if (rsc->role <= RSC_ROLE_STOPPED) {
++            rsc->role = RSC_ROLE_UNKNOWN;
++        }
++
++        goto done;
++
++    } else if (expired && (rc != target_rc)) {
+         const char *magic = crm_element_value(xml_op, XML_ATTR_TRANSITION_MAGIC);
+ 
+         if (interval_ms == 0) {
+@@ -3758,6 +3778,18 @@ unpack_rsc_op(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op,
+         }
+     }
+ 
++    if (maskable_probe_failure) {
++        crm_notice("Treating probe result '%s' for %s on %s as 'not running'",
++                   services_ocf_exitcode_str(rc), rsc->id, node->details->uname);
++        update_resource_state(rsc, node, xml_op, task, target_rc, *last_failure,
++                              on_fail, data_set);
++        crm_xml_add(xml_op, XML_ATTR_UNAME, node->details->uname);
++
++        record_failed_op(xml_op, node, rsc, data_set);
++        resource_location(parent, node, -INFINITY, "masked-probe-failure", data_set);
++        goto done;
++    }
++
+     switch (status) {
+         case PCMK_EXEC_CANCELLED:
+             // Should never happen
+-- 
+2.27.0
+
+
+From ccff6eb60598f389008b0621447056457da79671 Mon Sep 17 00:00:00 2001
+From: Chris Lumens <clumens@redhat.com>
+Date: Tue, 4 Jan 2022 10:14:48 -0500
+Subject: [PATCH 21/21] Test: scheduler: Add tests for expired, masked probe
+ failures.
+
+dummy-1 is a stopped resource with an expired masked probe failure.
+This probe should be rescheduled.  dummy-2 is a started resource with an
+expired masked probe failure.  This probe should not be rescheduled.
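+
+Both resources carry a short failure-timeout in the test input so the
+recorded probe failures are already expired, e.g.:
+
+    <nvpair id="dummy-1-meta_attributes-failure-timeout" name="failure-timeout" value="10"/>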
+---
+ cts/cts-scheduler.in                          |  1 +
+ .../dot/expired-failed-probe-primitive.dot    |  8 ++
+ .../exp/expired-failed-probe-primitive.exp    | 45 ++++++++++++
+ .../expired-failed-probe-primitive.scores     |  7 ++
+ .../expired-failed-probe-primitive.summary    | 26 +++++++
+ .../xml/expired-failed-probe-primitive.xml    | 73 +++++++++++++++++++
+ 6 files changed, 160 insertions(+)
+ create mode 100644 cts/scheduler/dot/expired-failed-probe-primitive.dot
+ create mode 100644 cts/scheduler/exp/expired-failed-probe-primitive.exp
+ create mode 100644 cts/scheduler/scores/expired-failed-probe-primitive.scores
+ create mode 100644 cts/scheduler/summary/expired-failed-probe-primitive.summary
+ create mode 100644 cts/scheduler/xml/expired-failed-probe-primitive.xml
+
+diff --git a/cts/cts-scheduler.in b/cts/cts-scheduler.in
+index 3abcbc6c9d..7bc41a0936 100644
+--- a/cts/cts-scheduler.in
++++ b/cts/cts-scheduler.in
+@@ -115,6 +115,7 @@ TESTS = [
+         [ "probe-pending-node", "Probe (pending node + unmanaged resource)" ],
+         [ "failed-probe-primitive", "Maskable vs. unmaskable probe failures on primitive resources" ],
+         [ "failed-probe-clone", "Maskable vs. unmaskable probe failures on cloned resources" ],
++        [ "expired-failed-probe-primitive", "Maskable, expired probe failure on primitive resources" ],
+         [ "standby", "Standby" ],
+         [ "comments", "Comments" ],
+     ],
+diff --git a/cts/scheduler/dot/expired-failed-probe-primitive.dot b/cts/scheduler/dot/expired-failed-probe-primitive.dot
+new file mode 100644
+index 0000000000..610c2b8047
+--- /dev/null
++++ b/cts/scheduler/dot/expired-failed-probe-primitive.dot
+@@ -0,0 +1,8 @@
++ digraph "g" {
++"dummy-1_monitor_0 cluster01" -> "dummy-1_start_0 cluster02" [ style = bold]
++"dummy-1_monitor_0 cluster01" [ style=bold color="green" fontcolor="black"]
++"dummy-1_monitor_0 cluster02" -> "dummy-1_start_0 cluster02" [ style = bold]
++"dummy-1_monitor_0 cluster02" [ style=bold color="green" fontcolor="black"]
++"dummy-1_start_0 cluster02" [ style=bold color="green" fontcolor="black"]
++"dummy-2_monitor_0 cluster01" [ style=bold color="green" fontcolor="black"]
++}
+diff --git a/cts/scheduler/exp/expired-failed-probe-primitive.exp b/cts/scheduler/exp/expired-failed-probe-primitive.exp
+new file mode 100644
+index 0000000000..3c2cbfe411
+--- /dev/null
++++ b/cts/scheduler/exp/expired-failed-probe-primitive.exp
+@@ -0,0 +1,45 @@
++<transition_graph cluster-delay="60s" stonith-timeout="60s" failed-stop-offset="INFINITY" failed-start-offset="INFINITY"  transition_id="0">
++  <synapse id="0">
++    <action_set>
++      <rsc_op id="7" operation="start" operation_key="dummy-1_start_0" on_node="cluster02" on_node_uuid="2">
++        <primitive id="dummy-1" class="ocf" provider="pacemaker" type="Dummy"/>
++        <attributes CRM_meta_on_node="cluster02" CRM_meta_on_node_uuid="2" CRM_meta_timeout="20000" />
++      </rsc_op>
++    </action_set>
++    <inputs>
++      <trigger>
++        <rsc_op id="2" operation="monitor" operation_key="dummy-1_monitor_0" on_node="cluster01" on_node_uuid="1"/>
++      </trigger>
++      <trigger>
++        <rsc_op id="4" operation="monitor" operation_key="dummy-1_monitor_0" on_node="cluster02" on_node_uuid="2"/>
++      </trigger>
++    </inputs>
++  </synapse>
++  <synapse id="1">
++    <action_set>
++      <rsc_op id="4" operation="monitor" operation_key="dummy-1_monitor_0" on_node="cluster02" on_node_uuid="2">
++        <primitive id="dummy-1" class="ocf" provider="pacemaker" type="Dummy"/>
++        <attributes CRM_meta_on_node="cluster02" CRM_meta_on_node_uuid="2" CRM_meta_op_target_rc="7" CRM_meta_timeout="20000" />
++      </rsc_op>
++    </action_set>
++    <inputs/>
++  </synapse>
++  <synapse id="2">
++    <action_set>
++      <rsc_op id="2" operation="monitor" operation_key="dummy-1_monitor_0" on_node="cluster01" on_node_uuid="1">
++        <primitive id="dummy-1" class="ocf" provider="pacemaker" type="Dummy"/>
++        <attributes CRM_meta_on_node="cluster01" CRM_meta_on_node_uuid="1" CRM_meta_op_target_rc="7" CRM_meta_timeout="20000" />
++      </rsc_op>
++    </action_set>
++    <inputs/>
++  </synapse>
++  <synapse id="3">
++    <action_set>
++      <rsc_op id="3" operation="monitor" operation_key="dummy-2_monitor_0" on_node="cluster01" on_node_uuid="1">
++        <primitive id="dummy-2" class="ocf" provider="pacemaker" type="Dummy"/>
++        <attributes CRM_meta_on_node="cluster01" CRM_meta_on_node_uuid="1" CRM_meta_op_target_rc="7" CRM_meta_timeout="20000" />
++      </rsc_op>
++    </action_set>
++    <inputs/>
++  </synapse>
++</transition_graph>
+diff --git a/cts/scheduler/scores/expired-failed-probe-primitive.scores b/cts/scheduler/scores/expired-failed-probe-primitive.scores
+new file mode 100644
+index 0000000000..51ae5510e6
+--- /dev/null
++++ b/cts/scheduler/scores/expired-failed-probe-primitive.scores
+@@ -0,0 +1,7 @@
++
++pcmk__native_allocate: Fencing allocation score on cluster01: 0
++pcmk__native_allocate: Fencing allocation score on cluster02: 0
++pcmk__native_allocate: dummy-1 allocation score on cluster01: 0
++pcmk__native_allocate: dummy-1 allocation score on cluster02: 0
++pcmk__native_allocate: dummy-2 allocation score on cluster01: 0
++pcmk__native_allocate: dummy-2 allocation score on cluster02: 0
+diff --git a/cts/scheduler/summary/expired-failed-probe-primitive.summary b/cts/scheduler/summary/expired-failed-probe-primitive.summary
+new file mode 100644
+index 0000000000..ac0604e84f
+--- /dev/null
++++ b/cts/scheduler/summary/expired-failed-probe-primitive.summary
+@@ -0,0 +1,26 @@
++Current cluster status:
++  * Node List:
++    * Online: [ cluster01 cluster02 ]
++
++  * Full List of Resources:
++    * Fencing	(stonith:fence_xvm):	 Started cluster01
++    * dummy-1	(ocf:pacemaker:Dummy):	 Stopped
++    * dummy-2	(ocf:pacemaker:Dummy):	 Started cluster02
++
++Transition Summary:
++  * Start      dummy-1     ( cluster02 )
++
++Executing Cluster Transition:
++  * Resource action: dummy-1         monitor on cluster02
++  * Resource action: dummy-1         monitor on cluster01
++  * Resource action: dummy-2         monitor on cluster01
++  * Resource action: dummy-1         start on cluster02
++
++Revised Cluster Status:
++  * Node List:
++    * Online: [ cluster01 cluster02 ]
++
++  * Full List of Resources:
++    * Fencing	(stonith:fence_xvm):	 Started cluster01
++    * dummy-1	(ocf:pacemaker:Dummy):	 Started cluster02
++    * dummy-2	(ocf:pacemaker:Dummy):	 Started cluster02
+diff --git a/cts/scheduler/xml/expired-failed-probe-primitive.xml b/cts/scheduler/xml/expired-failed-probe-primitive.xml
+new file mode 100644
+index 0000000000..684aa73f92
+--- /dev/null
++++ b/cts/scheduler/xml/expired-failed-probe-primitive.xml
+@@ -0,0 +1,73 @@
++<cib crm_feature_set="3.3.0" validate-with="pacemaker-3.3" epoch="1" num_updates="37" admin_epoch="1" cib-last-written="Tue Jan  4 10:00:00 2021" update-origin="cluster01" update-client="crmd" update-user="hacluster" have-quorum="1" dc-uuid="2">
++  <configuration>
++    <crm_config>
++      <cluster_property_set id="cib-bootstrap-options">
++        <nvpair id="cib-bootstrap-options-have-watchdog" name="have-watchdog" value="false"/>
++        <nvpair id="cib-bootstrap-options-dc-version" name="dc-version" value="2.0.4-1.e97f9675f.git.el7-e97f9675f"/>
++        <nvpair id="cib-bootstrap-options-cluster-infrastructure" name="cluster-infrastructure" value="corosync"/>
++        <nvpair id="cib-bootstrap-options-cluster-name" name="cluster-name" value="test-cluster"/>
++        <nvpair id="cib-bootstrap-options-stonith-enabled" name="stonith-enabled" value="true"/>
++        <nvpair id="cib-bootstrap-options-maintenance-mode" name="maintenance-mode" value="false"/>
++      </cluster_property_set>
++    </crm_config>
++    <nodes>
++      <node id="1" uname="cluster01"/>
++      <node id="2" uname="cluster02"/>
++    </nodes>
++    <resources>
++      <primitive class="stonith" id="Fencing" type="fence_xvm">
++        <instance_attributes id="Fencing-instance_attributes">
++          <nvpair id="Fencing-instance_attributes-ip_family" name="ip_family" value="ipv4"/>
++        </instance_attributes>
++        <operations>
++          <op id="Fencing-monitor-interval-60s" interval="60s" name="monitor"/>
++        </operations>
++      </primitive>
++      <primitive class="ocf" id="dummy-1" provider="pacemaker" type="Dummy">
++        <meta_attributes id="dummy-1-meta_attributes">
++          <nvpair id="dummy-1-meta_attributes-failure-timeout" name="failure-timeout" value="10"/>
++        </meta_attributes>
++      </primitive>
++      <primitive class="ocf" id="dummy-2" provider="pacemaker" type="Dummy">
++        <meta_attributes id="dummy-2-meta_attributes">
++          <nvpair id="dummy-2-meta_attributes-failure-timeout" name="failure-timeout" value="10"/>
++        </meta_attributes>
++      </primitive>
++    </resources>
++    <constraints/>
++  </configuration>
++  <status>
++    <node_state id="1" uname="cluster01" in_ccm="true" crmd="online" crm-debug-origin="do_update_resource" join="member" expected="member">
++      <lrm id="1">
++        <lrm_resources>
++          <lrm_resource id="Fencing" type="fence_xvm" class="stonith">
++            <lrm_rsc_op id="Fencing_last_0" operation_key="Fencing_start_0" operation="start" crm-debug-origin="do_update_resource" crm_feature_set="3.3.0" transition-key="3:1:0:4a9e64d6-e1dd-4395-917c-1596312eafe4" transition-magic="0:0;3:1:0:4a9e64d6-e1dd-4395-917c-1596312eafe4" exit-reason="" on_node="cluster01" call-id="3" rc-code="0" op-status="0" interval="0" last-rc-change="1588951272" exec-time="36" queue-time="0" op-digest="7da16842ab2328e41f737cab5e5fc89c"/>
++            <lrm_rsc_op id="Fencing_monitor_60000" operation_key="Fencing_monitor_60000" operation="monitor" crm-debug-origin="crm_simulate" crm_feature_set="3.3.0" transition-key="4:-1:0:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" transition-magic="0:0;4:-1:0:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" exit-reason="" on_node="cluster01" call-id="4" rc-code="0" op-status="0" interval="60000" last-rc-change="1590608589" exec-time="0" queue-time="0" op-digest="a88218bb6c7dc47e6586fc75fc2a8d69"/>
++          </lrm_resource>
++          <lrm_resource id="dummy-1" class="ocf" provider="pacemaker" type="Dummy">
++            <lrm_rsc_op id="dummy-1_last_failure_0" operation_key="dummy-1_monitor_0" operation="monitor" crm-debug-origin="crm_simulate" crm_feature_set="3.3.0" transition-key="5:1:0:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" transition-magic="0:5;5:1:0:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" exit-reason="" on_node="cluster01" call-id="5" rc-code="5" op-status="0" interval="0" last-rc-change="1590608589" exec-time="33" queue-time="0" op-digest="f2317cad3d54cec5d7d7aa7d0bf35cf8"/>
++          </lrm_resource>
++          <lrm_resource id="dummy-2" class="ocf" provider="pacemaker" type="Dummy">
++            <lrm_rsc_op id="dummy-2_last_failure_0" operation_key="dummy-2_monitor_0" operation="monitor" crm-debug-origin="crm_simulate" crm_feature_set="3.3.0" transition-key="5:1:0:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" transition-magic="0:5;5:1:0:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" exit-reason="" on_node="cluster01" call-id="6" rc-code="5" op-status="0" interval="0" last-rc-change="1590608589" exec-time="33" queue-time="0" op-digest="f2317cad3d54cec5d7d7aa7d0bf35cf8"/>
++          </lrm_resource>
++        </lrm_resources>
++      </lrm>
++    </node_state>
++    <node_state id="2" uname="cluster02" in_ccm="true" crmd="online" crm-debug-origin="do_update_resource" join="member" expected="member">
++      <lrm id="2">
++        <lrm_resources>
++          <lrm_resource id="Fencing" type="fence_xvm" class="stonith">
++            <lrm_rsc_op id="Fencing_last_0" operation_key="Fencing_monitor_0" operation="monitor" crm-debug-origin="do_update_resource" crm_feature_set="3.3.0" transition-key="1:0:7:4a9e64d6-e1dd-4395-917c-1596312eafe4" transition-magic="0:7;1:0:7:4a9e64d6-e1dd-4395-917c-1596312eafe4" exit-reason="" on_node="cluster02" call-id="1" rc-code="7" op-status="0" interval="0" last-rc-change="1588951263" exec-time="3" queue-time="0" op-digest="7da16842ab2328e41f737cab5e5fc89c"/>
++          </lrm_resource>
++          <lrm_resource id="dummy-1" class="ocf" provider="pacemaker" type="Dummy">
++            <lrm_rsc_op id="dummy-1_last_failure_0" operation_key="dummy-1_monitor_0" operation="monitor" crm-debug-origin="crm_simulate" crm_feature_set="3.3.0" transition-key="2:1:0:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" transition-magic="0:5;2:1:0:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" exit-reason="" on_node="cluster02" call-id="2" rc-code="5" op-status="0" interval="0" last-rc-change="1590608589" exec-time="33" queue-time="0" op-digest="f2317cad3d54cec5d7d7aa7d0bf35cf8"/>
++          </lrm_resource>
++          <lrm_resource id="dummy-2" class="ocf" provider="pacemaker" type="Dummy">
++            <lrm_rsc_op id="dummy-2_last_failure_0" operation_key="dummy-2_monitor_0" operation="monitor" crm-debug-origin="crm_simulate" crm_feature_set="3.3.0" transition-key="2:1:0:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" transition-magic="0:5;2:1:0:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" exit-reason="" on_node="cluster02" call-id="7" rc-code="5" op-status="0" interval="0" last-rc-change="1590608589" exec-time="33" queue-time="0" op-digest="f2317cad3d54cec5d7d7aa7d0bf35cf8"/>
++            <lrm_rsc_op id="dummy-2_last_0" operation_key="dummy-2_start_0" operation="start" crm-debug-origin="crm_simulate" crm_feature_set="3.3.0" transition-key="2:1:0:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" transition-magic="0:0;2:1:0:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" exit-reason="" on_node="cluster02" call-id="8" rc-code="0" op-status="0" interval="0" last-rc-change="1590609000" exec-time="33" queue-time="0" op-digest="f2317cad3d54cec5d7d7aa7d0bf35cf8"/>
++          </lrm_resource>
++        </lrm_resources>
++      </lrm>
++    </node_state>
++  </status>
++</cib>
+-- 
+2.27.0
+
diff --git a/SOURCES/011-fencing-reasons.patch b/SOURCES/011-fencing-reasons.patch
new file mode 100644
index 0000000..4422ca0
--- /dev/null
+++ b/SOURCES/011-fencing-reasons.patch
@@ -0,0 +1,1450 @@
+From 6db8e3adef0441953ec18dd0339c0a67c5c26bdf Mon Sep 17 00:00:00 2001
+From: Ken Gaillot <kgaillot@redhat.com>
+Date: Tue, 14 Dec 2021 16:25:21 -0600
+Subject: [PATCH 01/17] Doc: Pacemaker Development: update for recent function
+ renames
+
+---
+ doc/sphinx/Pacemaker_Development/components.rst | 16 ++++++++--------
+ 1 file changed, 8 insertions(+), 8 deletions(-)
+
+diff --git a/doc/sphinx/Pacemaker_Development/components.rst b/doc/sphinx/Pacemaker_Development/components.rst
+index a51220cac9..68158484ce 100644
+--- a/doc/sphinx/Pacemaker_Development/components.rst
++++ b/doc/sphinx/Pacemaker_Development/components.rst
+@@ -106,7 +106,7 @@ or messaging layer callback, which calls:
+       the number of active peers), and if this is the last expected reply,
+       calls
+ 
+-      * ``call_remote_stonith()``, which calculates the timeout and sends
++      * ``request_peer_fencing()``, which calculates the timeout and sends
+         ``STONITH_OP_FENCE`` request(s) to carry out the fencing. If the target
+ 	node has a fencing "topology" (which allows specifications such as
+ 	"this node can be fenced either with device A, or devices B and C in
+@@ -156,7 +156,7 @@ returns, and calls
+   * done callback (``st_child_done()``), which calls ``schedule_stonith_command()``
+     for a new device if there are further required actions to execute or if the
+     original action failed, then builds and sends an XML reply to the original
+-    fencer (via ``stonith_send_async_reply()``), then checks whether any
++    fencer (via ``send_async_reply()``), then checks whether any
+     pending actions are the same as the one just executed and merges them if so.
+ 
+ Fencing replies
+@@ -169,18 +169,18 @@ messaging layer callback, which calls:
+ 
+   * ``handle_reply()``, which calls
+ 
+-    * ``process_remote_stonith_exec()``, which calls either
+-      ``call_remote_stonith()`` (to retry a failed operation, or try the next
+-       device in a topology is appropriate, which issues a new
++    * ``fenced_process_fencing_reply()``, which calls either
++      ``request_peer_fencing()`` (to retry a failed operation, or try the next
++      device in a topology is appropriate, which issues a new
+       ``STONITH_OP_FENCE`` request, proceeding as before) or
+-      ``remote_op_done()`` (if the operation is definitively failed or
++      ``finalize_op()`` (if the operation is definitively failed or
+       successful).
+ 
+-      * remote_op_done() broadcasts the result to all peers.
++      * ``finalize_op()`` broadcasts the result to all peers.
+ 
+ Finally, all peers receive the broadcast result and call
+ 
+-* ``remote_op_done()``, which sends the result to all local clients.
++* ``finalize_op()``, which sends the result to all local clients.
+ 
+ 
+ .. index::
+-- 
+2.27.0
+
+
+From 47db9e5fb410b1e911710727d646eb7180a70c90 Mon Sep 17 00:00:00 2001
+From: Ken Gaillot <kgaillot@redhat.com>
+Date: Fri, 12 Nov 2021 09:58:16 -0600
+Subject: [PATCH 02/17] Refactor: fencing: add full result to fence action
+ callback data
+
+stonith_callback_data_t previously only contained the legacy return code for
+the action. Use its new opaque member to store the full result, along with
+accessors (available only internally for now).
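+
+As an illustration (not part of this patch), a client callback could read the
+full result via the new accessors roughly as follows; the callback name and
+its registration are hypothetical:
+
+    static void
+    my_fence_callback(stonith_t *st, stonith_callback_data_t *data)
+    {
+        int exit_status = stonith__exit_status(data);       // e.g. CRM_EX_OK
+        int exec_status = stonith__execution_status(data);  // e.g. PCMK_EXEC_DONE
+        const char *reason = stonith__exit_reason(data);    // may be NULL
+
+        crm_info("Fence action %d finished: exit=%d status=%s reason=%s",
+                 data->call_id, exit_status,
+                 pcmk_exec_status_str(exec_status),
+                 ((reason == NULL)? "none" : reason));
+    }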
+---
+ include/crm/fencing/internal.h |  3 ++
+ lib/fencing/st_client.c        | 99 ++++++++++++++++++++++++++--------
+ 2 files changed, 81 insertions(+), 21 deletions(-)
+
+diff --git a/include/crm/fencing/internal.h b/include/crm/fencing/internal.h
+index f0d294a0b3..eff689e59b 100644
+--- a/include/crm/fencing/internal.h
++++ b/include/crm/fencing/internal.h
+@@ -187,6 +187,9 @@ bool stonith__event_state_eq(stonith_history_t *history, void *user_data);
+ bool stonith__event_state_neq(stonith_history_t *history, void *user_data);
+ 
+ int stonith__legacy2status(int rc);
++int stonith__exit_status(stonith_callback_data_t *data);
++int stonith__execution_status(stonith_callback_data_t *data);
++const char *stonith__exit_reason(stonith_callback_data_t *data);
+ 
+ /*!
+  * \internal
+diff --git a/lib/fencing/st_client.c b/lib/fencing/st_client.c
+index 2ca094566b..9d93ffd481 100644
+--- a/lib/fencing/st_client.c
++++ b/lib/fencing/st_client.c
+@@ -854,20 +854,23 @@ stonith_api_del_callback(stonith_t * stonith, int call_id, bool all_callbacks)
+  * \param[in] st        Fencer API connection
+  * \param[in] call_id   If positive, call ID of completed fence action, otherwise
+  *                      legacy return code for early action failure
+- * \param[in] rc        Legacy return code for action result
++ * \param[in] result    Full result for action
+  * \param[in] userdata  User data to pass to callback
+  * \param[in] callback  Fence action callback to invoke
+  */
+ static void
+-invoke_fence_action_callback(stonith_t *st, int call_id, int rc, void *userdata,
++invoke_fence_action_callback(stonith_t *st, int call_id,
++                             pcmk__action_result_t *result,
++                             void *userdata,
+                              void (*callback) (stonith_t *st,
+                                                stonith_callback_data_t *data))
+ {
+     stonith_callback_data_t data = { 0, };
+ 
+     data.call_id = call_id;
+-    data.rc = rc;
++    data.rc = pcmk_rc2legacy(stonith__result2rc(result));
+     data.userdata = userdata;
++    data.opaque = (void *) result;
+ 
+     callback(st, &data);
+ }
+@@ -888,7 +891,7 @@ invoke_registered_callbacks(stonith_t *stonith, xmlNode *msg, int call_id)
+ {
+     stonith_private_t *private = NULL;
+     stonith_callback_client_t *cb_info = NULL;
+-    int rc = pcmk_ok;
++    pcmk__action_result_t result = PCMK__UNKNOWN_RESULT;
+ 
+     CRM_CHECK(stonith != NULL, return);
+     CRM_CHECK(stonith->st_private != NULL, return);
+@@ -897,20 +900,17 @@ invoke_registered_callbacks(stonith_t *stonith, xmlNode *msg, int call_id)
+ 
+     if (msg == NULL) {
+         // Fencer didn't reply in time
+-        rc = -ETIME;
++        pcmk__set_result(&result, CRM_EX_ERROR, PCMK_EXEC_TIMEOUT,
++                         "Timeout waiting for reply from fencer");
+         CRM_LOG_ASSERT(call_id > 0);
+ 
+     } else {
+         // We have the fencer reply
+-
+-        if (crm_element_value_int(msg, F_STONITH_RC, &rc) != 0) {
+-            rc = -pcmk_err_generic;
+-        }
+-
+         if ((crm_element_value_int(msg, F_STONITH_CALLID, &call_id) != 0)
+             || (call_id <= 0)) {
+             crm_log_xml_warn(msg, "Bad fencer reply");
+         }
++        stonith__xe_get_result(msg, &result);
+     }
+ 
+     if (call_id > 0) {
+@@ -919,27 +919,29 @@ invoke_registered_callbacks(stonith_t *stonith, xmlNode *msg, int call_id)
+     }
+ 
+     if ((cb_info != NULL) && (cb_info->callback != NULL)
+-        && (rc == pcmk_ok || !(cb_info->only_success))) {
++        && (pcmk__result_ok(&result) || !(cb_info->only_success))) {
+         crm_trace("Invoking callback %s for call %d",
+                   crm_str(cb_info->id), call_id);
+-        invoke_fence_action_callback(stonith, call_id, rc, cb_info->user_data,
+-                                     cb_info->callback);
++        invoke_fence_action_callback(stonith, call_id, &result,
++                                     cb_info->user_data, cb_info->callback);
+ 
+-    } else if ((private->op_callback == NULL) && (rc != pcmk_ok)) {
+-        crm_warn("Fencing action without registered callback failed: %s",
+-                 pcmk_strerror(rc));
++    } else if ((private->op_callback == NULL) && !pcmk__result_ok(&result)) {
++        crm_warn("Fencing action without registered callback failed: %d (%s)",
++                 result.exit_status,
++                 pcmk_exec_status_str(result.execution_status));
+         crm_log_xml_debug(msg, "Failed fence update");
+     }
+ 
+     if (private->op_callback != NULL) {
+         crm_trace("Invoking global callback for call %d", call_id);
+-        invoke_fence_action_callback(stonith, call_id, rc, NULL,
++        invoke_fence_action_callback(stonith, call_id, &result, NULL,
+                                      private->op_callback);
+     }
+ 
+     if (cb_info != NULL) {
+         stonith_api_del_callback(stonith, call_id, FALSE);
+     }
++    pcmk__reset_result(&result);
+ }
+ 
+ static gboolean
+@@ -1252,14 +1254,18 @@ stonith_api_add_callback(stonith_t * stonith, int call_id, int timeout, int opti
+     CRM_CHECK(stonith->st_private != NULL, return -EINVAL);
+     private = stonith->st_private;
+ 
+-    if (call_id == 0) {
++    if (call_id == 0) { // Add global callback
+         private->op_callback = callback;
+ 
+-    } else if (call_id < 0) {
++    } else if (call_id < 0) { // Call failed immediately, so call callback now
+         if (!(options & st_opt_report_only_success)) {
++            pcmk__action_result_t result = PCMK__UNKNOWN_RESULT;
++
+             crm_trace("Call failed, calling %s: %s", callback_name, pcmk_strerror(call_id));
+-            invoke_fence_action_callback(stonith, call_id, call_id, user_data,
+-                                         callback);
++            pcmk__set_result(&result, CRM_EX_ERROR,
++                             stonith__legacy2status(call_id), NULL);
++            invoke_fence_action_callback(stonith, call_id, &result,
++                                         user_data, callback);
+         } else {
+             crm_warn("Fencer call failed: %s", pcmk_strerror(call_id));
+         }
+@@ -2293,6 +2299,57 @@ stonith__device_parameter_flags(uint32_t *device_flags, const char *device_name,
+     freeXpathObject(xpath);
+ }
+ 
++/*!
++ * \internal
++ * \brief Return the exit status from an async action callback
++ *
++ * \param[in] data  Callback data
++ *
++ * \return Exit status from callback data
++ */
++int
++stonith__exit_status(stonith_callback_data_t *data)
++{
++    if ((data == NULL) || (data->opaque == NULL)) {
++        return CRM_EX_ERROR;
++    }
++    return ((pcmk__action_result_t *) data->opaque)->exit_status;
++}
++
++/*!
++ * \internal
++ * \brief Return the execution status from an async action callback
++ *
++ * \param[in] data  Callback data
++ *
++ * \return Execution status from callback data
++ */
++int
++stonith__execution_status(stonith_callback_data_t *data)
++{
++    if ((data == NULL) || (data->opaque == NULL)) {
++        return PCMK_EXEC_UNKNOWN;
++    }
++    return ((pcmk__action_result_t *) data->opaque)->execution_status;
++}
++
++/*!
++ * \internal
++ * \brief Return the exit reason from an async action callback
++ *
++ * \param[in] data  Callback data
++ *
++ * \return Exit reason from callback data
++ */
++const char *
++stonith__exit_reason(stonith_callback_data_t *data)
++{
++    if ((data == NULL) || (data->opaque == NULL)) {
++        return NULL;
++    }
++    return ((pcmk__action_result_t *) data->opaque)->exit_reason;
++}
++
+ // Deprecated functions kept only for backward API compatibility
+ // LCOV_EXCL_START
+ 
+-- 
+2.27.0
+
+
+From 1e076370ef4ac7993b5ff21ed1cdfb3c4a494cf0 Mon Sep 17 00:00:00 2001
+From: Ken Gaillot <kgaillot@redhat.com>
+Date: Tue, 9 Nov 2021 16:16:03 -0600
+Subject: [PATCH 03/17] Log: controller: improve fencing result messages
+
+Now that fence callbacks get the full result, we can log a better message.
+Also improve error-condition checking and message wording, and ensure that
+only a single message is logged per result.
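+
+For example, the non-DC notice now falls back to the execution status string
+when no exit reason was provided (a sketch of the pattern applied below):
+
+    const char *reason = stonith__exit_reason(data);
+
+    if (reason == NULL) {
+        reason = pcmk_exec_status_str(stonith__execution_status(data));
+    }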
+---
+ daemons/controld/controld_fencing.c | 83 +++++++++++++++++++----------
+ 1 file changed, 56 insertions(+), 27 deletions(-)
+
+diff --git a/daemons/controld/controld_fencing.c b/daemons/controld/controld_fencing.c
+index f5a252c813..f8d2fc13f4 100644
+--- a/daemons/controld/controld_fencing.c
++++ b/daemons/controld/controld_fencing.c
+@@ -714,45 +714,64 @@ tengine_stonith_callback(stonith_t *stonith, stonith_callback_data_t *data)
+     int stonith_id = -1;
+     int transition_id = -1;
+     crm_action_t *action = NULL;
+-    int call_id = data->call_id;
+-    int rc = data->rc;
+-    char *userdata = data->userdata;
+-
+-    CRM_CHECK(userdata != NULL, return);
+-    crm_notice("Stonith operation %d/%s: %s (%d)", call_id, (char *)userdata,
+-               pcmk_strerror(rc), rc);
++    const char *target = NULL;
+ 
+-    if (AM_I_DC == FALSE) {
++    if ((data == NULL) || (data->userdata == NULL)) {
++        crm_err("Ignoring fence operation %d result: "
++                "No transition key given (bug?)",
++                ((data == NULL)? -1 : data->call_id));
+         return;
+     }
+ 
+-    /* crm_info("call=%d, optype=%d, node_name=%s, result=%d, node_list=%s, action=%s", */
+-    /*       op->call_id, op->optype, op->node_name, op->op_result, */
+-    /*       (char *)op->node_list, op->private_data); */
++    if (!AM_I_DC) {
++        const char *reason = stonith__exit_reason(data);
++
++        if (reason == NULL) {
++           reason = pcmk_exec_status_str(stonith__execution_status(data));
++        }
++        crm_notice("Result of fence operation %d: %d (%s) " CRM_XS " key=%s",
++                   data->call_id, stonith__exit_status(data), reason,
++                   (const char *) data->userdata);
++        return;
++    }
+ 
+-    /* filter out old STONITH actions */
+-    CRM_CHECK(decode_transition_key(userdata, &uuid, &transition_id, &stonith_id, NULL),
++    CRM_CHECK(decode_transition_key(data->userdata, &uuid, &transition_id,
++                                    &stonith_id, NULL),
+               goto bail);
+ 
+-    if (transition_graph->complete || stonith_id < 0 || !pcmk__str_eq(uuid, te_uuid, pcmk__str_casei)
+-        || transition_graph->id != transition_id) {
+-        crm_info("Ignoring STONITH action initiated outside of the current transition");
++    if (transition_graph->complete || (stonith_id < 0)
++        || !pcmk__str_eq(uuid, te_uuid, pcmk__str_none)
++        || (transition_graph->id != transition_id)) {
++        crm_info("Ignoring fence operation %d result: "
++                 "Not from current transition " CRM_XS
++                 " complete=%s action=%d uuid=%s (vs %s) transition=%d (vs %d)",
++                 data->call_id, pcmk__btoa(transition_graph->complete),
++                 stonith_id, uuid, te_uuid, transition_id, transition_graph->id);
+         goto bail;
+     }
+ 
+     action = controld_get_action(stonith_id);
+     if (action == NULL) {
+-        crm_err("Stonith action not matched");
++        crm_err("Ignoring fence operation %d result: "
++                "Action %d not found in transition graph (bug?) "
++                CRM_XS " uuid=%s transition=%d",
++                data->call_id, stonith_id, uuid, transition_id);
++        goto bail;
++    }
++
++    target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
++    if (target == NULL) {
++        crm_err("Ignoring fence operation %d result: No target given (bug?)",
++                data->call_id);
+         goto bail;
+     }
+ 
+     stop_te_timer(action->timer);
+-    if (rc == pcmk_ok) {
+-        const char *target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
++    if (stonith__exit_status(data) == CRM_EX_OK) {
+         const char *uuid = crm_element_value(action->xml, XML_LRM_ATTR_TARGET_UUID);
+         const char *op = crm_meta_value(action->params, "stonith_action");
+ 
+-        crm_info("Stonith operation %d for %s passed", call_id, target);
++        crm_notice("Fence operation %d for %s passed", data->call_id, target);
+         if (!(pcmk_is_set(action->flags, pcmk__graph_action_confirmed))) {
+             te_action_confirmed(action, NULL);
+             if (pcmk__str_eq("on", op, pcmk__str_casei)) {
+@@ -791,20 +810,30 @@ tengine_stonith_callback(stonith_t *stonith, stonith_callback_data_t *data)
+         st_fail_count_reset(target);
+ 
+     } else {
+-        const char *target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
+         enum transition_action abort_action = tg_restart;
++        int status = stonith__execution_status(data);
++        const char *reason = stonith__exit_reason(data);
+ 
++        if (reason == NULL) {
++            if (status == PCMK_EXEC_DONE) {
++                reason = "Agent returned error";
++            } else {
++                reason = pcmk_exec_status_str(status);
++            }
++        }
+         crm__set_graph_action_flags(action, pcmk__graph_action_failed);
+-        crm_notice("Stonith operation %d for %s failed (%s): aborting transition.",
+-                   call_id, target, pcmk_strerror(rc));
+ 
+         /* If no fence devices were available, there's no use in immediately
+          * checking again, so don't start a new transition in that case.
+          */
+-        if (rc == -ENODEV) {
+-            crm_warn("No devices found in cluster to fence %s, giving up",
+-                     target);
++        if (status == PCMK_EXEC_NO_FENCE_DEVICE) {
++            crm_warn("Fence operation %d for %s failed: %s "
++                     "(aborting transition and giving up for now)",
++                     data->call_id, target, reason);
+             abort_action = tg_stop;
++        } else {
++            crm_notice("Fence operation %d for %s failed: %s "
++                       "(aborting transition)", data->call_id, target, reason);
+         }
+ 
+         /* Increment the fail count now, so abort_for_stonith_failure() can
+@@ -818,7 +847,7 @@ tengine_stonith_callback(stonith_t *stonith, stonith_callback_data_t *data)
+     trigger_graph();
+ 
+   bail:
+-    free(userdata);
++    free(data->userdata);
+     free(uuid);
+     return;
+ }
+-- 
+2.27.0
+
+
+From 25547e3b7e6eb23efad1c359388d6e8d0df62363 Mon Sep 17 00:00:00 2001
+From: Ken Gaillot <kgaillot@redhat.com>
+Date: Mon, 22 Nov 2021 12:37:16 -0600
+Subject: [PATCH 04/17] Refactor: executor: drop action_get_uniform_rc()
+ function
+
+action_get_uniform_rc() called stonith2uniform_rc() or services_result2ocf()
+depending on the action's resource standard. However, its only caller never
+processes stonith actions, so that caller can call services_result2ocf()
+directly.
+
+This will simplify planned changes.
+---
+ daemons/execd/execd_commands.c | 24 ++++++------------------
+ 1 file changed, 6 insertions(+), 18 deletions(-)
+
+diff --git a/daemons/execd/execd_commands.c b/daemons/execd/execd_commands.c
+index 5bb2aab692..5e123e322e 100644
+--- a/daemons/execd/execd_commands.c
++++ b/daemons/execd/execd_commands.c
+@@ -780,23 +780,6 @@ stonith2uniform_rc(const char *action, int rc)
+     return rc;
+ }
+ 
+-static int
+-action_get_uniform_rc(svc_action_t *action)
+-{
+-    lrmd_cmd_t *cmd = action->cb_data;
+-
+-    if (pcmk__str_eq(action->standard, PCMK_RESOURCE_CLASS_STONITH,
+-                            pcmk__str_casei)) {
+-        return stonith2uniform_rc(cmd->action, action->rc);
+-    } else {
+-        enum ocf_exitcode code = services_result2ocf(action->standard,
+-                                                     cmd->action, action->rc);
+-
+-        // Cast variable instead of function return to keep compilers happy
+-        return (int) code;
+-    }
+-}
+-
+ struct notify_new_client_data {
+     xmlNode *notify;
+     pcmk__client_t *new_client;
+@@ -848,6 +831,7 @@ action_complete(svc_action_t * action)
+ {
+     lrmd_rsc_t *rsc;
+     lrmd_cmd_t *cmd = action->cb_data;
++    enum ocf_exitcode code;
+ 
+ #ifdef PCMK__TIME_USE_CGT
+     const char *rclass = NULL;
+@@ -867,8 +851,12 @@ action_complete(svc_action_t * action)
+ #endif
+ 
+     cmd->last_pid = action->pid;
+-    pcmk__set_result(&(cmd->result), action_get_uniform_rc(action),
++
++    // Cast variable instead of function return to keep compilers happy
++    code = services_result2ocf(action->standard, cmd->action, action->rc);
++    pcmk__set_result(&(cmd->result), (int) code,
+                      action->status, services__exit_reason(action));
++
+     rsc = cmd->rsc_id ? g_hash_table_lookup(rsc_list, cmd->rsc_id) : NULL;
+ 
+ #ifdef PCMK__TIME_USE_CGT
+-- 
+2.27.0
+
+
+From b5e31ba2539da4e94c124c3f0c8c72f7039f9a7a Mon Sep 17 00:00:00 2001
+From: Ken Gaillot <kgaillot@redhat.com>
+Date: Mon, 22 Nov 2021 12:39:30 -0600
+Subject: [PATCH 05/17] Feature: executor: use full result from fencer for
+ fence actions
+
+Now that fence callbacks get the full result, we can improve the executor
+command result for fence actions. stonith_action_complete() now takes a
+full result, allowing the executor to use that directly rather than map a
+legacy return code.
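+
+In summary, the mapping now applied to fence device actions is (this restates
+the code below rather than adding anything new; a cancelled action keeps its
+PCMK_EXEC_CANCELLED status):
+
+    exit status:      anything nonzero        -> PCMK_OCF_UNKNOWN_ERROR
+    execution status: NOT_CONNECTED, INVALID,
+                      NO_SECRETS              -> PCMK_EXEC_ERROR
+                      NO_FENCE_DEVICE         -> PCMK_EXEC_ERROR, with exit
+                                                 status PCMK_OCF_NOT_RUNNING
+                                                 (monitor), PCMK_OCF_OK (stop),
+                                                 or PCMK_OCF_NOT_INSTALLED
+                                                 (other actions)
+                      NOT_SUPPORTED           -> PCMK_OCF_UNIMPLEMENT_FEATURE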
+---
+ daemons/execd/execd_commands.c | 140 +++++++++++++++++++--------------
+ 1 file changed, 80 insertions(+), 60 deletions(-)
+
+diff --git a/daemons/execd/execd_commands.c b/daemons/execd/execd_commands.c
+index 5e123e322e..e722994012 100644
+--- a/daemons/execd/execd_commands.c
++++ b/daemons/execd/execd_commands.c
+@@ -8,6 +8,7 @@
+  */
+ 
+ #include <crm_internal.h>
++#include <crm/fencing/internal.h>
+ 
+ #include <glib.h>
+ 
+@@ -748,38 +749,6 @@ cmd_finalize(lrmd_cmd_t * cmd, lrmd_rsc_t * rsc)
+     }
+ }
+ 
+-static int
+-stonith2uniform_rc(const char *action, int rc)
+-{
+-    switch (rc) {
+-        case pcmk_ok:
+-            rc = PCMK_OCF_OK;
+-            break;
+-
+-        case -ENODEV:
+-            /* This should be possible only for probes in practice, but
+-             * interpret for all actions to be safe.
+-             */
+-            if (pcmk__str_eq(action, "monitor", pcmk__str_casei)) {
+-                rc = PCMK_OCF_NOT_RUNNING;
+-            } else if (pcmk__str_eq(action, "stop", pcmk__str_casei)) {
+-                rc = PCMK_OCF_OK;
+-            } else {
+-                rc = PCMK_OCF_NOT_INSTALLED;
+-            }
+-            break;
+-
+-        case -EOPNOTSUPP:
+-            rc = PCMK_OCF_UNIMPLEMENT_FEATURE;
+-            break;
+-
+-        default:
+-            rc = PCMK_OCF_UNKNOWN_ERROR;
+-            break;
+-    }
+-    return rc;
+-}
+-
+ struct notify_new_client_data {
+     xmlNode *notify;
+     pcmk__client_t *new_client;
+@@ -988,46 +957,84 @@ action_complete(svc_action_t * action)
+     cmd_finalize(cmd, rsc);
+ }
+ 
++/*!
++ * \internal
++ * \brief Process the result of a fence device action (start, stop, or monitor)
++ *
++ * \param[in] cmd               Fence device action that completed
++ * \param[in] exit_status       Fencer API exit status for action
++ * \param[in] execution_status  Fencer API execution status for action
++ * \param[in] exit_reason       Human-friendly detail, if action failed
++ */
+ static void
+-stonith_action_complete(lrmd_cmd_t * cmd, int rc)
++stonith_action_complete(lrmd_cmd_t *cmd, int exit_status,
++                        enum pcmk_exec_status execution_status,
++                        const char *exit_reason)
+ {
+     // This can be NULL if resource was removed before command completed
+     lrmd_rsc_t *rsc = g_hash_table_lookup(rsc_list, cmd->rsc_id);
+ 
+-    cmd->result.exit_status = stonith2uniform_rc(cmd->action, rc);
++    // Simplify fencer exit status to uniform exit status
++    if (exit_status != CRM_EX_OK) {
++        exit_status = PCMK_OCF_UNKNOWN_ERROR;
++    }
+ 
+-    /* This function may be called with status already set to cancelled, if a
+-     * pending action was aborted. Otherwise, we need to determine status from
+-     * the fencer return code.
+-     */
+-    if (cmd->result.execution_status != PCMK_EXEC_CANCELLED) {
+-        cmd->result.execution_status = stonith__legacy2status(rc);
++    if (cmd->result.execution_status == PCMK_EXEC_CANCELLED) {
++        /* An in-flight fence action was cancelled. The execution status is
++         * already correct, so don't overwrite it.
++         */
++        execution_status = PCMK_EXEC_CANCELLED;
+ 
+-        // Simplify status codes from fencer
+-        switch (cmd->result.execution_status) {
++    } else {
++        /* Some execution status codes have specific meanings for the fencer
++         * that executor clients may not expect, so map them to a simple error
++         * status.
++         */
++        switch (execution_status) {
+             case PCMK_EXEC_NOT_CONNECTED:
+             case PCMK_EXEC_INVALID:
+-            case PCMK_EXEC_NO_FENCE_DEVICE:
+             case PCMK_EXEC_NO_SECRETS:
+-                cmd->result.execution_status = PCMK_EXEC_ERROR;
++                execution_status = PCMK_EXEC_ERROR;
+                 break;
+-            default:
++
++            case PCMK_EXEC_NO_FENCE_DEVICE:
++                /* This should be possible only for probes in practice, but
++                 * interpret for all actions to be safe.
++                 */
++                if (pcmk__str_eq(cmd->action, CRMD_ACTION_STATUS,
++                                 pcmk__str_none)) {
++                    exit_status = PCMK_OCF_NOT_RUNNING;
++
++                } else if (pcmk__str_eq(cmd->action, CRMD_ACTION_STOP,
++                                        pcmk__str_none)) {
++                    exit_status = PCMK_OCF_OK;
++
++                } else {
++                    exit_status = PCMK_OCF_NOT_INSTALLED;
++                }
++                execution_status = PCMK_EXEC_ERROR;
+                 break;
+-        }
+ 
+-        // Certain successful actions change the known state of the resource
+-        if ((rsc != NULL) && pcmk__result_ok(&(cmd->result))) {
+-            if (pcmk__str_eq(cmd->action, "start", pcmk__str_casei)) {
+-                rsc->st_probe_rc = pcmk_ok; // maps to PCMK_OCF_OK
+-            } else if (pcmk__str_eq(cmd->action, "stop", pcmk__str_casei)) {
+-                rsc->st_probe_rc = -ENODEV; // maps to PCMK_OCF_NOT_RUNNING
+-            }
++            case PCMK_EXEC_NOT_SUPPORTED:
++                exit_status = PCMK_OCF_UNIMPLEMENT_FEATURE;
++                break;
++
++            default:
++                break;
+         }
+     }
+ 
+-    // Give the user more detail than an OCF code
+-    if (rc != -pcmk_err_generic) {
+-        cmd->result.exit_reason = strdup(pcmk_strerror(rc));
++    pcmk__set_result(&cmd->result, exit_status, execution_status, exit_reason);
++
++    // Certain successful actions change the known state of the resource
++    if ((rsc != NULL) && pcmk__result_ok(&(cmd->result))) {
++
++        if (pcmk__str_eq(cmd->action, "start", pcmk__str_casei)) {
++            rsc->st_probe_rc = pcmk_ok; // maps to PCMK_OCF_OK
++
++        } else if (pcmk__str_eq(cmd->action, "stop", pcmk__str_casei)) {
++            rsc->st_probe_rc = -ENODEV; // maps to PCMK_OCF_NOT_RUNNING
++        }
+     }
+ 
+     /* The recurring timer should not be running at this point in any case, but
+@@ -1050,7 +1057,15 @@ stonith_action_complete(lrmd_cmd_t * cmd, int rc)
+ static void
+ lrmd_stonith_callback(stonith_t * stonith, stonith_callback_data_t * data)
+ {
+-    stonith_action_complete(data->userdata, data->rc);
++    if ((data == NULL) || (data->userdata == NULL)) {
++        crm_err("Ignoring fence action result: "
++                "Invalid callback arguments (bug?)");
++    } else {
++        stonith_action_complete((lrmd_cmd_t *) data->userdata,
++                                stonith__exit_status(data),
++                                stonith__execution_status(data),
++                                stonith__exit_reason(data));
++    }
+ }
+ 
+ void
+@@ -1097,7 +1112,9 @@ stonith_connection_failed(void)
+     crm_err("Connection to fencer failed, finalizing %d pending operations",
+             g_list_length(cmd_list));
+     for (cmd_iter = cmd_list; cmd_iter; cmd_iter = cmd_iter->next) {
+-        stonith_action_complete(cmd_iter->data, -ENOTCONN);
++        stonith_action_complete((lrmd_cmd_t *) cmd_iter->data,
++                                CRM_EX_ERROR, PCMK_EXEC_NOT_CONNECTED,
++                                "Lost connection to fencer");
+     }
+     g_list_free(cmd_list);
+ }
+@@ -1210,7 +1227,7 @@ lrmd_rsc_execute_stonith(lrmd_rsc_t * rsc, lrmd_cmd_t * cmd)
+ 
+     } else if (pcmk__str_eq(cmd->action, "start", pcmk__str_casei)) {
+         rc = execd_stonith_start(stonith_api, rsc, cmd);
+-        if (rc == 0) {
++        if (rc == pcmk_ok) {
+             do_monitor = TRUE;
+         }
+ 
+@@ -1233,7 +1250,10 @@ lrmd_rsc_execute_stonith(lrmd_rsc_t * rsc, lrmd_cmd_t * cmd)
+         }
+     }
+ 
+-    stonith_action_complete(cmd, rc);
++    stonith_action_complete(cmd,
++                            ((rc == pcmk_ok)? CRM_EX_OK : CRM_EX_ERROR),
++                            stonith__legacy2status(rc),
++                            rc == -pcmk_err_generic? NULL : pcmk_strerror(rc));
+ }
+ 
+ static int
+-- 
+2.27.0
+
+
+From 0cdc8506c2383cf05c2f62ab1ac9438958daf210 Mon Sep 17 00:00:00 2001
+From: Ken Gaillot <kgaillot@redhat.com>
+Date: Mon, 22 Nov 2021 16:15:05 -0600
+Subject: [PATCH 06/17] Fix: executor,scheduler: treat "no secrets" fence
+ results as a hard error
+
+Previously, the executor mapped the fencer's PCMK_EXEC_NO_SECRETS status to
+PCMK_EXEC_ERROR to keep handling of that situation the same as before the new
+code was added.
+
+However, the earlier handling was less than ideal -- a resource action that
+failed due to missing secrets would be retried on the same node, and almost
+certainly fail again for the same reason. Now, the executor passes along
+PCMK_EXEC_NO_SECRETS to clients; the controller will record the result in the
+CIB status, and the scheduler will treat it as a hard error (i.e. not retrying
+on the same node).
+
+Backward compatibility isn't a problem because the scheduler treats unknown
+status codes the same as PCMK_EXEC_ERROR, so an older DC will continue to
+handle it as before. The CRM feature set has been bumped so the handling can't
+flip back and forth in a mixed-version cluster.
+---
+ daemons/execd/execd_commands.c | 1 -
+ include/crm/crm.h              | 4 ++--
+ lib/pengine/unpack.c           | 3 ---
+ 3 files changed, 2 insertions(+), 6 deletions(-)
+
+diff --git a/daemons/execd/execd_commands.c b/daemons/execd/execd_commands.c
+index e722994012..4ced6d1d5c 100644
+--- a/daemons/execd/execd_commands.c
++++ b/daemons/execd/execd_commands.c
+@@ -993,7 +993,6 @@ stonith_action_complete(lrmd_cmd_t *cmd, int exit_status,
+         switch (execution_status) {
+             case PCMK_EXEC_NOT_CONNECTED:
+             case PCMK_EXEC_INVALID:
+-            case PCMK_EXEC_NO_SECRETS:
+                 execution_status = PCMK_EXEC_ERROR;
+                 break;
+ 
+diff --git a/include/crm/crm.h b/include/crm/crm.h
+index 16b35e9c55..56b07cb12a 100644
+--- a/include/crm/crm.h
++++ b/include/crm/crm.h
+@@ -1,5 +1,5 @@
+ /*
+- * Copyright 2004-2021 the Pacemaker project contributors
++ * Copyright 2004-2022 the Pacemaker project contributors
+  *
+  * The version control history for this file may have further details.
+  *
+@@ -66,7 +66,7 @@ extern "C" {
+  * >=3.0.13: Fail counts include operation name and interval
+  * >=3.2.0:  DC supports PCMK_EXEC_INVALID and PCMK_EXEC_NOT_CONNECTED
+  */
+-#  define CRM_FEATURE_SET		"3.12.0"
++#  define CRM_FEATURE_SET		"3.13.0"
+ 
+ /* Pacemaker's CPG protocols use fixed-width binary fields for the sender and
+  * recipient of a CPG message. This imposes an arbitrary limit on cluster node
+diff --git a/lib/pengine/unpack.c b/lib/pengine/unpack.c
+index 3e0384cd2a..8a2d2a6d6d 100644
+--- a/lib/pengine/unpack.c
++++ b/lib/pengine/unpack.c
+@@ -3879,9 +3879,6 @@ unpack_rsc_op(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op,
+         case PCMK_EXEC_INVALID:
+             break; // Not done, do error handling
+ 
+-        /* These should only be possible in fence action results, not operation
+-         * history, but have some handling in place as a fail-safe.
+-         */
+         case PCMK_EXEC_NO_FENCE_DEVICE:
+         case PCMK_EXEC_NO_SECRETS:
+             status = PCMK_EXEC_ERROR_HARD;
+-- 
+2.27.0
+
+
+From 75c1bdcf3ffc406e6fa286fd5fcff83e1e65591a Mon Sep 17 00:00:00 2001
+From: Ken Gaillot <kgaillot@redhat.com>
+Date: Wed, 10 Nov 2021 12:05:20 -0600
+Subject: [PATCH 07/17] Low: executor: improve result for fence device probes
+
+Now that lrmd_rsc_execute_stonith() sets a full result instead of just a legacy
+return code, refactor lrmd_rsc_t's st_probe_rc as an execution status (and
+rename to fence_probe_result). Set an appropriate exit reason when available.
+---
+ daemons/execd/execd_commands.c  | 57 ++++++++++++++++++++++++++-------
+ daemons/execd/pacemaker-execd.h |  9 +++++-
+ 2 files changed, 54 insertions(+), 12 deletions(-)
+
+diff --git a/daemons/execd/execd_commands.c b/daemons/execd/execd_commands.c
+index 4ced6d1d5c..6e5505e973 100644
+--- a/daemons/execd/execd_commands.c
++++ b/daemons/execd/execd_commands.c
+@@ -285,7 +285,9 @@ build_rsc_from_xml(xmlNode * msg)
+     rsc->provider = crm_element_value_copy(rsc_xml, F_LRMD_PROVIDER);
+     rsc->type = crm_element_value_copy(rsc_xml, F_LRMD_TYPE);
+     rsc->work = mainloop_add_trigger(G_PRIORITY_HIGH, lrmd_rsc_dispatch, rsc);
+-    rsc->st_probe_rc = -ENODEV; // if stonith, initialize to "not running"
++
++    // Initialize fence device probes (to return "not running")
++    rsc->fence_probe_result = PCMK_EXEC_NO_FENCE_DEVICE;
+     return rsc;
+ }
+ 
+@@ -1029,10 +1031,10 @@ stonith_action_complete(lrmd_cmd_t *cmd, int exit_status,
+     if ((rsc != NULL) && pcmk__result_ok(&(cmd->result))) {
+ 
+         if (pcmk__str_eq(cmd->action, "start", pcmk__str_casei)) {
+-            rsc->st_probe_rc = pcmk_ok; // maps to PCMK_OCF_OK
++            rsc->fence_probe_result = PCMK_EXEC_DONE; // "running"
+ 
+         } else if (pcmk__str_eq(cmd->action, "stop", pcmk__str_casei)) {
+-            rsc->st_probe_rc = -ENODEV; // maps to PCMK_OCF_NOT_RUNNING
++            rsc->fence_probe_result = PCMK_EXEC_NO_FENCE_DEVICE; // "not running"
+         }
+     }
+ 
+@@ -1081,14 +1083,13 @@ stonith_connection_failed(void)
+         if (pcmk__str_eq(rsc->class, PCMK_RESOURCE_CLASS_STONITH, pcmk__str_casei)) {
+             /* If we registered this fence device, we don't know whether the
+              * fencer still has the registration or not. Cause future probes to
+-             * return PCMK_OCF_UNKNOWN_ERROR until the resource is stopped or
+-             * started successfully. This is especially important if the
+-             * controller also went away (possibly due to a cluster layer
+-             * restart) and won't receive our client notification of any
+-             * monitors finalized below.
++             * return an error until the resource is stopped or started
++             * successfully. This is especially important if the controller also
++             * went away (possibly due to a cluster layer restart) and won't
++             * receive our client notification of any monitors finalized below.
+              */
+-            if (rsc->st_probe_rc == pcmk_ok) {
+-                rsc->st_probe_rc = pcmk_err_generic;
++            if (rsc->fence_probe_result == PCMK_EXEC_DONE) {
++                rsc->fence_probe_result = PCMK_EXEC_NOT_CONNECTED;
+             }
+ 
+             if (rsc->active) {
+@@ -1213,6 +1214,39 @@ execd_stonith_monitor(stonith_t *stonith_api, lrmd_rsc_t *rsc, lrmd_cmd_t *cmd)
+     return rc;
+ }
+ 
++/*!
++ * \internal
++ * \brief  Finalize the result of a fence device probe
++ *
++ * \param[in] cmd           Probe action
++ * \param[in] probe_result  Probe result
++ */
++static void
++finalize_fence_device_probe(lrmd_cmd_t *cmd, enum pcmk_exec_status probe_result)
++{
++    int exit_status = CRM_EX_ERROR;
++    const char *reason = NULL;
++
++    switch (probe_result) {
++        case PCMK_EXEC_DONE: // Device is "running"
++            exit_status = CRM_EX_OK;
++            break;
++
++        case PCMK_EXEC_NO_FENCE_DEVICE: // Device is "not running"
++            break;
++
++        case PCMK_EXEC_NOT_CONNECTED: // stonith_connection_failed()
++            reason = "Lost connection to fencer";
++            break;
++
++        default: // Shouldn't be possible
++            probe_result = PCMK_EXEC_ERROR;
++            reason = "Invalid fence device probe result (bug?)";
++            break;
++    }
++    stonith_action_complete(cmd, exit_status, probe_result, reason);
++}
++
+ static void
+ lrmd_rsc_execute_stonith(lrmd_rsc_t * rsc, lrmd_cmd_t * cmd)
+ {
+@@ -1237,7 +1271,8 @@ lrmd_rsc_execute_stonith(lrmd_rsc_t * rsc, lrmd_cmd_t * cmd)
+         if (cmd->interval_ms > 0) {
+             do_monitor = TRUE;
+         } else {
+-            rc = rsc->st_probe_rc;
++            finalize_fence_device_probe(cmd, rsc->fence_probe_result);
++            return;
+         }
+     }
+ 
+diff --git a/daemons/execd/pacemaker-execd.h b/daemons/execd/pacemaker-execd.h
+index 51ef8d22e6..057d889584 100644
+--- a/daemons/execd/pacemaker-execd.h
++++ b/daemons/execd/pacemaker-execd.h
+@@ -41,7 +41,14 @@ typedef struct lrmd_rsc_s {
+      * that have been handed off from the pending ops list. */
+     GList *recurring_ops;
+ 
+-    int st_probe_rc; // What value should be returned for a probe if stonith
++    /* If this resource is a fence device, probes are handled internally by the
++     * executor, and this value indicates the result that should currently be
++     * returned for probes. It should be one of:
++     * PCMK_EXEC_DONE (to indicate "running"),
++     * PCMK_EXEC_NO_FENCE_DEVICE ("not running"), or
++     * PCMK_EXEC_NOT_CONNECTED ("unknown because fencer connection was lost").
++     */
++    enum pcmk_exec_status fence_probe_result;
+ 
+     crm_trigger_t *work;
+ } lrmd_rsc_t;
+-- 
+2.27.0
+
+
+From 1ab799d945171ab8d91bd0aada64e70a71193e5c Mon Sep 17 00:00:00 2001
+From: Ken Gaillot <kgaillot@redhat.com>
+Date: Wed, 10 Nov 2021 12:14:48 -0600
+Subject: [PATCH 08/17] Low: executor: don't require a fencer connection for
+ probes
+
+For fence devices, probe results are based on earlier state determinations,
+so handle them before requiring an active fencer connection. The effect may be
+negligible, but it would allow probes to proceed while waiting for a
+reconnection.
+---
+ daemons/execd/execd_commands.c | 15 ++++++++-------
+ 1 file changed, 8 insertions(+), 7 deletions(-)
+
+diff --git a/daemons/execd/execd_commands.c b/daemons/execd/execd_commands.c
+index 6e5505e973..5999ba19c9 100644
+--- a/daemons/execd/execd_commands.c
++++ b/daemons/execd/execd_commands.c
+@@ -1255,7 +1255,13 @@ lrmd_rsc_execute_stonith(lrmd_rsc_t * rsc, lrmd_cmd_t * cmd)
+ 
+     stonith_t *stonith_api = get_stonith_connection();
+ 
+-    if (!stonith_api) {
++    if (pcmk__str_eq(cmd->action, "monitor", pcmk__str_casei)
++        && (cmd->interval_ms == 0)) {
++        // Probes don't require a fencer connection
++        finalize_fence_device_probe(cmd, rsc->fence_probe_result);
++        return;
++
++    } else if (stonith_api == NULL) {
+         rc = -ENOTCONN;
+ 
+     } else if (pcmk__str_eq(cmd->action, "start", pcmk__str_casei)) {
+@@ -1268,12 +1274,7 @@ lrmd_rsc_execute_stonith(lrmd_rsc_t * rsc, lrmd_cmd_t * cmd)
+         rc = execd_stonith_stop(stonith_api, rsc);
+ 
+     } else if (pcmk__str_eq(cmd->action, "monitor", pcmk__str_casei)) {
+-        if (cmd->interval_ms > 0) {
+-            do_monitor = TRUE;
+-        } else {
+-            finalize_fence_device_probe(cmd, rsc->fence_probe_result);
+-            return;
+-        }
++        do_monitor = TRUE;
+     }
+ 
+     if (do_monitor) {
+-- 
+2.27.0
+
+
+From adf41fb1637bcc9a6e057be52d61a0b26e4535cc Mon Sep 17 00:00:00 2001
+From: Ken Gaillot <kgaillot@redhat.com>
+Date: Wed, 10 Nov 2021 12:20:34 -0600
+Subject: [PATCH 09/17] Low: executor: return an error for unsupported fence
+ device actions
+
+... and set an exit reason. Previously, unsupported actions would return
+success. Such actions shouldn't be possible, but an indication of what went
+wrong is useful if a bug ever introduces one.
+---
+ daemons/execd/execd_commands.c | 6 ++++++
+ 1 file changed, 6 insertions(+)
+
+diff --git a/daemons/execd/execd_commands.c b/daemons/execd/execd_commands.c
+index 5999ba19c9..772d6446dc 100644
+--- a/daemons/execd/execd_commands.c
++++ b/daemons/execd/execd_commands.c
+@@ -1275,6 +1275,12 @@ lrmd_rsc_execute_stonith(lrmd_rsc_t * rsc, lrmd_cmd_t * cmd)
+ 
+     } else if (pcmk__str_eq(cmd->action, "monitor", pcmk__str_casei)) {
+         do_monitor = TRUE;
++
++    } else {
++        stonith_action_complete(cmd, PCMK_OCF_UNIMPLEMENT_FEATURE,
++                                PCMK_EXEC_ERROR,
++                                "Invalid fence device action (bug?)");
++        return;
+     }
+ 
+     if (do_monitor) {
+-- 
+2.27.0
+
+
+From af59dfe85bc83f5609d0a3b3b7939271549cb76f Mon Sep 17 00:00:00 2001
+From: Ken Gaillot <kgaillot@redhat.com>
+Date: Wed, 10 Nov 2021 12:24:07 -0600
+Subject: [PATCH 10/17] Low: executor: set exit reason if no fencer connection
+
+---
+ daemons/execd/execd_commands.c | 5 ++++-
+ 1 file changed, 4 insertions(+), 1 deletion(-)
+
+diff --git a/daemons/execd/execd_commands.c b/daemons/execd/execd_commands.c
+index 772d6446dc..7ae309d94c 100644
+--- a/daemons/execd/execd_commands.c
++++ b/daemons/execd/execd_commands.c
+@@ -1262,7 +1262,10 @@ lrmd_rsc_execute_stonith(lrmd_rsc_t * rsc, lrmd_cmd_t * cmd)
+         return;
+ 
+     } else if (stonith_api == NULL) {
+-        rc = -ENOTCONN;
++        stonith_action_complete(cmd, PCMK_OCF_UNKNOWN_ERROR,
++                                PCMK_EXEC_NOT_CONNECTED,
++                                "No connection to fencer");
++        return;
+ 
+     } else if (pcmk__str_eq(cmd->action, "start", pcmk__str_casei)) {
+         rc = execd_stonith_start(stonith_api, rsc, cmd);
+-- 
+2.27.0
+
+
+From ad0930b75d5617490c3a0dc3c6b83411b3c4536d Mon Sep 17 00:00:00 2001
+From: Ken Gaillot <kgaillot@redhat.com>
+Date: Wed, 10 Nov 2021 14:42:26 -0600
+Subject: [PATCH 11/17] Test: cts-fence-helper: log full result in fence
+ callback
+
+---
+ daemons/fenced/cts-fence-helper.c | 7 +++++--
+ 1 file changed, 5 insertions(+), 2 deletions(-)
+
+diff --git a/daemons/fenced/cts-fence-helper.c b/daemons/fenced/cts-fence-helper.c
+index 2adb032f24..c2b55d73b9 100644
+--- a/daemons/fenced/cts-fence-helper.c
++++ b/daemons/fenced/cts-fence-helper.c
+@@ -1,5 +1,5 @@
+ /*
+- * Copyright 2009-2020 the Pacemaker project contributors
++ * Copyright 2009-2021 the Pacemaker project contributors
+  *
+  * This source code is licensed under the GNU General Public License version 2
+  * or later (GPLv2+) WITHOUT ANY WARRANTY.
+@@ -132,7 +132,10 @@ st_callback(stonith_t * st, stonith_event_t * e)
+ static void
+ st_global_callback(stonith_t * stonith, stonith_callback_data_t * data)
+ {
+-    crm_notice("Call id %d completed with rc %d", data->call_id, data->rc);
++    crm_notice("Call %d exited %d: %s (%s)",
++               data->call_id, stonith__exit_status(data),
++               pcmk_exec_status_str(stonith__execution_status(data)),
++               crm_str(stonith__exit_reason(data)));
+ }
+ 
+ static void
+-- 
+2.27.0
+
+
+From 1b50ff4d83b7a96cd70389891b7b6568812f66f6 Mon Sep 17 00:00:00 2001
+From: Ken Gaillot <kgaillot@redhat.com>
+Date: Wed, 10 Nov 2021 15:10:14 -0600
+Subject: [PATCH 12/17] Test: cts-fence-helper: track full result instead of
+ legacy return code
+
+---
+ daemons/fenced/cts-fence-helper.c | 77 +++++++++++++++----------------
+ 1 file changed, 37 insertions(+), 40 deletions(-)
+
+diff --git a/daemons/fenced/cts-fence-helper.c b/daemons/fenced/cts-fence-helper.c
+index c2b55d73b9..2739f57804 100644
+--- a/daemons/fenced/cts-fence-helper.c
++++ b/daemons/fenced/cts-fence-helper.c
+@@ -34,23 +34,12 @@
+ static GMainLoop *mainloop = NULL;
+ static crm_trigger_t *trig = NULL;
+ static int mainloop_iter = 0;
+-static int callback_rc = 0;
++static pcmk__action_result_t result = PCMK__UNKNOWN_RESULT;
++
+ typedef void (*mainloop_test_iteration_cb) (int check_event);
+ 
+ #define MAINLOOP_DEFAULT_TIMEOUT 2
+ 
+-#define mainloop_test_done(pass) \
+-    if (pass) { \
+-        crm_info("SUCCESS - %s", __func__); \
+-        mainloop_iter++;   \
+-        mainloop_set_trigger(trig);  \
+-    } else { \
+-        crm_err("FAILURE = %s async_callback %d", __func__, callback_rc); \
+-        crm_exit(CRM_EX_ERROR); \
+-    } \
+-    callback_rc = 0; \
+-
+-
+ enum test_modes {
+     test_standard = 0,  // test using a specific developer environment
+     test_passive,       // watch notifications only
+@@ -93,6 +82,23 @@ static const int st_opts = st_opt_sync_call;
+ static int expected_notifications = 0;
+ static int verbose = 0;
+ 
++static void
++mainloop_test_done(const char *origin, bool pass)
++{
++    if (pass) {
++        crm_info("SUCCESS - %s", origin);
++        mainloop_iter++;
++        mainloop_set_trigger(trig);
++        result.execution_status = PCMK_EXEC_UNKNOWN;
++        result.exit_status = CRM_EX_OK;
++    } else {
++        crm_err("FAILURE - %s (%d: %s)", origin, result.exit_status,
++                pcmk_exec_status_str(result.execution_status));
++        crm_exit(CRM_EX_ERROR);
++    }
++}
++
++
+ static void
+ dispatch_helper(int timeout)
+ {
+@@ -385,7 +391,9 @@ static void
+ static void
+ mainloop_callback(stonith_t * stonith, stonith_callback_data_t * data)
+ {
+-    callback_rc = data->rc;
++    pcmk__set_result(&result, stonith__exit_status(data),
++                     stonith__execution_status(data),
++                     stonith__exit_reason(data));
+     iterate_mainloop_tests(TRUE);
+ }
+ 
+@@ -404,18 +412,14 @@ test_async_fence_pass(int check_event)
+     int rc = 0;
+ 
+     if (check_event) {
+-        if (callback_rc != 0) {
+-            mainloop_test_done(FALSE);
+-        } else {
+-            mainloop_test_done(TRUE);
+-        }
++        mainloop_test_done(__func__, (result.exit_status == CRM_EX_OK));
+         return;
+     }
+ 
+     rc = st->cmds->fence(st, 0, "true_1_node1", "off", MAINLOOP_DEFAULT_TIMEOUT, 0);
+     if (rc < 0) {
+         crm_err("fence failed with rc %d", rc);
+-        mainloop_test_done(FALSE);
++        mainloop_test_done(__func__, false);
+     }
+     register_callback_helper(rc);
+     /* wait for event */
+@@ -431,15 +435,15 @@ test_async_fence_custom_timeout(int check_event)
+     if (check_event) {
+         uint32_t diff = (time(NULL) - begin);
+ 
+-        if (callback_rc != -ETIME) {
+-            mainloop_test_done(FALSE);
++        if (result.execution_status != PCMK_EXEC_TIMEOUT) {
++            mainloop_test_done(__func__, false);
+         } else if (diff < CUSTOM_TIMEOUT_ADDITION + MAINLOOP_DEFAULT_TIMEOUT) {
+             crm_err
+                 ("Custom timeout test failed, callback expiration should be updated to %d, actual timeout was %d",
+                  CUSTOM_TIMEOUT_ADDITION + MAINLOOP_DEFAULT_TIMEOUT, diff);
+-            mainloop_test_done(FALSE);
++            mainloop_test_done(__func__, false);
+         } else {
+-            mainloop_test_done(TRUE);
++            mainloop_test_done(__func__, true);
+         }
+         return;
+     }
+@@ -448,7 +452,7 @@ test_async_fence_custom_timeout(int check_event)
+     rc = st->cmds->fence(st, 0, "custom_timeout_node1", "off", MAINLOOP_DEFAULT_TIMEOUT, 0);
+     if (rc < 0) {
+         crm_err("fence failed with rc %d", rc);
+-        mainloop_test_done(FALSE);
++        mainloop_test_done(__func__, false);
+     }
+     register_callback_helper(rc);
+     /* wait for event */
+@@ -460,18 +464,15 @@ test_async_fence_timeout(int check_event)
+     int rc = 0;
+ 
+     if (check_event) {
+-        if (callback_rc != -ENODEV) {
+-            mainloop_test_done(FALSE);
+-        } else {
+-            mainloop_test_done(TRUE);
+-        }
++        mainloop_test_done(__func__,
++                           (result.execution_status == PCMK_EXEC_NO_FENCE_DEVICE));
+         return;
+     }
+ 
+     rc = st->cmds->fence(st, 0, "false_1_node2", "off", MAINLOOP_DEFAULT_TIMEOUT, 0);
+     if (rc < 0) {
+         crm_err("fence failed with rc %d", rc);
+-        mainloop_test_done(FALSE);
++        mainloop_test_done(__func__, false);
+     }
+     register_callback_helper(rc);
+     /* wait for event */
+@@ -483,18 +484,14 @@ test_async_monitor(int check_event)
+     int rc = 0;
+ 
+     if (check_event) {
+-        if (callback_rc) {
+-            mainloop_test_done(FALSE);
+-        } else {
+-            mainloop_test_done(TRUE);
+-        }
++        mainloop_test_done(__func__, (result.exit_status == CRM_EX_OK));
+         return;
+     }
+ 
+     rc = st->cmds->monitor(st, 0, "false_1", MAINLOOP_DEFAULT_TIMEOUT);
+     if (rc < 0) {
+         crm_err("monitor failed with rc %d", rc);
+-        mainloop_test_done(FALSE);
++        mainloop_test_done(__func__, false);
+     }
+ 
+     register_callback_helper(rc);
+@@ -531,7 +528,7 @@ test_register_async_devices(int check_event)
+                               params);
+     stonith_key_value_freeall(params, 1, 1);
+ 
+-    mainloop_test_done(TRUE);
++    mainloop_test_done(__func__, true);
+ }
+ 
+ static void
+@@ -540,11 +537,11 @@ try_mainloop_connect(int check_event)
+     int rc = stonith_api_connect_retry(st, crm_system_name, 10);
+ 
+     if (rc == pcmk_ok) {
+-        mainloop_test_done(TRUE);
++        mainloop_test_done(__func__, true);
+         return;
+     }
+     crm_err("API CONNECTION FAILURE");
+-    mainloop_test_done(FALSE);
++    mainloop_test_done(__func__, false);
+ }
+ 
+ static void
+-- 
+2.27.0
+
+
+From 8ff4b384a34828a4a9eebe896324ba8c89e5d66c Mon Sep 17 00:00:00 2001
+From: Ken Gaillot <kgaillot@redhat.com>
+Date: Mon, 10 Jan 2022 10:27:45 -0600
+Subject: [PATCH 13/17] Doc: Pacemaker Development: correct typo
+
+caught in review
+---
+ doc/sphinx/Pacemaker_Development/components.rst | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/doc/sphinx/Pacemaker_Development/components.rst b/doc/sphinx/Pacemaker_Development/components.rst
+index 68158484ce..c4d10fc9f5 100644
+--- a/doc/sphinx/Pacemaker_Development/components.rst
++++ b/doc/sphinx/Pacemaker_Development/components.rst
+@@ -171,7 +171,7 @@ messaging layer callback, which calls:
+ 
+     * ``fenced_process_fencing_reply()``, which calls either
+       ``request_peer_fencing()`` (to retry a failed operation, or try the next
+-      device in a topology is appropriate, which issues a new
++      device in a topology if appropriate, which issues a new
+       ``STONITH_OP_FENCE`` request, proceeding as before) or
+       ``finalize_op()`` (if the operation is definitively failed or
+       successful).
+-- 
+2.27.0
+
+
+From 822ee6fbd8583a2939c636b3bccceffcc338c567 Mon Sep 17 00:00:00 2001
+From: Ken Gaillot <kgaillot@redhat.com>
+Date: Mon, 10 Jan 2022 11:05:40 -0600
+Subject: [PATCH 14/17] Doc: Pacemaker Development: add a placeholder for how
+ fencing history works
+
+---
+ doc/sphinx/Pacemaker_Development/components.rst | 15 +++++++++++++++
+ 1 file changed, 15 insertions(+)
+
+diff --git a/doc/sphinx/Pacemaker_Development/components.rst b/doc/sphinx/Pacemaker_Development/components.rst
+index c4d10fc9f5..760da77c9b 100644
+--- a/doc/sphinx/Pacemaker_Development/components.rst
++++ b/doc/sphinx/Pacemaker_Development/components.rst
+@@ -183,6 +183,21 @@ Finally, all peers receive the broadcast result and call
+ * ``finalize_op()``, which sends the result to all local clients.
+ 
+ 
++.. index::
++   single: fence history
++
++Fencing History
++_______________
++
++The fencer keeps a running history of all fencing operations. The bulk of the
++relevant code is in ``fenced_history.c`` and ensures the history is
++synchronized across all nodes even if a node leaves and rejoins the cluster.
++
++In libstonithd, this information is represented by ``stonith_history_t`` and is
++queryable by the ``stonith_api_operations_t:history()`` method. ``crm_mon`` and
++``stonith_admin`` use this API to display the history.
++
++
+ .. index::
+    single: scheduler
+    single: pacemaker-schedulerd
+-- 
+2.27.0
+
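+For reference (not part of the patches in this series), a client could fetch
+the synchronized history via the public fencing API roughly as follows; error
+handling is trimmed and the client name is arbitrary:
+
+    #include <crm/stonith-ng.h>
+
+    static void
+    print_fence_history(void)
+    {
+        stonith_t *st = stonith_api_new();
+        stonith_history_t *history = NULL;
+
+        if ((st != NULL)
+            && (st->cmds->connect(st, "history-example", NULL) == pcmk_ok)) {
+
+            // NULL target means all nodes; timeout of 120s
+            if (st->cmds->history(st, st_opt_sync_call, NULL, &history,
+                                  120) == pcmk_ok) {
+                for (stonith_history_t *hp = history; hp != NULL;
+                     hp = hp->next) {
+                    crm_info("%s of %s: state %d",
+                             hp->action, hp->target, hp->state);
+                }
+                stonith_history_free(history);
+            }
+            st->cmds->disconnect(st);
+        }
+        stonith_api_delete(st);
+    }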
+
+From d9b4060f2dadb40d5ee7535e0b2890a83d216c1e Mon Sep 17 00:00:00 2001
+From: Ken Gaillot <kgaillot@redhat.com>
+Date: Mon, 10 Jan 2022 11:25:31 -0600
+Subject: [PATCH 15/17] Log: fencing: add exit reason for results without a
+ callback
+
+---
+ lib/fencing/st_client.c | 6 ++++--
+ 1 file changed, 4 insertions(+), 2 deletions(-)
+
+diff --git a/lib/fencing/st_client.c b/lib/fencing/st_client.c
+index 9d93ffd481..4823751267 100644
+--- a/lib/fencing/st_client.c
++++ b/lib/fencing/st_client.c
+@@ -926,9 +926,11 @@ invoke_registered_callbacks(stonith_t *stonith, xmlNode *msg, int call_id)
+                                      cb_info->user_data, cb_info->callback);
+ 
+     } else if ((private->op_callback == NULL) && !pcmk__result_ok(&result)) {
+-        crm_warn("Fencing action without registered callback failed: %d (%s)",
++        crm_warn("Fencing action without registered callback failed: %d (%s%s%s)",
+                  result.exit_status,
+-                 pcmk_exec_status_str(result.execution_status));
++                 pcmk_exec_status_str(result.execution_status),
++                 ((result.exit_reason == NULL)? "" : ": "),
++                 ((result.exit_reason == NULL)? "" : result.exit_reason));
+         crm_log_xml_debug(msg, "Failed fence update");
+     }
+ 
+-- 
+2.27.0
+
+
+From 9956b3ad2f1c6fba305252616ad0b35a38ab96da Mon Sep 17 00:00:00 2001
+From: Ken Gaillot <kgaillot@redhat.com>
+Date: Tue, 11 Jan 2022 09:28:27 -0600
+Subject: [PATCH 16/17] Refactor: executor: keep formatting consistent
+
+... even if the line runs a little long
+---
+ daemons/execd/execd_commands.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/daemons/execd/execd_commands.c b/daemons/execd/execd_commands.c
+index 7ae309d94c..bc3b392b2c 100644
+--- a/daemons/execd/execd_commands.c
++++ b/daemons/execd/execd_commands.c
+@@ -1,5 +1,5 @@
+ /*
+- * Copyright 2012-2021 the Pacemaker project contributors
++ * Copyright 2012-2022 the Pacemaker project contributors
+  *
+  * The version control history for this file may have further details.
+  *
+@@ -1297,7 +1297,7 @@ lrmd_rsc_execute_stonith(lrmd_rsc_t * rsc, lrmd_cmd_t * cmd)
+     stonith_action_complete(cmd,
+                             ((rc == pcmk_ok)? CRM_EX_OK : CRM_EX_ERROR),
+                             stonith__legacy2status(rc),
+-                            rc == -pcmk_err_generic? NULL : pcmk_strerror(rc));
++                            ((rc == -pcmk_err_generic)? NULL : pcmk_strerror(rc)));
+ }
+ 
+ static int
+-- 
+2.27.0
+
+
+From 69d8ecb17568d6c3ecad0e5735756f58a4bce5a1 Mon Sep 17 00:00:00 2001
+From: Ken Gaillot <kgaillot@redhat.com>
+Date: Tue, 11 Jan 2022 09:29:03 -0600
+Subject: [PATCH 17/17] Test: cts-fence-helper: use more intuitive execution
+ status for completed tests
+
+The value doesn't matter functionally, since it is only checked against a
+couple of specific failure values, but this choice is less confusing.
+---
+ daemons/fenced/cts-fence-helper.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/daemons/fenced/cts-fence-helper.c b/daemons/fenced/cts-fence-helper.c
+index 2739f57804..e222a59f9f 100644
+--- a/daemons/fenced/cts-fence-helper.c
++++ b/daemons/fenced/cts-fence-helper.c
+@@ -1,5 +1,5 @@
+ /*
+- * Copyright 2009-2021 the Pacemaker project contributors
++ * Copyright 2009-2022 the Pacemaker project contributors
+  *
+  * This source code is licensed under the GNU General Public License version 2
+  * or later (GPLv2+) WITHOUT ANY WARRANTY.
+@@ -89,7 +89,7 @@ mainloop_test_done(const char *origin, bool pass)
+         crm_info("SUCCESS - %s", origin);
+         mainloop_iter++;
+         mainloop_set_trigger(trig);
+-        result.execution_status = PCMK_EXEC_UNKNOWN;
++        result.execution_status = PCMK_EXEC_DONE;
+         result.exit_status = CRM_EX_OK;
+     } else {
+         crm_err("FAILURE - %s (%d: %s)", origin, result.exit_status,
+-- 
+2.27.0
+
diff --git a/SOURCES/012-notify-crash.patch b/SOURCES/012-notify-crash.patch
new file mode 100644
index 0000000..c18e4f5
--- /dev/null
+++ b/SOURCES/012-notify-crash.patch
@@ -0,0 +1,65 @@
+From ed8b2c86ab77aaa3d7fd688c049ad5e1b922a9c6 Mon Sep 17 00:00:00 2001
+From: Reid Wahl <nrwahl@protonmail.com>
+Date: Thu, 13 Jan 2022 02:56:55 -0800
+Subject: [PATCH] Fix: liblrmd: Avoid double-free during notify operation
+
+This commit fixes a regression, introduced by 31c7fa8a, that causes a
+double-free in notify operations. lrmd_dispatch_internal() assigns the
+exit_reason string directly from an XML node to a new lrmd_event_data_t
+object (without duplicating it), so the string ends up being freed twice.
+
+Free #1: pcmk__create_history_xml() (reached via callback) calls
+lrmd__set_result(), which frees event.exit_reason and sets it to NULL.
+Free #2: lrmd_ipc_dispatch() frees the XML node, which contains a
+pointer to the exit_reason string just freed, after
+lrmd_dispatch_internal() returns.
+
+Prior to 31c7fa8a, pcmk__create_history_xml reset event.rc and
+event.op_status but **not** event.exit_reason.
+
+In this commit we simply make a copy of event.exit_reason in
+lrmd_dispatch_internal() before the callback. This way we don't have to
+worry about whatever happens in the callback, and we can continue to
+unset the exit_reason alongside the rc and op_status. The added overhead
+should be minimal.
+
+This commit also makes a copy of output. That's not strictly necessary
+but adds some futureproofing and allows us to call lrmd__reset_result()
+at the end of lrmd_dispatch_internal().
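+
+For illustration, the hazardous ownership pattern looked roughly like this
+(simplified, not the literal code):
+
+    /* exit_reason points into msg's XML tree -- borrowed, not owned */
+    event.exit_reason = crm_element_value(msg, F_LRMD_RSC_EXIT_REASON);
+
+    callback(&event);  /* may call lrmd__set_result(), which frees it */
+
+    free_xml(msg);     /* frees the same string again via the XML tree */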
+
+Resolves: RHBZ#2039675
+
+Signed-off-by: Reid Wahl <nrwahl@protonmail.com>
+---
+ lib/lrmd/lrmd_client.c | 8 +++++---
+ 1 file changed, 5 insertions(+), 3 deletions(-)
+
+diff --git a/lib/lrmd/lrmd_client.c b/lib/lrmd/lrmd_client.c
+index ee31bb5ae9..5131a648b7 100644
+--- a/lib/lrmd/lrmd_client.c
++++ b/lib/lrmd/lrmd_client.c
+@@ -305,9 +305,10 @@ lrmd_dispatch_internal(lrmd_t * lrmd, xmlNode * msg)
+         event.user_data = crm_element_value(msg, F_LRMD_RSC_USERDATA_STR);
+         event.type = lrmd_event_exec_complete;
+ 
+-        // No need to duplicate the memory, so don't use setter functions
+-        event.output = crm_element_value(msg, F_LRMD_RSC_OUTPUT);
+-        event.exit_reason = crm_element_value(msg, F_LRMD_RSC_EXIT_REASON);
++        /* output and exit_reason may be freed by a callback */
++        event.output = crm_element_value_copy(msg, F_LRMD_RSC_OUTPUT);
++        lrmd__set_result(&event, event.rc, event.op_status,
++                         crm_element_value(msg, F_LRMD_RSC_EXIT_REASON));
+ 
+         event.params = xml2list(msg);
+     } else if (pcmk__str_eq(type, LRMD_OP_NEW_CLIENT, pcmk__str_none)) {
+@@ -324,6 +325,7 @@ lrmd_dispatch_internal(lrmd_t * lrmd, xmlNode * msg)
+     if (event.params) {
+         g_hash_table_destroy(event.params);
+     }
++    lrmd__reset_result(&event);
+ }
+ 
+ // \return Always 0, to indicate that IPC mainloop source should be kept
+-- 
+2.27.0
+
diff --git a/SOURCES/013-probe-failures.patch b/SOURCES/013-probe-failures.patch
new file mode 100644
index 0000000..c13867e
--- /dev/null
+++ b/SOURCES/013-probe-failures.patch
@@ -0,0 +1,26 @@
+From 186d5a02fba919c455fd6eeb050b4be107f82159 Mon Sep 17 00:00:00 2001
+From: Chris Lumens <clumens@redhat.com>
+Date: Thu, 13 Jan 2022 17:02:47 -0500
+Subject: [PATCH] Low: scheduler: Use the old RC code to log maskable probe
+ failures.
+
+---
+ lib/pengine/unpack.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/lib/pengine/unpack.c b/lib/pengine/unpack.c
+index 8a2d2a6d6d..b01f86257a 100644
+--- a/lib/pengine/unpack.c
++++ b/lib/pengine/unpack.c
+@@ -3780,7 +3780,7 @@ unpack_rsc_op(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op,
+ 
+     if (maskable_probe_failure) {
+         crm_notice("Treating probe result '%s' for %s on %s as 'not running'",
+-                   services_ocf_exitcode_str(rc), rsc->id, node->details->uname);
++                   services_ocf_exitcode_str(old_rc), rsc->id, node->details->uname);
+         update_resource_state(rsc, node, xml_op, task, target_rc, *last_failure,
+                               on_fail, data_set);
+         crm_xml_add(xml_op, XML_ATTR_UNAME, node->details->uname);
+-- 
+2.27.0
+
diff --git a/SOURCES/014-pcmk_delay_base.patch b/SOURCES/014-pcmk_delay_base.patch
new file mode 100644
index 0000000..8aba265
--- /dev/null
+++ b/SOURCES/014-pcmk_delay_base.patch
@@ -0,0 +1,43 @@
+From 9d812b0401d4cedef53a3cc3653ec782a5c49e37 Mon Sep 17 00:00:00 2001
+From: Ken Gaillot <kgaillot@redhat.com>
+Date: Thu, 13 Jan 2022 10:42:02 -0600
+Subject: [PATCH] Doc: fencer: improve pcmk_delay_base meta-data
+
+Update its type, since its value can now be a node map as well as a string,
+and add more detail to its description.
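+
+For example (illustrative CIB snippet, not part of this patch), a per-node
+base delay can be configured as a node map:
+
+    <nvpair id="fence1-delay" name="pcmk_delay_base" value="node1:1s;node2:5s"/>
+
+while a plain duration such as "2s" applies to every target of the device.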
+---
+ daemons/fenced/pacemaker-fenced.c | 18 +++++++++++-------
+ 1 file changed, 11 insertions(+), 7 deletions(-)
+
+diff --git a/daemons/fenced/pacemaker-fenced.c b/daemons/fenced/pacemaker-fenced.c
+index 1b954be5a4..12f331496c 100644
+--- a/daemons/fenced/pacemaker-fenced.c
++++ b/daemons/fenced/pacemaker-fenced.c
+@@ -1548,13 +1548,17 @@ main(int argc, char **argv)
+                PCMK_STONITH_DELAY_BASE);
+         printf("    <shortdesc lang=\"en\">Enable a base delay for "
+                "fencing actions and specify base delay value.</shortdesc>\n");
+-        printf("    <longdesc lang=\"en\">This prevents double fencing when "
+-               "different delays are configured on the nodes.\nUse this to "
+-               "enable a static delay for fencing actions.\nThe overall delay "
+-               "is derived from a random delay value adding this static delay "
+-               "so that the sum is kept below the maximum delay.\nSet to eg. "
+-               "node1:1s;node2:5 to set different value per node.</longdesc>\n");
+-        printf("    <content type=\"time\" default=\"0s\"/>\n");
++        printf("    <longdesc lang=\"en\">This enables a static delay for "
++               "fencing actions, which can help avoid \"death matches\" where "
++               "two nodes try to fence each other at the same time. If "
++               PCMK_STONITH_DELAY_MAX " is also used, a random delay will be "
++               "added such that the total delay is kept below that value.\n"
++               "This can be set to a single time value to apply to any node "
++               "targeted by this device (useful if a separate device is "
++               "configured for each target), or to a node map (for example, "
++               "\"node1:1s;node2:5\") to set a different value per target.\n"
++               "    </longdesc>\n");
++        printf("    <content type=\"string\" default=\"0s\"/>\n");
+         printf("  </parameter>\n");
+ 
+         printf("  <parameter name=\"%s\" unique=\"0\">\n",
+-- 
+2.27.0
+
diff --git a/SOURCES/015-fencing-reasons.patch b/SOURCES/015-fencing-reasons.patch
new file mode 100644
index 0000000..c53b6c9
--- /dev/null
+++ b/SOURCES/015-fencing-reasons.patch
@@ -0,0 +1,1093 @@
+From 87365f49b1bee0baa536783865fbd835a9cacc97 Mon Sep 17 00:00:00 2001
+From: Ken Gaillot <kgaillot@redhat.com>
+Date: Thu, 2 Dec 2021 16:12:24 -0600
+Subject: [PATCH 01/11] Refactor: libstonithd: functionize getting notification
+ data XML
+
+Also, only get the data when needed.
+---
+ lib/fencing/st_client.c | 32 +++++++++++++++++++++++---------
+ 1 file changed, 23 insertions(+), 9 deletions(-)
+
+diff --git a/lib/fencing/st_client.c b/lib/fencing/st_client.c
+index 4823751267..72a0a49408 100644
+--- a/lib/fencing/st_client.c
++++ b/lib/fencing/st_client.c
+@@ -1312,6 +1312,23 @@ stonith_dump_pending_callbacks(stonith_t * stonith)
+     return g_hash_table_foreach(private->stonith_op_callback_table, stonith_dump_pending_op, NULL);
+ }
+ 
++/*!
++ * \internal
++ * \brief Get the data section of a fencer notification
++ *
++ * \param[in] msg    Notification XML
++ * \param[in] ntype  Notification type
++ */
++static xmlNode *
++get_event_data_xml(xmlNode *msg, const char *ntype)
++{
++    char *data_addr = crm_strdup_printf("//%s", ntype);
++    xmlNode *data = get_xpath_object(data_addr, msg, LOG_DEBUG);
++
++    free(data_addr);
++    return data;
++}
++
+ /*
+  <notify t="st_notify" subt="st_device_register" st_op="st_device_register" st_rc="0" >
+    <st_calldata >
+@@ -1336,17 +1353,18 @@ xml_to_event(xmlNode * msg)
+ {
+     stonith_event_t *event = calloc(1, sizeof(stonith_event_t));
+     const char *ntype = crm_element_value(msg, F_SUBTYPE);
+-    char *data_addr = crm_strdup_printf("//%s", ntype);
+-    xmlNode *data = get_xpath_object(data_addr, msg, LOG_DEBUG);
+ 
+     crm_log_xml_trace(msg, "stonith_notify");
+ 
+     crm_element_value_int(msg, F_STONITH_RC, &(event->result));
+ 
+     if (pcmk__str_eq(ntype, T_STONITH_NOTIFY_FENCE, pcmk__str_casei)) {
+-        event->operation = crm_element_value_copy(msg, F_STONITH_OPERATION);
++        xmlNode *data = get_event_data_xml(msg, ntype);
+ 
+-        if (data) {
++        if (data == NULL) {
++            crm_err("No data for %s event", ntype);
++            crm_log_xml_notice(msg, "BadEvent");
++        } else {
+             event->origin = crm_element_value_copy(data, F_STONITH_ORIGIN);
+             event->action = crm_element_value_copy(data, F_STONITH_ACTION);
+             event->target = crm_element_value_copy(data, F_STONITH_TARGET);
+@@ -1354,14 +1372,10 @@ xml_to_event(xmlNode * msg)
+             event->id = crm_element_value_copy(data, F_STONITH_REMOTE_OP_ID);
+             event->client_origin = crm_element_value_copy(data, F_STONITH_CLIENTNAME);
+             event->device = crm_element_value_copy(data, F_STONITH_DEVICE);
+-
+-        } else {
+-            crm_err("No data for %s event", ntype);
+-            crm_log_xml_notice(msg, "BadEvent");
+         }
++        event->operation = crm_element_value_copy(msg, F_STONITH_OPERATION);
+     }
+ 
+-    free(data_addr);
+     return event;
+ }
+ 
+-- 
+2.27.0
+
+
+From 448f86a029d5d7e3c255d813929003a8cc2cffba Mon Sep 17 00:00:00 2001
+From: Ken Gaillot <kgaillot@redhat.com>
+Date: Fri, 19 Nov 2021 17:01:23 -0600
+Subject: [PATCH 02/11] Refactor: fencing: parse full result from fencer
+ notifications
+
+stonith_event_t previously contained only the legacy return code for the
+notification event. Use its new opaque member to store the full result, along
+with accessors (available only internally for now). Nothing uses them yet.
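+
+As a sketch of the intended use (hypothetical callback, nothing in this patch
+registers it), a notification handler could read the full result like this:
+
+    static void
+    my_notify(stonith_t *st, stonith_event_t *event)
+    {
+        const char *reason = stonith__event_exit_reason(event);
+
+        if (stonith__event_exit_status(event) != CRM_EX_OK) {
+            crm_warn("Fencing event failed: %s%s%s",
+                     pcmk_exec_status_str(stonith__event_execution_status(event)),
+                     ((reason == NULL)? "" : ": "),
+                     ((reason == NULL)? "" : reason));
+        }
+    }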
+---
+ include/crm/fencing/internal.h |  5 +++
+ lib/fencing/st_client.c        | 68 ++++++++++++++++++++++++++++++++--
+ 2 files changed, 70 insertions(+), 3 deletions(-)
+
+diff --git a/include/crm/fencing/internal.h b/include/crm/fencing/internal.h
+index eff689e59b..acc16d05e9 100644
+--- a/include/crm/fencing/internal.h
++++ b/include/crm/fencing/internal.h
+@@ -187,10 +187,15 @@ bool stonith__event_state_eq(stonith_history_t *history, void *user_data);
+ bool stonith__event_state_neq(stonith_history_t *history, void *user_data);
+ 
+ int stonith__legacy2status(int rc);
++
+ int stonith__exit_status(stonith_callback_data_t *data);
+ int stonith__execution_status(stonith_callback_data_t *data);
+ const char *stonith__exit_reason(stonith_callback_data_t *data);
+ 
++int stonith__event_exit_status(stonith_event_t *event);
++int stonith__event_execution_status(stonith_event_t *event);
++const char *stonith__event_exit_reason(stonith_event_t *event);
++
+ /*!
+  * \internal
+  * \brief Is a fencing operation in pending state?
+diff --git a/lib/fencing/st_client.c b/lib/fencing/st_client.c
+index 72a0a49408..f58b3a6745 100644
+--- a/lib/fencing/st_client.c
++++ b/lib/fencing/st_client.c
+@@ -1349,15 +1349,23 @@ get_event_data_xml(xmlNode *msg, const char *ntype)
+  </notify>
+ */
+ static stonith_event_t *
+-xml_to_event(xmlNode * msg)
++xml_to_event(xmlNode *msg, pcmk__action_result_t *result)
+ {
+     stonith_event_t *event = calloc(1, sizeof(stonith_event_t));
+     const char *ntype = crm_element_value(msg, F_SUBTYPE);
+ 
++    CRM_ASSERT((event != NULL) && (result != NULL));
++
+     crm_log_xml_trace(msg, "stonith_notify");
+ 
+-    crm_element_value_int(msg, F_STONITH_RC, &(event->result));
++    // All notification types have the operation result
++    event->opaque = result;
++    stonith__xe_get_result(msg, result);
++
++    // @COMPAT The API originally provided the result as a legacy return code
++    event->result = pcmk_rc2legacy(stonith__result2rc(result));
+ 
++    // Fence notifications have additional information
+     if (pcmk__str_eq(ntype, T_STONITH_NOTIFY_FENCE, pcmk__str_casei)) {
+         xmlNode *data = get_event_data_xml(msg, ntype);
+ 
+@@ -1392,6 +1400,7 @@ event_free(stonith_event_t * event)
+     free(event->executioner);
+     free(event->device);
+     free(event->client_origin);
++    pcmk__reset_result((pcmk__action_result_t *) (event->opaque));
+     free(event);
+ }
+ 
+@@ -1402,6 +1411,7 @@ stonith_send_notification(gpointer data, gpointer user_data)
+     stonith_notify_client_t *entry = data;
+     stonith_event_t *st_event = NULL;
+     const char *event = NULL;
++    pcmk__action_result_t result = PCMK__UNKNOWN_RESULT;
+ 
+     if (blob->xml == NULL) {
+         crm_warn("Skipping callback - NULL message");
+@@ -1427,7 +1437,7 @@ stonith_send_notification(gpointer data, gpointer user_data)
+         return;
+     }
+ 
+-    st_event = xml_to_event(blob->xml);
++    st_event = xml_to_event(blob->xml, &result);
+ 
+     crm_trace("Invoking callback for %p/%s event...", entry, event);
+     entry->notify(blob->stonith, st_event);
+@@ -2366,6 +2376,58 @@ stonith__exit_reason(stonith_callback_data_t *data)
+     return ((pcmk__action_result_t *) data->opaque)->exit_reason;
+ }
+ 
++/*!
++ * \internal
++ * \brief Return the exit status from an event notification
++ *
++ * \param[in] event  Event
++ *
++ * \return Exit status from event
++ */
++int
++stonith__event_exit_status(stonith_event_t *event)
++{
++    if ((event == NULL) || (event->opaque == NULL)) {
++        return CRM_EX_ERROR;
++    }
++    return ((pcmk__action_result_t *) event->opaque)->exit_status;
++}
++
++/*!
++ * \internal
++ * \brief Return the execution status from an event notification
++ *
++ * \param[in] event  Event
++ *
++ * \return Execution status from event
++ */
++int
++stonith__event_execution_status(stonith_event_t *event)
++{
++    if ((event == NULL) || (event->opaque == NULL)) {
++        return PCMK_EXEC_UNKNOWN;
++    }
++    return ((pcmk__action_result_t *) event->opaque)->execution_status;
++}
++
++/*!
++ * \internal
++ * \brief Return the exit reason from an event notification
++ *
++ * \param[in] event  Event
++ *
++ * \return Exit reason from event
++ */
++const char *
++stonith__event_exit_reason(stonith_event_t *event)
++{
++    if ((event == NULL) || (event->opaque == NULL)) {
++        return NULL;
++    }
++    return ((pcmk__action_result_t *) event->opaque)->exit_reason;
++}
++
++
+ // Deprecated functions kept only for backward API compatibility
+ // LCOV_EXCL_START
+ 
+-- 
+2.27.0
+
+
+From 8dab65e65fe760052d1151749a7bfb2203445813 Mon Sep 17 00:00:00 2001
+From: Ken Gaillot <kgaillot@redhat.com>
+Date: Fri, 19 Nov 2021 17:02:28 -0600
+Subject: [PATCH 03/11] Refactor: fencing: parse full result from synchronous
+ fencer replies
+
+stonith_send_command() now parses the full result from synchronous fencer
+replies, and maps that to a legacy return code, rather than parse the legacy
+return code directly.
+
+The full result is not used yet, and won't be until we can break backward API
+compatibility, since the API functions that call stonith_send_command()
+currently return a legacy code.
+---
+ lib/fencing/st_client.c | 8 +++++---
+ 1 file changed, 5 insertions(+), 3 deletions(-)
+
+diff --git a/lib/fencing/st_client.c b/lib/fencing/st_client.c
+index f58b3a6745..5fec7529e3 100644
+--- a/lib/fencing/st_client.c
++++ b/lib/fencing/st_client.c
+@@ -1537,11 +1537,13 @@ stonith_send_command(stonith_t * stonith, const char *op, xmlNode * data, xmlNod
+     crm_element_value_int(op_reply, F_STONITH_CALLID, &reply_id);
+ 
+     if (reply_id == stonith->call_id) {
++        pcmk__action_result_t result = PCMK__UNKNOWN_RESULT;
++
+         crm_trace("Synchronous reply %d received", reply_id);
+ 
+-        if (crm_element_value_int(op_reply, F_STONITH_RC, &rc) != 0) {
+-            rc = -ENOMSG;
+-        }
++        stonith__xe_get_result(op_reply, &result);
++        rc = pcmk_rc2legacy(stonith__result2rc(&result));
++        pcmk__reset_result(&result);
+ 
+         if ((call_options & st_opt_discard_reply) || output_data == NULL) {
+             crm_trace("Discarding reply");
+-- 
+2.27.0
+
+
+From 1beb319d8c62ab93b4c08b26a4e03151906c6189 Mon Sep 17 00:00:00 2001
+From: Ken Gaillot <kgaillot@redhat.com>
+Date: Mon, 6 Dec 2021 17:13:44 -0600
+Subject: [PATCH 04/11] Log: fencing: improve cts-fence-helper result logs
+
+Use the full result from the fencing event
+---
+ daemons/fenced/cts-fence-helper.c | 12 ++++++++----
+ 1 file changed, 8 insertions(+), 4 deletions(-)
+
+diff --git a/daemons/fenced/cts-fence-helper.c b/daemons/fenced/cts-fence-helper.c
+index e222a59f9f..858cddc9de 100644
+--- a/daemons/fenced/cts-fence-helper.c
++++ b/daemons/fenced/cts-fence-helper.c
+@@ -125,10 +125,14 @@ st_callback(stonith_t * st, stonith_event_t * e)
+         crm_exit(CRM_EX_DISCONNECT);
+     }
+ 
+-    crm_notice("Operation %s requested by %s %s for peer %s.  %s reported: %s (ref=%s)",
+-               e->operation, e->origin, e->result == pcmk_ok ? "completed" : "failed",
+-               e->target, e->executioner ? e->executioner : "<none>",
+-               pcmk_strerror(e->result), e->id);
++    crm_notice("Operation '%s' targeting %s by %s for %s: %s (exit=%d, ref=%s)",
++               ((e->operation == NULL)? "unknown" : e->operation),
++               ((e->target == NULL)? "no node" : e->target),
++               ((e->executioner == NULL)? "any node" : e->executioner),
++               ((e->origin == NULL)? "unknown client" : e->origin),
++               pcmk_exec_status_str(stonith__event_execution_status(e)),
++               stonith__event_exit_status(e),
++               ((e->id == NULL)? "none" : e->id));
+ 
+     if (expected_notifications) {
+         expected_notifications--;
+-- 
+2.27.0
+
+
+From b26f701833ade5d7441fba317832d6e827bd16d0 Mon Sep 17 00:00:00 2001
+From: Ken Gaillot <kgaillot@redhat.com>
+Date: Tue, 14 Dec 2021 16:52:09 -0600
+Subject: [PATCH 05/11] Test: cts-fence-helper: update expected return code
+
+Before recent changes, libstonithd obtained the fence API's legacy result code
+directly from the fencer's XML reply, meaning that the legacy code was the
+result of the fencer's mapping of the full result (including the action stderr).
+
+After those changes, libstonithd now ignores the legacy code in the fencer's
+reply, and instead maps the legacy code itself from the full result in the
+fencer's reply.
+
+However, the fencer's reply does not have the action stderr, so failures that
+mapped to -pcmk_err_generic on the server side now map to -ENODATA on the
+client side. Update cts-fence-helper's expected return code to match (neither
+code is particularly useful, so there wouldn't be much benefit from having the
+fencer pass the action stderr with replies, which would be considerable
+additional work).
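+
+Roughly, the client-side mapping introduced earlier in this series now looks
+like this (sketch):
+
+    pcmk__action_result_t result = PCMK__UNKNOWN_RESULT;
+    int rc;
+
+    stonith__xe_get_result(reply, &result);   /* full result, but no stderr */
+    rc = pcmk_rc2legacy(stonith__result2rc(&result));
+    pcmk__reset_result(&result);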
+---
+ daemons/fenced/cts-fence-helper.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/daemons/fenced/cts-fence-helper.c b/daemons/fenced/cts-fence-helper.c
+index 858cddc9de..e3113452ef 100644
+--- a/daemons/fenced/cts-fence-helper.c
++++ b/daemons/fenced/cts-fence-helper.c
+@@ -207,10 +207,10 @@ run_fence_failure_test(void)
+                 "Register device1 for failure test", 1, 0);
+ 
+     single_test(st->cmds->fence(st, st_opts, "false_1_node2", "off", 3, 0),
+-                "Fence failure results off", 1, -pcmk_err_generic);
++                "Fence failure results off", 1, -ENODATA);
+ 
+     single_test(st->cmds->fence(st, st_opts, "false_1_node2", "reboot", 3, 0),
+-                "Fence failure results reboot", 1, -pcmk_err_generic);
++                "Fence failure results reboot", 1, -ENODATA);
+ 
+     single_test(st->cmds->remove_device(st, st_opts, "test-id1"),
+                 "Remove device1 for failure test", 1, 0);
+-- 
+2.27.0
+
+
+From 123429de229c2148e320c76530b95e6ba458b9f6 Mon Sep 17 00:00:00 2001
+From: Ken Gaillot <kgaillot@redhat.com>
+Date: Tue, 7 Dec 2021 10:28:48 -0600
+Subject: [PATCH 06/11] Low: controller: compare fencing targets
+ case-insensitively
+
+... since they are node names
+---
+ daemons/controld/controld_fencing.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/daemons/controld/controld_fencing.c b/daemons/controld/controld_fencing.c
+index f8d2fc13f4..70e141dc28 100644
+--- a/daemons/controld/controld_fencing.c
++++ b/daemons/controld/controld_fencing.c
+@@ -466,7 +466,7 @@ tengine_stonith_notify(stonith_t *st, stonith_event_t *st_event)
+         return;
+ 
+     } else if ((st_event->result == pcmk_ok)
+-               && pcmk__str_eq(st_event->target, fsa_our_uname, pcmk__str_none)) {
++               && pcmk__str_eq(st_event->target, fsa_our_uname, pcmk__str_casei)) {
+ 
+         /* We were notified of our own fencing. Most likely, either fencing was
+          * misconfigured, or fabric fencing that doesn't cut cluster
+-- 
+2.27.0
+
+
+From 3a067b8e58b3aefb49b2af1c35d0ad28b2de8784 Mon Sep 17 00:00:00 2001
+From: Ken Gaillot <kgaillot@redhat.com>
+Date: Tue, 7 Dec 2021 10:37:56 -0600
+Subject: [PATCH 07/11] Refactor: controller: best practices for handling
+ fencing notifications
+
+Rename tengine_stonith_notify() to handle_fence_notification(), rename its
+st_event argument to event, add a doxygen block, and use some new variables and
+reformatting to make it easier to follow (and change later).
+---
+ daemons/controld/controld_fencing.c | 131 ++++++++++++++++------------
+ 1 file changed, 75 insertions(+), 56 deletions(-)
+
+diff --git a/daemons/controld/controld_fencing.c b/daemons/controld/controld_fencing.c
+index 70e141dc28..00626444da 100644
+--- a/daemons/controld/controld_fencing.c
++++ b/daemons/controld/controld_fencing.c
+@@ -435,39 +435,59 @@ tengine_stonith_connection_destroy(stonith_t *st, stonith_event_t *e)
+     }
+ }
+ 
++/*!
++ * \internal
++ * \brief Handle an event notification from the fencing API
++ *
++ * \param[in] st     Fencing API connection
++ * \param[in] event  Fencing API event notification
++ */
+ static void
+-tengine_stonith_notify(stonith_t *st, stonith_event_t *st_event)
++handle_fence_notification(stonith_t *st, stonith_event_t *event)
+ {
++    bool succeeded = true;
++    const char *executioner = "the cluster";
++    const char *client = "a client";
++
+     if (te_client_id == NULL) {
+         te_client_id = crm_strdup_printf("%s.%lu", crm_system_name,
+                                          (unsigned long) getpid());
+     }
+ 
+-    if (st_event == NULL) {
++    if (event == NULL) {
+         crm_err("Notify data not found");
+         return;
+     }
+ 
+-    crmd_alert_fencing_op(st_event);
++    if (event->executioner != NULL) {
++        executioner = event->executioner;
++    }
++    if (event->client_origin != NULL) {
++        client = event->client_origin;
++    }
+ 
+-    if ((st_event->result == pcmk_ok) && pcmk__str_eq("on", st_event->action, pcmk__str_casei)) {
+-        crm_notice("%s was successfully unfenced by %s (at the request of %s)",
+-                   st_event->target,
+-                   st_event->executioner? st_event->executioner : "<anyone>",
+-                   st_event->origin);
+-                /* TODO: Hook up st_event->device */
+-        return;
++    if (event->result != pcmk_ok) {
++        succeeded = false;
++    }
+ 
+-    } else if (pcmk__str_eq("on", st_event->action, pcmk__str_casei)) {
+-        crm_err("Unfencing of %s by %s failed: %s (%d)",
+-                st_event->target,
+-                st_event->executioner? st_event->executioner : "<anyone>",
+-                pcmk_strerror(st_event->result), st_event->result);
+-        return;
++    crmd_alert_fencing_op(event);
+ 
+-    } else if ((st_event->result == pcmk_ok)
+-               && pcmk__str_eq(st_event->target, fsa_our_uname, pcmk__str_casei)) {
++    if (pcmk__str_eq("on", event->action, pcmk__str_none)) {
++        // Unfencing doesn't need special handling, just a log message
++        if (succeeded) {
++            crm_notice("%s was successfully unfenced by %s (at the request of %s)",
++                       event->target, executioner, event->origin);
++                    /* TODO: Hook up event->device */
++        } else {
++            crm_err("Unfencing of %s by %s failed: %s (%d)",
++                    event->target, executioner,
++                    pcmk_strerror(event->result), event->result);
++        }
++        return;
++    }
+ 
++    if (succeeded
++        && pcmk__str_eq(event->target, fsa_our_uname, pcmk__str_casei)) {
+         /* We were notified of our own fencing. Most likely, either fencing was
+          * misconfigured, or fabric fencing that doesn't cut cluster
+          * communication is in use.
+@@ -478,44 +498,41 @@ tengine_stonith_notify(stonith_t *st, stonith_event_t *st_event)
+          * our subsequent election votes as "not part of our cluster".
+          */
+         crm_crit("We were allegedly just fenced by %s for %s!",
+-                 st_event->executioner? st_event->executioner : "the cluster",
+-                 st_event->origin); /* Dumps blackbox if enabled */
++                 executioner, event->origin); // Dumps blackbox if enabled
+         if (fence_reaction_panic) {
+             pcmk__panic(__func__);
+         } else {
+             crm_exit(CRM_EX_FATAL);
+         }
+-        return;
++        return; // Should never get here
+     }
+ 
+-    /* Update the count of stonith failures for this target, in case we become
++    /* Update the count of fencing failures for this target, in case we become
+      * DC later. The current DC has already updated its fail count in
+      * tengine_stonith_callback().
+      */
+-    if (!AM_I_DC && pcmk__str_eq(st_event->operation, T_STONITH_NOTIFY_FENCE, pcmk__str_casei)) {
+-        if (st_event->result == pcmk_ok) {
+-            st_fail_count_reset(st_event->target);
++    if (!AM_I_DC
++        && pcmk__str_eq(event->operation, T_STONITH_NOTIFY_FENCE,
++                        pcmk__str_casei)) {
++
++        if (succeeded) {
++            st_fail_count_reset(event->target);
+         } else {
+-            st_fail_count_increment(st_event->target);
++            st_fail_count_increment(event->target);
+         }
+     }
+ 
+     crm_notice("Peer %s was%s terminated (%s) by %s on behalf of %s: %s "
+                CRM_XS " initiator=%s ref=%s",
+-               st_event->target, st_event->result == pcmk_ok ? "" : " not",
+-               st_event->action,
+-               st_event->executioner ? st_event->executioner : "<anyone>",
+-               (st_event->client_origin? st_event->client_origin : "<unknown>"),
+-               pcmk_strerror(st_event->result),
+-               st_event->origin, st_event->id);
+-
+-    if (st_event->result == pcmk_ok) {
+-        crm_node_t *peer = pcmk__search_known_node_cache(0, st_event->target,
++               event->target, (succeeded? "" : " not"),
++               event->action, executioner, client,
++               pcmk_strerror(event->result),
++               event->origin, event->id);
++
++    if (succeeded) {
++        crm_node_t *peer = pcmk__search_known_node_cache(0, event->target,
+                                                          CRM_GET_PEER_ANY);
+         const char *uuid = NULL;
+-        gboolean we_are_executioner = pcmk__str_eq(st_event->executioner,
+-                                                   fsa_our_uname,
+-                                                   pcmk__str_casei);
+ 
+         if (peer == NULL) {
+             return;
+@@ -523,10 +540,9 @@ tengine_stonith_notify(stonith_t *st, stonith_event_t *st_event)
+ 
+         uuid = crm_peer_uuid(peer);
+ 
+-        crm_trace("target=%s dc=%s", st_event->target, fsa_our_dc);
+-        if(AM_I_DC) {
++        if (AM_I_DC) {
+             /* The DC always sends updates */
+-            send_stonith_update(NULL, st_event->target, uuid);
++            send_stonith_update(NULL, event->target, uuid);
+ 
+             /* @TODO Ideally, at this point, we'd check whether the fenced node
+              * hosted any guest nodes, and call remote_node_down() for them.
+@@ -536,31 +552,33 @@ tengine_stonith_notify(stonith_t *st, stonith_event_t *st_event)
+              * on the scheduler creating fence pseudo-events for the guests.
+              */
+ 
+-            if (st_event->client_origin
+-                && !pcmk__str_eq(st_event->client_origin, te_client_id, pcmk__str_casei)) {
+-
+-                /* Abort the current transition graph if it wasn't us
+-                 * that invoked stonith to fence someone
++            if (!pcmk__str_eq(client, te_client_id, pcmk__str_casei)) {
++                /* Abort the current transition if it wasn't the cluster that
++                 * initiated fencing.
+                  */
+-                crm_info("External fencing operation from %s fenced %s", st_event->client_origin, st_event->target);
+-                abort_transition(INFINITY, tg_restart, "External Fencing Operation", NULL);
++                crm_info("External fencing operation from %s fenced %s",
++                         client, event->target);
++                abort_transition(INFINITY, tg_restart,
++                                 "External Fencing Operation", NULL);
+             }
+ 
+             /* Assume it was our leader if we don't currently have one */
+-        } else if (pcmk__str_eq(fsa_our_dc, st_event->target, pcmk__str_null_matches | pcmk__str_casei)
++        } else if (pcmk__str_eq(fsa_our_dc, event->target,
++                                pcmk__str_null_matches|pcmk__str_casei)
+                    && !pcmk_is_set(peer->flags, crm_remote_node)) {
+ 
+             crm_notice("Fencing target %s %s our leader",
+-                       st_event->target, (fsa_our_dc? "was" : "may have been"));
++                       event->target, (fsa_our_dc? "was" : "may have been"));
+ 
+             /* Given the CIB resyncing that occurs around elections,
+              * have one node update the CIB now and, if the new DC is different,
+              * have them do so too after the election
+              */
+-            if (we_are_executioner) {
+-                send_stonith_update(NULL, st_event->target, uuid);
++            if (pcmk__str_eq(event->executioner, fsa_our_uname,
++                             pcmk__str_casei)) {
++                send_stonith_update(NULL, event->target, uuid);
+             }
+-            add_stonith_cleanup(st_event->target);
++            add_stonith_cleanup(event->target);
+         }
+ 
+         /* If the target is a remote node, and we host its connection,
+@@ -569,7 +587,7 @@ tengine_stonith_notify(stonith_t *st, stonith_event_t *st_event)
+          * so the failure might not otherwise be detected until the next poke.
+          */
+         if (pcmk_is_set(peer->flags, crm_remote_node)) {
+-            remote_ra_fail(st_event->target);
++            remote_ra_fail(event->target);
+         }
+ 
+         crmd_peer_down(peer, TRUE);
+@@ -632,7 +650,7 @@ te_connect_stonith(gpointer user_data)
+                                                  tengine_stonith_connection_destroy);
+         stonith_api->cmds->register_notification(stonith_api,
+                                                  T_STONITH_NOTIFY_FENCE,
+-                                                 tengine_stonith_notify);
++                                                 handle_fence_notification);
+         stonith_api->cmds->register_notification(stonith_api,
+                                                  T_STONITH_NOTIFY_HISTORY_SYNCED,
+                                                  tengine_stonith_history_synced);
+@@ -837,7 +855,8 @@ tengine_stonith_callback(stonith_t *stonith, stonith_callback_data_t *data)
+         }
+ 
+         /* Increment the fail count now, so abort_for_stonith_failure() can
+-         * check it. Non-DC nodes will increment it in tengine_stonith_notify().
++         * check it. Non-DC nodes will increment it in
++         * handle_fence_notification().
+          */
+         st_fail_count_increment(target);
+         abort_for_stonith_failure(abort_action, target, NULL);
+-- 
+2.27.0
+
+
+From 5ec9dcbbe1ee7f6252968f87d7df5a5ea17244fb Mon Sep 17 00:00:00 2001
+From: Ken Gaillot <kgaillot@redhat.com>
+Date: Tue, 7 Dec 2021 10:40:21 -0600
+Subject: [PATCH 08/11] Log: controller: improve messages when handling fencing
+ notifications
+
+Now that the fencing API provides a full result including exit reasons with
+fencing event notifications, make the controller logs more useful and
+consistent.
+---
+ daemons/controld/controld_fencing.c | 34 ++++++++++++++++++++---------
+ 1 file changed, 24 insertions(+), 10 deletions(-)
+
+diff --git a/daemons/controld/controld_fencing.c b/daemons/controld/controld_fencing.c
+index 00626444da..0aa9ef083c 100644
+--- a/daemons/controld/controld_fencing.c
++++ b/daemons/controld/controld_fencing.c
+@@ -448,6 +448,8 @@ handle_fence_notification(stonith_t *st, stonith_event_t *event)
+     bool succeeded = true;
+     const char *executioner = "the cluster";
+     const char *client = "a client";
++    const char *reason = NULL;
++    int exec_status;
+ 
+     if (te_client_id == NULL) {
+         te_client_id = crm_strdup_printf("%s.%lu", crm_system_name,
+@@ -466,22 +468,31 @@ handle_fence_notification(stonith_t *st, stonith_event_t *event)
+         client = event->client_origin;
+     }
+ 
+-    if (event->result != pcmk_ok) {
++    exec_status = stonith__event_execution_status(event);
++    if ((stonith__event_exit_status(event) != CRM_EX_OK)
++        || (exec_status != PCMK_EXEC_DONE)) {
+         succeeded = false;
++        if (exec_status == PCMK_EXEC_DONE) {
++            exec_status = PCMK_EXEC_ERROR;
++        }
+     }
++    reason = stonith__event_exit_reason(event);
+ 
+     crmd_alert_fencing_op(event);
+ 
+     if (pcmk__str_eq("on", event->action, pcmk__str_none)) {
+         // Unfencing doesn't need special handling, just a log message
+         if (succeeded) {
+-            crm_notice("%s was successfully unfenced by %s (at the request of %s)",
+-                       event->target, executioner, event->origin);
++            crm_notice("%s was unfenced by %s at the request of %s@%s",
++                       event->target, executioner, client, event->origin);
+                     /* TODO: Hook up event->device */
+         } else {
+-            crm_err("Unfencing of %s by %s failed: %s (%d)",
++            crm_err("Unfencing of %s by %s failed (%s%s%s) with exit status %d",
+                     event->target, executioner,
+-                    pcmk_strerror(event->result), event->result);
++                    pcmk_exec_status_str(exec_status),
++                    ((reason == NULL)? "" : ": "),
++                    ((reason == NULL)? "" : reason),
++                    stonith__event_exit_status(event));
+         }
+         return;
+     }
+@@ -522,12 +533,15 @@ handle_fence_notification(stonith_t *st, stonith_event_t *event)
+         }
+     }
+ 
+-    crm_notice("Peer %s was%s terminated (%s) by %s on behalf of %s: %s "
+-               CRM_XS " initiator=%s ref=%s",
++    crm_notice("Peer %s was%s terminated (%s) by %s on behalf of %s@%s: "
++               "%s%s%s%s " CRM_XS " event=%s",
+                event->target, (succeeded? "" : " not"),
+-               event->action, executioner, client,
+-               pcmk_strerror(event->result),
+-               event->origin, event->id);
++               event->action, executioner, client, event->origin,
++               (succeeded? "OK" : pcmk_exec_status_str(exec_status)),
++               ((reason == NULL)? "" : " ("),
++               ((reason == NULL)? "" : reason),
++               ((reason == NULL)? "" : ")"),
++               event->id);
+ 
+     if (succeeded) {
+         crm_node_t *peer = pcmk__search_known_node_cache(0, event->target,
+-- 
+2.27.0
+
+
+From fb484933ce7c8f3325300a9e01a114db1bbb5b70 Mon Sep 17 00:00:00 2001
+From: Ken Gaillot <kgaillot@redhat.com>
+Date: Tue, 7 Dec 2021 11:33:15 -0600
+Subject: [PATCH 09/11] Refactor: controller: move alert functions into own
+ source file
+
+---
+ daemons/controld/Makefile.am            |  1 +
+ daemons/controld/controld_alerts.c      | 92 +++++++++++++++++++++++++
+ daemons/controld/controld_execd_state.c | 75 --------------------
+ 3 files changed, 93 insertions(+), 75 deletions(-)
+ create mode 100644 daemons/controld/controld_alerts.c
+
+diff --git a/daemons/controld/Makefile.am b/daemons/controld/Makefile.am
+index db45bcba4a..0a29925c0b 100644
+--- a/daemons/controld/Makefile.am
++++ b/daemons/controld/Makefile.am
+@@ -43,6 +43,7 @@ pacemaker_controld_LDADD = $(top_builddir)/lib/fencing/libstonithd.la		\
+ 			   $(CLUSTERLIBS)
+ 
+ pacemaker_controld_SOURCES = pacemaker-controld.c	\
++			     controld_alerts.c		\
+ 			     controld_attrd.c		\
+ 			     controld_callbacks.c	\
+ 			     controld_based.c		\
+diff --git a/daemons/controld/controld_alerts.c b/daemons/controld/controld_alerts.c
+new file mode 100644
+index 0000000000..bd92795cf0
+--- /dev/null
++++ b/daemons/controld/controld_alerts.c
+@@ -0,0 +1,92 @@
++/*
++ * Copyright 2012-2021 the Pacemaker project contributors
++ *
++ * The version control history for this file may have further details.
++ *
++ * This source code is licensed under the GNU General Public License version 2
++ * or later (GPLv2+) WITHOUT ANY WARRANTY.
++ */
++
++#include <crm_internal.h>
++
++#include <glib.h>
++#include <libxml/tree.h>
++
++#include <crm/lrmd.h>
++#include <crm/lrmd_internal.h>
++#include <crm/pengine/rules_internal.h>
++#include <crm/pengine/status.h>
++#include <crm/stonith-ng.h>
++
++#include <pacemaker-controld.h>
++
++static GList *crmd_alert_list = NULL;
++
++void
++crmd_unpack_alerts(xmlNode *alerts)
++{
++    pe_free_alert_list(crmd_alert_list);
++    crmd_alert_list = pe_unpack_alerts(alerts);
++}
++
++void
++crmd_alert_node_event(crm_node_t *node)
++{
++    lrm_state_t *lrm_state;
++
++    if (crmd_alert_list == NULL) {
++        return;
++    }
++
++    lrm_state = lrm_state_find(fsa_our_uname);
++    if (lrm_state == NULL) {
++        return;
++    }
++
++    lrmd_send_node_alert((lrmd_t *) lrm_state->conn, crmd_alert_list,
++                         node->uname, node->id, node->state);
++}
++
++void
++crmd_alert_fencing_op(stonith_event_t * e)
++{
++    char *desc;
++    lrm_state_t *lrm_state;
++
++    if (crmd_alert_list == NULL) {
++        return;
++    }
++
++    lrm_state = lrm_state_find(fsa_our_uname);
++    if (lrm_state == NULL) {
++        return;
++    }
++
++    desc = crm_strdup_printf("Operation %s of %s by %s for %s@%s: %s (ref=%s)",
++                             e->action, e->target,
++                             (e->executioner? e->executioner : "<no-one>"),
++                             e->client_origin, e->origin,
++                             pcmk_strerror(e->result), e->id);
++
++    lrmd_send_fencing_alert((lrmd_t *) lrm_state->conn, crmd_alert_list,
++                            e->target, e->operation, desc, e->result);
++    free(desc);
++}
++
++void
++crmd_alert_resource_op(const char *node, lrmd_event_data_t * op)
++{
++    lrm_state_t *lrm_state;
++
++    if (crmd_alert_list == NULL) {
++        return;
++    }
++
++    lrm_state = lrm_state_find(fsa_our_uname);
++    if (lrm_state == NULL) {
++        return;
++    }
++
++    lrmd_send_resource_alert((lrmd_t *) lrm_state->conn, crmd_alert_list, node,
++                             op);
++}
+diff --git a/daemons/controld/controld_execd_state.c b/daemons/controld/controld_execd_state.c
+index 67c376a426..5dce6c6d59 100644
+--- a/daemons/controld/controld_execd_state.c
++++ b/daemons/controld/controld_execd_state.c
+@@ -777,78 +777,3 @@ lrm_state_unregister_rsc(lrm_state_t * lrm_state,
+      */
+     return ((lrmd_t *) lrm_state->conn)->cmds->unregister_rsc(lrm_state->conn, rsc_id, options);
+ }
+-
+-/*
+- * Functions for sending alerts via local executor connection
+- */
+-
+-static GList *crmd_alert_list = NULL;
+-
+-void
+-crmd_unpack_alerts(xmlNode *alerts)
+-{
+-    pe_free_alert_list(crmd_alert_list);
+-    crmd_alert_list = pe_unpack_alerts(alerts);
+-}
+-
+-void
+-crmd_alert_node_event(crm_node_t *node)
+-{
+-    lrm_state_t *lrm_state;
+-
+-    if (crmd_alert_list == NULL) {
+-        return;
+-    }
+-
+-    lrm_state = lrm_state_find(fsa_our_uname);
+-    if (lrm_state == NULL) {
+-        return;
+-    }
+-
+-    lrmd_send_node_alert((lrmd_t *) lrm_state->conn, crmd_alert_list,
+-                         node->uname, node->id, node->state);
+-}
+-
+-void
+-crmd_alert_fencing_op(stonith_event_t * e)
+-{
+-    char *desc;
+-    lrm_state_t *lrm_state;
+-
+-    if (crmd_alert_list == NULL) {
+-        return;
+-    }
+-
+-    lrm_state = lrm_state_find(fsa_our_uname);
+-    if (lrm_state == NULL) {
+-        return;
+-    }
+-
+-    desc = crm_strdup_printf("Operation %s of %s by %s for %s@%s: %s (ref=%s)",
+-                             e->action, e->target,
+-                             (e->executioner? e->executioner : "<no-one>"),
+-                             e->client_origin, e->origin,
+-                             pcmk_strerror(e->result), e->id);
+-
+-    lrmd_send_fencing_alert((lrmd_t *) lrm_state->conn, crmd_alert_list,
+-                            e->target, e->operation, desc, e->result);
+-    free(desc);
+-}
+-
+-void
+-crmd_alert_resource_op(const char *node, lrmd_event_data_t * op)
+-{
+-    lrm_state_t *lrm_state;
+-
+-    if (crmd_alert_list == NULL) {
+-        return;
+-    }
+-
+-    lrm_state = lrm_state_find(fsa_our_uname);
+-    if (lrm_state == NULL) {
+-        return;
+-    }
+-
+-    lrmd_send_resource_alert((lrmd_t *) lrm_state->conn, crmd_alert_list, node,
+-                             op);
+-}
+-- 
+2.27.0
+
+
+From 3d0b57406bcde6682623e9d62c8ee95878345eb1 Mon Sep 17 00:00:00 2001
+From: Ken Gaillot <kgaillot@redhat.com>
+Date: Tue, 7 Dec 2021 11:25:41 -0600
+Subject: [PATCH 10/11] Feature: controller,tools: improve description for
+ fencing alerts/traps
+
+This functionizes creating a description for fencing events, so it can be used
+by both the controller for alerts and crm_mon for traps, for consistency.
+
+Now that we have the full result including exit reason, we can improve the
+description, but the format is kept similar to before to minimize the change.
+
+The alert/trap also includes the legacy return code for the event, but we can't
+change that now because lrmd_send_fencing_alert() and the alert/trap
+environment variables are public API.
+---
+ daemons/controld/controld_alerts.c |  8 ++-----
+ include/crm/fencing/internal.h     |  1 +
+ lib/fencing/st_client.c            | 38 ++++++++++++++++++++++++++++++
+ tools/crm_mon.c                    |  5 ++--
+ 4 files changed, 43 insertions(+), 9 deletions(-)
+
+diff --git a/daemons/controld/controld_alerts.c b/daemons/controld/controld_alerts.c
+index bd92795cf0..2e0a67dba2 100644
+--- a/daemons/controld/controld_alerts.c
++++ b/daemons/controld/controld_alerts.c
+@@ -12,6 +12,7 @@
+ #include <glib.h>
+ #include <libxml/tree.h>
+ 
++#include <crm/fencing/internal.h>
+ #include <crm/lrmd.h>
+ #include <crm/lrmd_internal.h>
+ #include <crm/pengine/rules_internal.h>
+@@ -62,12 +63,7 @@ crmd_alert_fencing_op(stonith_event_t * e)
+         return;
+     }
+ 
+-    desc = crm_strdup_printf("Operation %s of %s by %s for %s@%s: %s (ref=%s)",
+-                             e->action, e->target,
+-                             (e->executioner? e->executioner : "<no-one>"),
+-                             e->client_origin, e->origin,
+-                             pcmk_strerror(e->result), e->id);
+-
++    desc = stonith__event_description(e);
+     lrmd_send_fencing_alert((lrmd_t *) lrm_state->conn, crmd_alert_list,
+                             e->target, e->operation, desc, e->result);
+     free(desc);
+diff --git a/include/crm/fencing/internal.h b/include/crm/fencing/internal.h
+index acc16d05e9..d2b49f831a 100644
+--- a/include/crm/fencing/internal.h
++++ b/include/crm/fencing/internal.h
+@@ -195,6 +195,7 @@ const char *stonith__exit_reason(stonith_callback_data_t *data);
+ int stonith__event_exit_status(stonith_event_t *event);
+ int stonith__event_execution_status(stonith_event_t *event);
+ const char *stonith__event_exit_reason(stonith_event_t *event);
++char *stonith__event_description(stonith_event_t *event);
+ 
+ /*!
+  * \internal
+diff --git a/lib/fencing/st_client.c b/lib/fencing/st_client.c
+index 5fec7529e3..b1de912b2a 100644
+--- a/lib/fencing/st_client.c
++++ b/lib/fencing/st_client.c
+@@ -2429,6 +2429,44 @@ stonith__event_exit_reason(stonith_event_t *event)
+     return ((pcmk__action_result_t *) event->opaque)->exit_reason;
+ }
+ 
++/*!
++ * \internal
++ * \brief Return a human-friendly description of a fencing event
++ *
++ * \param[in] event  Event to describe
++ *
++ * \return Newly allocated string with description of \p event
++ * \note The caller is responsible for freeing the return value.
++ *       This function asserts on memory errors and never returns NULL.
++ * \note This currently is useful only for events of type
++ *       T_STONITH_NOTIFY_FENCE.
++ */
++char *
++stonith__event_description(stonith_event_t *event)
++{
++    const char *reason;
++    const char *status;
++
++    if (stonith__event_execution_status(event) != PCMK_EXEC_DONE) {
++        status = pcmk_exec_status_str(stonith__event_execution_status(event));
++    } else if (stonith__event_exit_status(event) != CRM_EX_OK) {
++        status = pcmk_exec_status_str(PCMK_EXEC_ERROR);
++    } else {
++        status = crm_exit_str(CRM_EX_OK);
++    }
++    reason = stonith__event_exit_reason(event);
++
++    return crm_strdup_printf("Operation %s of %s by %s for %s@%s: %s%s%s%s (ref=%s)",
++                             event->action, event->target,
++                             (event->executioner? event->executioner : "the cluster"),
++                             (event->client_origin? event->client_origin : "a client"),
++                             event->origin, status,
++                             ((reason == NULL)? "" : " ("),
++                             ((reason == NULL)? "" : reason),
++                             ((reason == NULL)? "" : ")"),
++                             event->id);
++}
++
+ 
+ // Deprecated functions kept only for backward API compatibility
+ // LCOV_EXCL_START
+diff --git a/tools/crm_mon.c b/tools/crm_mon.c
+index a6c459aaf7..e7b4fe2847 100644
+--- a/tools/crm_mon.c
++++ b/tools/crm_mon.c
+@@ -2237,9 +2237,8 @@ mon_st_callback_event(stonith_t * st, stonith_event_t * e)
+         /* disconnect cib as well and have everything reconnect */
+         mon_cib_connection_destroy(NULL);
+     } else if (options.external_agent) {
+-        char *desc = crm_strdup_printf("Operation %s requested by %s for peer %s: %s (ref=%s)",
+-                                    e->operation, e->origin, e->target, pcmk_strerror(e->result),
+-                                    e->id);
++        char *desc = stonith__event_description(e);
++
+         send_custom_trap(e->target, NULL, e->operation, pcmk_ok, e->result, 0, desc);
+         free(desc);
+     }
+-- 
+2.27.0
+
+
+From 2fe03c2165680c717a1f6106c5150be7d117f1a5 Mon Sep 17 00:00:00 2001
+From: Ken Gaillot <kgaillot@redhat.com>
+Date: Fri, 14 Jan 2022 10:45:03 -0600
+Subject: [PATCH 11/11] Low: controller: compare case-sensitively where
+ appropriate
+
+---
+ daemons/controld/controld_fencing.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/daemons/controld/controld_fencing.c b/daemons/controld/controld_fencing.c
+index 0aa9ef083c..15954b2358 100644
+--- a/daemons/controld/controld_fencing.c
++++ b/daemons/controld/controld_fencing.c
+@@ -1,5 +1,5 @@
+ /*
+- * Copyright 2004-2021 the Pacemaker project contributors
++ * Copyright 2004-2022 the Pacemaker project contributors
+  *
+  * The version control history for this file may have further details.
+  *
+@@ -524,7 +524,7 @@ handle_fence_notification(stonith_t *st, stonith_event_t *event)
+      */
+     if (!AM_I_DC
+         && pcmk__str_eq(event->operation, T_STONITH_NOTIFY_FENCE,
+-                        pcmk__str_casei)) {
++                        pcmk__str_none)) {
+ 
+         if (succeeded) {
+             st_fail_count_reset(event->target);
+-- 
+2.27.0
+
diff --git a/SOURCES/016-fencing-crash.patch b/SOURCES/016-fencing-crash.patch
new file mode 100644
index 0000000..c514c64
--- /dev/null
+++ b/SOURCES/016-fencing-crash.patch
@@ -0,0 +1,56 @@
+From e330568504ec379ea42460d21a2e20b1652d9445 Mon Sep 17 00:00:00 2001
+From: Reid Wahl <nrwahl@protonmail.com>
+Date: Fri, 14 Jan 2022 01:35:35 -0800
+Subject: [PATCH] Fix: fencing: Don't set stonith action to pending if fork
+ fails
+
+Currently, we set a stonith action to pending if
+services_action_async_fork_notify() returns true. However, "true" means
+that the svc_action should not be freed. This might be because the
+svc_action forked successfully and is pending, or it might be because
+the svc_action has already been freed.
+
+In the case of stonith actions, if we fail to fork, the stonith_action_t
+object stored in svc_action->cb_data gets freed by the done callback,
+and services_action_async_fork_notify() returns true. If we try to set
+the action to pending, it causes a segfault.
+
+This commit moves the "set to pending" step to the
+stonith_action_async_forked() callback. We avoid the segfault and only
+set it to pending if it's actually pending.
+
+A slight difference in ordering was required to achieve this. Now, the
+action gets set to pending immediately before being added to the
+mainloop, instead of immediately after.
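+
+The hazard, roughly (illustrative comments, not the literal code):
+
+    if (services_action_async_fork_notify(svc_action, &done_cb, &forked_cb)) {
+        /* WRONG place to touch svc_action->cb_data: "true" only means
+         * "do not free svc_action"; if the fork failed, the done callback
+         * has already run and freed the stonith_action_t it points to.
+         */
+    }
+    /* Safe: set PCMK_EXEC_PENDING inside forked_cb, which runs only after a
+     * successful fork.
+     */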
+
+Signed-off-by: Reid Wahl <nrwahl@protonmail.com>
+---
+ lib/fencing/st_actions.c | 5 +++--
+ 1 file changed, 3 insertions(+), 2 deletions(-)
+
+diff --git a/lib/fencing/st_actions.c b/lib/fencing/st_actions.c
+index e4e43225cd..306001af69 100644
+--- a/lib/fencing/st_actions.c
++++ b/lib/fencing/st_actions.c
+@@ -550,6 +550,9 @@ stonith_action_async_forked(svc_action_t *svc_action)
+         (action->fork_cb) (svc_action->pid, action->userdata);
+     }
+ 
++    pcmk__set_result(&(action->result), PCMK_OCF_UNKNOWN, PCMK_EXEC_PENDING,
++                     NULL);
++
+     crm_trace("Child process %d performing action '%s' successfully forked",
+               action->pid, action->action);
+ }
+@@ -619,8 +622,6 @@ internal_stonith_action_execute(stonith_action_t * action)
+         if (services_action_async_fork_notify(svc_action,
+                                               &stonith_action_async_done,
+                                               &stonith_action_async_forked)) {
+-            pcmk__set_result(&(action->result), PCMK_OCF_UNKNOWN,
+-                             PCMK_EXEC_PENDING, NULL);
+             return pcmk_ok;
+         }
+ 
+-- 
+2.27.0
+
diff --git a/SOURCES/017-fencing-reasons.patch b/SOURCES/017-fencing-reasons.patch
new file mode 100644
index 0000000..1e100ec
--- /dev/null
+++ b/SOURCES/017-fencing-reasons.patch
@@ -0,0 +1,875 @@
+From 523f62eb235836a01ea039c23ada261a494f7b32 Mon Sep 17 00:00:00 2001
+From: Ken Gaillot <kgaillot@redhat.com>
+Date: Wed, 10 Nov 2021 15:22:47 -0600
+Subject: [PATCH 01/11] Feature: libpacemaker: improve result for high-level
+ fencing API
+
+Previously, pcmk__fence_action()'s helpers for asynchronous fencing actions
+initialized the result to a generic error, and then overrode that only on
+success.
+
+Now, set a detailed result for early failures, and use the full result when
+available from the fencing API.
+
+A standard return code is still returned to callers at this point.
+---
+ lib/pacemaker/pcmk_fence.c | 31 ++++++++++++++++++-------------
+ 1 file changed, 18 insertions(+), 13 deletions(-)
+
+diff --git a/lib/pacemaker/pcmk_fence.c b/lib/pacemaker/pcmk_fence.c
+index 7d6acd0de6..125e1b268b 100644
+--- a/lib/pacemaker/pcmk_fence.c
++++ b/lib/pacemaker/pcmk_fence.c
+@@ -32,8 +32,8 @@ static struct {
+     unsigned int timeout;
+     unsigned int tolerance;
+     int delay;
+-    int rc;
+-} async_fence_data;
++    pcmk__action_result_t result;
++} async_fence_data = { NULL, };
+ 
+ static int
+ handle_level(stonith_t *st, char *target, int fence_level,
+@@ -76,14 +76,13 @@ handle_level(stonith_t *st, char *target, int fence_level,
+ static void
+ notify_callback(stonith_t * st, stonith_event_t * e)
+ {
+-    if (e->result != pcmk_ok) {
+-        return;
+-    }
++    if (pcmk__str_eq(async_fence_data.target, e->target, pcmk__str_casei)
++        && pcmk__str_eq(async_fence_data.action, e->action, pcmk__str_casei)) {
+ 
+-    if (pcmk__str_eq(async_fence_data.target, e->target, pcmk__str_casei) &&
+-        pcmk__str_eq(async_fence_data.action, e->action, pcmk__str_casei)) {
+-
+-        async_fence_data.rc = e->result;
++        pcmk__set_result(&async_fence_data.result,
++                         stonith__event_exit_status(e),
++                         stonith__event_execution_status(e),
++                         stonith__event_exit_reason(e));
+         g_main_loop_quit(mainloop);
+     }
+ }
+@@ -91,8 +90,9 @@ notify_callback(stonith_t * st, stonith_event_t * e)
+ static void
+ fence_callback(stonith_t * stonith, stonith_callback_data_t * data)
+ {
+-    async_fence_data.rc = data->rc;
+-
++    pcmk__set_result(&async_fence_data.result, stonith__exit_status(data),
++                     stonith__execution_status(data),
++                     stonith__exit_reason(data));
+     g_main_loop_quit(mainloop);
+ }
+ 
+@@ -106,6 +106,8 @@ async_fence_helper(gpointer user_data)
+     if (rc != pcmk_ok) {
+         fprintf(stderr, "Could not connect to fencer: %s\n", pcmk_strerror(rc));
+         g_main_loop_quit(mainloop);
++        pcmk__set_result(&async_fence_data.result, CRM_EX_ERROR,
++                         PCMK_EXEC_NOT_CONNECTED, NULL);
+         return TRUE;
+     }
+ 
+@@ -121,6 +123,8 @@ async_fence_helper(gpointer user_data)
+ 
+     if (call_id < 0) {
+         g_main_loop_quit(mainloop);
++        pcmk__set_result(&async_fence_data.result, CRM_EX_ERROR,
++                         PCMK_EXEC_ERROR, pcmk_strerror(call_id));
+         return TRUE;
+     }
+ 
+@@ -146,7 +150,8 @@ pcmk__fence_action(stonith_t *st, const char *target, const char *action,
+     async_fence_data.timeout = timeout;
+     async_fence_data.tolerance = tolerance;
+     async_fence_data.delay = delay;
+-    async_fence_data.rc = pcmk_err_generic;
++    pcmk__set_result(&async_fence_data.result, CRM_EX_ERROR, PCMK_EXEC_UNKNOWN,
++                     NULL);
+ 
+     trig = mainloop_add_trigger(G_PRIORITY_HIGH, async_fence_helper, NULL);
+     mainloop_set_trigger(trig);
+@@ -156,7 +161,7 @@ pcmk__fence_action(stonith_t *st, const char *target, const char *action,
+ 
+     free(async_fence_data.name);
+ 
+-    return pcmk_legacy2rc(async_fence_data.rc);
++    return stonith__result2rc(&async_fence_data.result);
+ }
+ 
+ #ifdef BUILD_PUBLIC_LIBPACEMAKER
+-- 
+2.27.0
+
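The patch above swaps a single legacy rc for a result triple of exit status,
execution status, and exit reason. A minimal self-contained sketch of that
pattern follows; the struct and helper here are simplified stand-ins, not
Pacemaker's real pcmk__action_result_t definition, and only the field names
mirror the patch.

/* Illustrative only: a condensed stand-in for the result-triple pattern.
 * The types here are invented for the sketch. */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

typedef struct {
    int exit_status;        // agent/CRM exit code (e.g. CRM_EX_ERROR)
    int execution_status;   // how execution went (e.g. not connected, timeout)
    char *exit_reason;      // human-readable failure detail, heap-allocated
} action_result_t;

static void
set_result(action_result_t *r, int exit_status, int execution_status,
           const char *reason)
{
    r->exit_status = exit_status;
    r->execution_status = execution_status;
    free(r->exit_reason);
    r->exit_reason = (reason != NULL)? strdup(reason) : NULL;
}

int
main(void)
{
    action_result_t result = { 0, 0, NULL };

    // Early failure: record *why*, not just a generic error code
    set_result(&result, 1 /* error */, 2 /* not connected */,
               "Could not connect to fencer");
    printf("exit=%d exec=%d reason=%s\n", result.exit_status,
           result.execution_status, result.exit_reason);
    free(result.exit_reason);
    return 0;
}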
+
+From 008868fae5d1b0d6d8dc61f7acfb3856801ddd52 Mon Sep 17 00:00:00 2001
+From: Ken Gaillot <kgaillot@redhat.com>
+Date: Fri, 10 Dec 2021 15:36:10 -0600
+Subject: [PATCH 02/11] Refactor: libpacemaker: add exit reason to high-level
+ fencing API
+
+Nothing uses it as of this commit
+---
+ include/pacemaker.h         |  5 ++++-
+ include/pcmki/pcmki_fence.h |  5 ++++-
+ lib/pacemaker/pcmk_fence.c  | 10 +++++++---
+ tools/stonith_admin.c       |  6 +++---
+ 4 files changed, 18 insertions(+), 8 deletions(-)
+
+diff --git a/include/pacemaker.h b/include/pacemaker.h
+index a8523c969e..0daa4c5945 100644
+--- a/include/pacemaker.h
++++ b/include/pacemaker.h
+@@ -189,12 +189,15 @@ int pcmk_list_nodes(xmlNodePtr *xml, char *node_types);
+  *                      again.
+  * \param[in] delay     Apply a fencing delay. Value -1 means disable also any
+  *                      static/random fencing delays from pcmk_delay_base/max.
++ * \param[out] reason   If not NULL, where to put descriptive failure reason
+  *
+  * \return Standard Pacemaker return code
++ * \note If \p reason is not NULL, the caller is responsible for freeing its
++ *       returned value.
+  */
+ int pcmk_fence_action(stonith_t *st, const char *target, const char *action,
+                       const char *name, unsigned int timeout, unsigned int tolerance,
+-                      int delay);
++                      int delay, char **reason);
+ 
+ /*!
+  * \brief List the fencing operations that have occurred for a specific node.
+diff --git a/include/pcmki/pcmki_fence.h b/include/pcmki/pcmki_fence.h
+index d4cef68f5c..c3da0361d7 100644
+--- a/include/pcmki/pcmki_fence.h
++++ b/include/pcmki/pcmki_fence.h
+@@ -28,12 +28,15 @@
+  *                      again.
+  * \param[in] delay     Apply a fencing delay. Value -1 means disable also any
+  *                      static/random fencing delays from pcmk_delay_base/max
++ * \param[out] reason   If not NULL, where to put descriptive failure reason
+  *
+  * \return Standard Pacemaker return code
++ * \note If \p reason is not NULL, the caller is responsible for freeing its
++ *       returned value.
+  */
+ int pcmk__fence_action(stonith_t *st, const char *target, const char *action,
+                        const char *name, unsigned int timeout, unsigned int tolerance,
+-                       int delay);
++                       int delay, char **reason);
+ 
+ /*!
+  * \brief List the fencing operations that have occurred for a specific node.
+diff --git a/lib/pacemaker/pcmk_fence.c b/lib/pacemaker/pcmk_fence.c
+index 125e1b268b..dbf084fb6b 100644
+--- a/lib/pacemaker/pcmk_fence.c
++++ b/lib/pacemaker/pcmk_fence.c
+@@ -139,7 +139,7 @@ async_fence_helper(gpointer user_data)
+ int
+ pcmk__fence_action(stonith_t *st, const char *target, const char *action,
+                    const char *name, unsigned int timeout, unsigned int tolerance,
+-                   int delay)
++                   int delay, char **reason)
+ {
+     crm_trigger_t *trig;
+ 
+@@ -161,6 +161,9 @@ pcmk__fence_action(stonith_t *st, const char *target, const char *action,
+ 
+     free(async_fence_data.name);
+ 
++    if ((reason != NULL) && (async_fence_data.result.exit_reason != NULL)) {
++        *reason = strdup(async_fence_data.result.exit_reason);
++    }
+     return stonith__result2rc(&async_fence_data.result);
+ }
+ 
+@@ -168,9 +171,10 @@ pcmk__fence_action(stonith_t *st, const char *target, const char *action,
+ int
+ pcmk_fence_action(stonith_t *st, const char *target, const char *action,
+                   const char *name, unsigned int timeout, unsigned int tolerance,
+-                  int delay)
++                  int delay, char **reason)
+ {
+-    return pcmk__fence_action(st, target, action, name, timeout, tolerance, delay);
++    return pcmk__fence_action(st, target, action, name, timeout, tolerance,
++                              delay, reason);
+ }
+ #endif
+ 
+diff --git a/tools/stonith_admin.c b/tools/stonith_admin.c
+index 2d48326e1b..fdc7c46d49 100644
+--- a/tools/stonith_admin.c
++++ b/tools/stonith_admin.c
+@@ -571,17 +571,17 @@ main(int argc, char **argv)
+ 
+         case 'B':
+             rc = pcmk__fence_action(st, target, "reboot", name, options.timeout*1000,
+-                                    options.tolerance*1000, options.delay);
++                                    options.tolerance*1000, options.delay, NULL);
+             break;
+ 
+         case 'F':
+             rc = pcmk__fence_action(st, target, "off", name, options.timeout*1000,
+-                                    options.tolerance*1000, options.delay);
++                                    options.tolerance*1000, options.delay, NULL);
+             break;
+ 
+         case 'U':
+             rc = pcmk__fence_action(st, target, "on", name, options.timeout*1000,
+-                                    options.tolerance*1000, options.delay);
++                                    options.tolerance*1000, options.delay, NULL);
+             break;
+ 
+         case 'h':
+-- 
+2.27.0
+
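The new char **reason argument follows the caller-frees contract spelled out
in the doxygen note above. A small sketch of that contract, with an invented
callee (fail_with_reason is hypothetical; only the out-parameter idiom comes
from the patch):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Hypothetical callee: on failure, hands the caller a heap-allocated
 * description via *reason, as pcmk__request_fencing() now does. */
static int
fail_with_reason(char **reason)
{
    if (reason != NULL) {
        *reason = strdup("Fencer connection refused");
    }
    return -1;
}

int
main(void)
{
    char *reason = NULL;
    int rc = fail_with_reason(&reason);

    if (rc != 0) {
        printf("failed: %s\n", (reason != NULL)? reason : "unknown");
    }
    free(reason);    // caller owns the returned string
    return 0;
}

Passing NULL for reason, as the stonith_admin call sites above initially do,
simply opts out of the detail without changing the return code.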
+
+From 7570510f9985ba75ef73fb824f28109e135ace0a Mon Sep 17 00:00:00 2001
+From: Ken Gaillot <kgaillot@redhat.com>
+Date: Fri, 10 Dec 2021 15:40:48 -0600
+Subject: [PATCH 03/11] Refactor: libpacemaker: rename high-level fencing API
+
+Rename pcmk_fence_action() to pcmk_request_fencing(), and its internal
+equivalent pcmk__fence_action() to pcmk__request_fencing(). The change is
+backward-compatible because pcmk_fence_action() has not been exposed publicly
+yet.
+
+"Fence action" can be easily confused with libcrmservice actions, liblrmd
+actions, libstonithd actions, scheduler actions, and so forth.
+
+Also, the new name makes it clearer that the caller is requesting that the
+cluster perform fencing, and not directly performing fencing.
+---
+ include/pacemaker.h         | 20 ++++++++++----------
+ include/pcmki/pcmki_fence.h | 16 ++++++++--------
+ lib/pacemaker/pcmk_fence.c  | 16 ++++++++--------
+ tools/stonith_admin.c       | 18 ++++++++++++------
+ 4 files changed, 38 insertions(+), 32 deletions(-)
+
+diff --git a/include/pacemaker.h b/include/pacemaker.h
+index 0daa4c5945..e581f975a9 100644
+--- a/include/pacemaker.h
++++ b/include/pacemaker.h
+@@ -177,27 +177,27 @@ int pcmk_list_nodes(xmlNodePtr *xml, char *node_types);
+ #ifdef BUILD_PUBLIC_LIBPACEMAKER
+ 
+ /*!
+- * \brief Perform a STONITH action.
++ * \brief Ask the cluster to perform fencing
+  *
+- * \param[in] st        A connection to the STONITH API.
+- * \param[in] target    The node receiving the action.
+- * \param[in] action    The action to perform.
++ * \param[in] st        A connection to the fencer API
++ * \param[in] target    The node that should be fenced
++ * \param[in] action    The fencing action (on, off, reboot) to perform
+  * \param[in] name      Who requested the fence action?
+- * \param[in] timeout   How long to wait for the operation to complete (in ms).
++ * \param[in] timeout   How long to wait for the operation to complete (in ms)
+  * \param[in] tolerance If a successful action for \p target happened within
+  *                      this many ms, return 0 without performing the action
+- *                      again.
++ *                      again
+  * \param[in] delay     Apply a fencing delay. Value -1 means disable also any
+- *                      static/random fencing delays from pcmk_delay_base/max.
++ *                      static/random fencing delays from pcmk_delay_base/max
+  * \param[out] reason   If not NULL, where to put descriptive failure reason
+  *
+  * \return Standard Pacemaker return code
+  * \note If \p reason is not NULL, the caller is responsible for freeing its
+  *       returned value.
+  */
+-int pcmk_fence_action(stonith_t *st, const char *target, const char *action,
+-                      const char *name, unsigned int timeout, unsigned int tolerance,
+-                      int delay, char **reason);
++int pcmk_request_fencing(stonith_t *st, const char *target, const char *action,
++                         const char *name, unsigned int timeout,
++                         unsigned int tolerance, int delay, char **reason);
+ 
+ /*!
+  * \brief List the fencing operations that have occurred for a specific node.
+diff --git a/include/pcmki/pcmki_fence.h b/include/pcmki/pcmki_fence.h
+index c3da0361d7..e3a7e27264 100644
+--- a/include/pcmki/pcmki_fence.h
++++ b/include/pcmki/pcmki_fence.h
+@@ -13,14 +13,14 @@
+ #  include <crm/common/output_internal.h>
+ 
+ /*!
+- * \brief Perform a STONITH action.
++ * \brief Ask the cluster to perform fencing
+  *
+- * \note This is the internal version of pcmk_fence_action().  External users
++ * \note This is the internal version of pcmk_request_fencing(). External users
+  *       of the pacemaker API should use that function instead.
+  *
+- * \param[in] st        A connection to the STONITH API.
+- * \param[in] target    The node receiving the action.
+- * \param[in] action    The action to perform.
++ * \param[in] st        A connection to the fencer API
++ * \param[in] target    The node that should be fenced
++ * \param[in] action    The fencing action (on, off, reboot) to perform
+  * \param[in] name      Who requested the fence action?
+  * \param[in] timeout   How long to wait for the operation to complete (in ms).
+  * \param[in] tolerance If a successful action for \p target happened within
+@@ -34,9 +34,9 @@
+  * \note If \p reason is not NULL, the caller is responsible for freeing its
+  *       returned value.
+  */
+-int pcmk__fence_action(stonith_t *st, const char *target, const char *action,
+-                       const char *name, unsigned int timeout, unsigned int tolerance,
+-                       int delay, char **reason);
++int pcmk__request_fencing(stonith_t *st, const char *target, const char *action,
++                          const char *name, unsigned int timeout,
++                          unsigned int tolerance, int delay, char **reason);
+ 
+ /*!
+  * \brief List the fencing operations that have occurred for a specific node.
+diff --git a/lib/pacemaker/pcmk_fence.c b/lib/pacemaker/pcmk_fence.c
+index dbf084fb6b..1b7feb54b2 100644
+--- a/lib/pacemaker/pcmk_fence.c
++++ b/lib/pacemaker/pcmk_fence.c
+@@ -137,9 +137,9 @@ async_fence_helper(gpointer user_data)
+ }
+ 
+ int
+-pcmk__fence_action(stonith_t *st, const char *target, const char *action,
+-                   const char *name, unsigned int timeout, unsigned int tolerance,
+-                   int delay, char **reason)
++pcmk__request_fencing(stonith_t *st, const char *target, const char *action,
++                      const char *name, unsigned int timeout,
++                      unsigned int tolerance, int delay, char **reason)
+ {
+     crm_trigger_t *trig;
+ 
+@@ -169,12 +169,12 @@ pcmk__fence_action(stonith_t *st, const char *target, const char *action,
+ 
+ #ifdef BUILD_PUBLIC_LIBPACEMAKER
+ int
+-pcmk_fence_action(stonith_t *st, const char *target, const char *action,
+-                  const char *name, unsigned int timeout, unsigned int tolerance,
+-                  int delay, char **reason)
++pcmk_request_fencing(stonith_t *st, const char *target, const char *action,
++                     const char *name, unsigned int timeout,
++                     unsigned int tolerance, int delay, char **reason)
+ {
+-    return pcmk__fence_action(st, target, action, name, timeout, tolerance,
+-                              delay, reason);
++    return pcmk__request_fencing(st, target, action, name, timeout, tolerance,
++                                 delay, reason);
+ }
+ #endif
+ 
+diff --git a/tools/stonith_admin.c b/tools/stonith_admin.c
+index fdc7c46d49..56948b3875 100644
+--- a/tools/stonith_admin.c
++++ b/tools/stonith_admin.c
+@@ -570,18 +570,24 @@ main(int argc, char **argv)
+             break;
+ 
+         case 'B':
+-            rc = pcmk__fence_action(st, target, "reboot", name, options.timeout*1000,
+-                                    options.tolerance*1000, options.delay, NULL);
++            rc = pcmk__request_fencing(st, target, "reboot", name,
++                                       options.timeout * 1000,
++                                       options.tolerance * 1000,
++                                       options.delay, NULL);
+             break;
+ 
+         case 'F':
+-            rc = pcmk__fence_action(st, target, "off", name, options.timeout*1000,
+-                                    options.tolerance*1000, options.delay, NULL);
++            rc = pcmk__request_fencing(st, target, "off", name,
++                                       options.timeout * 1000,
++                                       options.tolerance * 1000,
++                                       options.delay, NULL);
+             break;
+ 
+         case 'U':
+-            rc = pcmk__fence_action(st, target, "on", name, options.timeout*1000,
+-                                    options.tolerance*1000, options.delay, NULL);
++            rc = pcmk__request_fencing(st, target, "on", name,
++                                       options.timeout * 1000,
++                                       options.tolerance * 1000,
++                                       options.delay, NULL);
+             break;
+ 
+         case 'h':
+-- 
+2.27.0
+
+
+From 247eb303df934944c0b72b162bb661cee6e0ed8b Mon Sep 17 00:00:00 2001
+From: Ken Gaillot <kgaillot@redhat.com>
+Date: Fri, 10 Dec 2021 15:52:37 -0600
+Subject: [PATCH 04/11] Refactor: tools: drop unnecessary string duplication in
+ stonith_admin
+
+---
+ tools/stonith_admin.c | 11 ++++-------
+ 1 file changed, 4 insertions(+), 7 deletions(-)
+
+diff --git a/tools/stonith_admin.c b/tools/stonith_admin.c
+index 56948b3875..c11e302e76 100644
+--- a/tools/stonith_admin.c
++++ b/tools/stonith_admin.c
+@@ -360,8 +360,6 @@ main(int argc, char **argv)
+ 
+     pcmk__cli_init_logging("stonith_admin", args->verbosity);
+ 
+-    name = strdup(crm_system_name);
+-
+     rc = pcmk__output_new(&out, args->output_ty, args->output_dest, argv);
+     if (rc != pcmk_rc_ok) {
+         exit_code = CRM_EX_ERROR;
+@@ -496,7 +494,7 @@ main(int argc, char **argv)
+     if (st == NULL) {
+         rc = -ENOMEM;
+     } else if (!no_connect) {
+-        rc = st->cmds->connect(st, name, NULL);
++        rc = st->cmds->connect(st, crm_system_name, NULL);
+     }
+     if (rc < 0) {
+         out->err(out, "Could not connect to fencer: %s", pcmk_strerror(rc));
+@@ -570,21 +568,21 @@ main(int argc, char **argv)
+             break;
+ 
+         case 'B':
+-            rc = pcmk__request_fencing(st, target, "reboot", name,
++            rc = pcmk__request_fencing(st, target, "reboot", crm_system_name,
+                                        options.timeout * 1000,
+                                        options.tolerance * 1000,
+                                        options.delay, NULL);
+             break;
+ 
+         case 'F':
+-            rc = pcmk__request_fencing(st, target, "off", name,
++            rc = pcmk__request_fencing(st, target, "off", crm_system_name,
+                                        options.timeout * 1000,
+                                        options.tolerance * 1000,
+                                        options.delay, NULL);
+             break;
+ 
+         case 'U':
+-            rc = pcmk__request_fencing(st, target, "on", name,
++            rc = pcmk__request_fencing(st, target, "on", crm_system_name,
+                                        options.timeout * 1000,
+                                        options.tolerance * 1000,
+                                        options.delay, NULL);
+@@ -619,7 +617,6 @@ main(int argc, char **argv)
+         out->finish(out, exit_code, true, NULL);
+         pcmk__output_free(out);
+     }
+-    free(name);
+     stonith_key_value_freeall(options.params, 1, 1);
+ 
+     if (st != NULL) {
+-- 
+2.27.0
+
+
+From a7888bf6868d8d9d9c77f65ae9983cf748bb0548 Mon Sep 17 00:00:00 2001
+From: Ken Gaillot <kgaillot@redhat.com>
+Date: Fri, 10 Dec 2021 15:56:34 -0600
+Subject: [PATCH 05/11] Refactor: tools: functionize requesting fencing in
+ stonith_admin
+
+... to reduce code duplication and improve readability
+---
+ tools/stonith_admin.c | 27 +++++++++++++++------------
+ 1 file changed, 15 insertions(+), 12 deletions(-)
+
+diff --git a/tools/stonith_admin.c b/tools/stonith_admin.c
+index c11e302e76..f738a9c888 100644
+--- a/tools/stonith_admin.c
++++ b/tools/stonith_admin.c
+@@ -331,6 +331,18 @@ build_arg_context(pcmk__common_args_t *args, GOptionGroup **group) {
+     return context;
+ }
+ 
++// \return Standard Pacemaker return code
++static int
++request_fencing(stonith_t *st, const char *target, const char *command)
++{
++    int rc = pcmk__request_fencing(st, target, command, crm_system_name,
++                                       options.timeout * 1000,
++                                       options.tolerance * 1000,
++                                       options.delay, NULL);
++
++    return rc;
++}
++
+ int
+ main(int argc, char **argv)
+ {
+@@ -568,24 +580,15 @@ main(int argc, char **argv)
+             break;
+ 
+         case 'B':
+-            rc = pcmk__request_fencing(st, target, "reboot", crm_system_name,
+-                                       options.timeout * 1000,
+-                                       options.tolerance * 1000,
+-                                       options.delay, NULL);
++            rc = request_fencing(st, target, "reboot");
+             break;
+ 
+         case 'F':
+-            rc = pcmk__request_fencing(st, target, "off", crm_system_name,
+-                                       options.timeout * 1000,
+-                                       options.tolerance * 1000,
+-                                       options.delay, NULL);
++            rc = request_fencing(st, target, "off");
+             break;
+ 
+         case 'U':
+-            rc = pcmk__request_fencing(st, target, "on", crm_system_name,
+-                                       options.timeout * 1000,
+-                                       options.tolerance * 1000,
+-                                       options.delay, NULL);
++            rc = request_fencing(st, target, "on");
+             break;
+ 
+         case 'h':
+-- 
+2.27.0
+
+
+From 2da32df780983ec1197e857eed5eeb5bf1101889 Mon Sep 17 00:00:00 2001
+From: Ken Gaillot <kgaillot@redhat.com>
+Date: Fri, 10 Dec 2021 16:05:19 -0600
+Subject: [PATCH 06/11] Feature: tools: display failure reasons for
+ stonith_admin fencing commands
+
+Previously, stonith_admin's --fence/--unfence/--reboot options did not output
+any error message on failure. Now, they do, including the exit reason, if
+available.
+---
+ tools/stonith_admin.c | 30 +++++++++++++++++++++++++-----
+ 1 file changed, 25 insertions(+), 5 deletions(-)
+
+diff --git a/tools/stonith_admin.c b/tools/stonith_admin.c
+index f738a9c888..5590faf11e 100644
+--- a/tools/stonith_admin.c
++++ b/tools/stonith_admin.c
+@@ -333,13 +333,33 @@ build_arg_context(pcmk__common_args_t *args, GOptionGroup **group) {
+ 
+ // \return Standard Pacemaker return code
+ static int
+-request_fencing(stonith_t *st, const char *target, const char *command)
++request_fencing(stonith_t *st, const char *target, const char *command,
++                GError **error)
+ {
++    char *reason = NULL;
+     int rc = pcmk__request_fencing(st, target, command, crm_system_name,
+                                        options.timeout * 1000,
+                                        options.tolerance * 1000,
+-                                       options.delay, NULL);
++                                       options.delay, &reason);
+ 
++    if (rc != pcmk_rc_ok) {
++        const char *rc_str = pcmk_rc_str(rc);
++
++        // If reason is identical to return code string, don't display it twice
++        if (pcmk__str_eq(rc_str, reason, pcmk__str_none)) {
++            free(reason);
++            reason = NULL;
++        }
++
++        g_set_error(error, PCMK__RC_ERROR, rc,
++                    "Couldn't %sfence %s: %s%s%s%s",
++                    ((strcmp(command, "on") == 0)? "un" : ""),
++                    target, pcmk_rc_str(rc),
++                    ((reason == NULL)? "" : " ("),
++                    ((reason == NULL)? "" : reason),
++                    ((reason == NULL)? "" : ")"));
++    }
++    free(reason);
+     return rc;
+ }
+ 
+@@ -580,15 +600,15 @@ main(int argc, char **argv)
+             break;
+ 
+         case 'B':
+-            rc = request_fencing(st, target, "reboot");
++            rc = request_fencing(st, target, "reboot", &error);
+             break;
+ 
+         case 'F':
+-            rc = request_fencing(st, target, "off");
++            rc = request_fencing(st, target, "off", &error);
+             break;
+ 
+         case 'U':
+-            rc = request_fencing(st, target, "on");
++            rc = request_fencing(st, target, "on", &error);
+             break;
+ 
+         case 'h':
+-- 
+2.27.0
+
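The %s%s%s sequence above appends " (reason)" only when a reason survives the
duplicate check. A standalone sketch of the same idiom, with invented names:

#include <stdio.h>
#include <string.h>

static void
report(const char *command, const char *target, const char *rc_str,
       const char *reason)
{
    // Suppress the reason when it merely repeats the return-code string
    if ((reason != NULL) && (strcmp(reason, rc_str) == 0)) {
        reason = NULL;
    }
    printf("Couldn't %sfence %s: %s%s%s%s\n",
           ((strcmp(command, "on") == 0)? "un" : ""),
           target, rc_str,
           ((reason == NULL)? "" : " ("),
           ((reason == NULL)? "" : reason),
           ((reason == NULL)? "" : ")"));
}

int
main(void)
{
    report("off", "node1", "Timeout", "Fencing did not complete in time");
    report("on", "node1", "Timeout", "Timeout");   // reason suppressed
    return 0;
}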
+
+From 2d99eba4c326d3b13dbbe446971ea5febd5d05be Mon Sep 17 00:00:00 2001
+From: Ken Gaillot <kgaillot@redhat.com>
+Date: Fri, 10 Dec 2021 16:08:49 -0600
+Subject: [PATCH 07/11] Feature: libpacemaker: return exit reason for fencer
+ connection failures
+
+... instead of outputting to stderr directly, so that the caller (i.e.
+stonith_admin) can output the error in the correct output format.
+---
+ lib/pacemaker/pcmk_fence.c | 3 +--
+ 1 file changed, 1 insertion(+), 2 deletions(-)
+
+diff --git a/lib/pacemaker/pcmk_fence.c b/lib/pacemaker/pcmk_fence.c
+index 1b7feb54b2..d17b07cda2 100644
+--- a/lib/pacemaker/pcmk_fence.c
++++ b/lib/pacemaker/pcmk_fence.c
+@@ -104,10 +104,9 @@ async_fence_helper(gpointer user_data)
+     int rc = stonith_api_connect_retry(st, async_fence_data.name, 10);
+ 
+     if (rc != pcmk_ok) {
+-        fprintf(stderr, "Could not connect to fencer: %s\n", pcmk_strerror(rc));
+         g_main_loop_quit(mainloop);
+         pcmk__set_result(&async_fence_data.result, CRM_EX_ERROR,
+-                         PCMK_EXEC_NOT_CONNECTED, NULL);
++                         PCMK_EXEC_NOT_CONNECTED, pcmk_strerror(rc));
+         return TRUE;
+     }
+ 
+-- 
+2.27.0
+
+
+From 4480ef0602f47450bdddfbde360a6a8327710927 Mon Sep 17 00:00:00 2001
+From: Ken Gaillot <kgaillot@redhat.com>
+Date: Mon, 17 Jan 2022 09:39:39 -0600
+Subject: [PATCH 08/11] Low: libpacemaker: compare fence action names
+ case-sensitively
+
+---
+ lib/pacemaker/pcmk_fence.c | 6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+diff --git a/lib/pacemaker/pcmk_fence.c b/lib/pacemaker/pcmk_fence.c
+index d17b07cda2..2a8f50a555 100644
+--- a/lib/pacemaker/pcmk_fence.c
++++ b/lib/pacemaker/pcmk_fence.c
+@@ -1,5 +1,5 @@
+ /*
+- * Copyright 2009-2021 the Pacemaker project contributors
++ * Copyright 2009-2022 the Pacemaker project contributors
+  *
+  * The version control history for this file may have further details.
+  *
+@@ -77,7 +77,7 @@ static void
+ notify_callback(stonith_t * st, stonith_event_t * e)
+ {
+     if (pcmk__str_eq(async_fence_data.target, e->target, pcmk__str_casei)
+-        && pcmk__str_eq(async_fence_data.action, e->action, pcmk__str_casei)) {
++        && pcmk__str_eq(async_fence_data.action, e->action, pcmk__str_none)) {
+ 
+         pcmk__set_result(&async_fence_data.result,
+                          stonith__event_exit_status(e),
+@@ -549,7 +549,7 @@ pcmk__reduce_fence_history(stonith_history_t *history)
+             if ((hp->state == st_done) || (hp->state == st_failed)) {
+                 /* action not in progress */
+                 if (pcmk__str_eq(hp->target, np->target, pcmk__str_casei) &&
+-                    pcmk__str_eq(hp->action, np->action, pcmk__str_casei) &&
++                    pcmk__str_eq(hp->action, np->action, pcmk__str_none) &&
+                     (hp->state == np->state) &&
+                     ((hp->state == st_done) ||
+                      pcmk__str_eq(hp->delegate, np->delegate, pcmk__str_casei))) {
+-- 
+2.27.0
+
+
+From fe4c65a3b9e715c2b535709f989f2369d3637b78 Mon Sep 17 00:00:00 2001
+From: Ken Gaillot <kgaillot@redhat.com>
+Date: Mon, 17 Jan 2022 09:45:24 -0600
+Subject: [PATCH 09/11] Refactor: libpacemaker: avoid unnecessary string
+ duplication
+
+... and don't leave any dynamic memory hanging around
+---
+ lib/pacemaker/pcmk_fence.c | 11 ++++++++---
+ 1 file changed, 8 insertions(+), 3 deletions(-)
+
+diff --git a/lib/pacemaker/pcmk_fence.c b/lib/pacemaker/pcmk_fence.c
+index 2a8f50a555..260fa5ab8e 100644
+--- a/lib/pacemaker/pcmk_fence.c
++++ b/lib/pacemaker/pcmk_fence.c
+@@ -141,6 +141,7 @@ pcmk__request_fencing(stonith_t *st, const char *target, const char *action,
+                       unsigned int tolerance, int delay, char **reason)
+ {
+     crm_trigger_t *trig;
++    int rc = pcmk_rc_ok;
+ 
+     async_fence_data.st = st;
+     async_fence_data.name = strdup(name);
+@@ -160,10 +161,14 @@ pcmk__request_fencing(stonith_t *st, const char *target, const char *action,
+ 
+     free(async_fence_data.name);
+ 
+-    if ((reason != NULL) && (async_fence_data.result.exit_reason != NULL)) {
+-        *reason = strdup(async_fence_data.result.exit_reason);
++    if (reason != NULL) {
++        // Give the caller ownership of the exit reason
++        *reason = async_fence_data.result.exit_reason;
++        async_fence_data.result.exit_reason = NULL;
+     }
+-    return stonith__result2rc(&async_fence_data.result);
++    rc = stonith__result2rc(&async_fence_data.result);
++    pcmk__reset_result(&async_fence_data.result);
++    return rc;
+ }
+ 
+ #ifdef BUILD_PUBLIC_LIBPACEMAKER
+-- 
+2.27.0
+
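Rather than strdup()ing the exit reason, the patch steals the pointer and
NULLs the field so the following pcmk__reset_result() cannot free it out from
under the caller. A reduced sketch of that move, with invented types:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

typedef struct {
    char *exit_reason;
} result_t;

static void
reset_result(result_t *r)
{
    free(r->exit_reason);   // safe: NULL after a steal
    r->exit_reason = NULL;
}

int
main(void)
{
    result_t result = { strdup("No route to host") };
    char *reason = NULL;

    // Give the caller ownership instead of duplicating the string
    reason = result.exit_reason;
    result.exit_reason = NULL;

    reset_result(&result);  // frees nothing extra; no double free
    printf("caller now owns: %s\n", reason);
    free(reason);
    return 0;
}

Stealing the pointer avoids both an allocation and the double free that
sharing the pointer without NULLing the field would invite.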
+
+From 7b7af07796f05a1adabdac655582be2e17106f81 Mon Sep 17 00:00:00 2001
+From: Ken Gaillot <kgaillot@redhat.com>
+Date: Mon, 17 Jan 2022 10:07:10 -0600
+Subject: [PATCH 10/11] Doc: libpacemaker: improve pcmk__request_fencing()
+ doxygen block
+
+---
+ include/pacemaker.h         |  6 ++++--
+ include/pcmki/pcmki_fence.h | 15 +++++++++------
+ 2 files changed, 13 insertions(+), 8 deletions(-)
+
+diff --git a/include/pacemaker.h b/include/pacemaker.h
+index e581f975a9..266a844892 100644
+--- a/include/pacemaker.h
++++ b/include/pacemaker.h
+@@ -187,8 +187,10 @@ int pcmk_list_nodes(xmlNodePtr *xml, char *node_types);
+  * \param[in] tolerance If a successful action for \p target happened within
+  *                      this many ms, return 0 without performing the action
+  *                      again
+- * \param[in] delay     Apply a fencing delay. Value -1 means disable also any
+- *                      static/random fencing delays from pcmk_delay_base/max
++ * \param[in] delay     Apply this delay (in milliseconds) before initiating the
++ *                      fencing action (a value of -1 applies no delay and also
++ *                      disables any fencing delay from pcmk_delay_base and
++ *                      pcmk_delay_max)
+  * \param[out] reason   If not NULL, where to put descriptive failure reason
+  *
+  * \return Standard Pacemaker return code
+diff --git a/include/pcmki/pcmki_fence.h b/include/pcmki/pcmki_fence.h
+index e3a7e27264..4a2fe3c481 100644
+--- a/include/pcmki/pcmki_fence.h
++++ b/include/pcmki/pcmki_fence.h
+@@ -1,5 +1,5 @@
+ /*
+- * Copyright 2019-2021 the Pacemaker project contributors
++ * Copyright 2019-2022 the Pacemaker project contributors
+  *
+  * The version control history for this file may have further details.
+  *
+@@ -22,17 +22,20 @@
+  * \param[in] target    The node that should be fenced
+  * \param[in] action    The fencing action (on, off, reboot) to perform
+  * \param[in] name      Who requested the fence action?
+- * \param[in] timeout   How long to wait for the operation to complete (in ms).
++ * \param[in] timeout   How long to wait for the operation to complete (in ms)
+  * \param[in] tolerance If a successful action for \p target happened within
+- *                      this many ms, return 0 without performing the action
+- *                      again.
+- * \param[in] delay     Apply a fencing delay. Value -1 means disable also any
+- *                      static/random fencing delays from pcmk_delay_base/max
++ *                      this many milliseconds, return success without
++ *                      performing the action again
++ * \param[in] delay     Apply this delay (in milliseconds) before initiating the
++ *                      fencing action (a value of -1 applies no delay and also
++ *                      disables any fencing delay from pcmk_delay_base and
++ *                      pcmk_delay_max)
+  * \param[out] reason   If not NULL, where to put descriptive failure reason
+  *
+  * \return Standard Pacemaker return code
+  * \note If \p reason is not NULL, the caller is responsible for freeing its
+  *       returned value.
++ * \todo delay is eventually used with g_timeout_add() and should be guint
+  */
+ int pcmk__request_fencing(stonith_t *st, const char *target, const char *action,
+                           const char *name, unsigned int timeout,
+-- 
+2.27.0
+
+
+From 61fb7271712e1246eb6d9472dc1afc7cd10e0a79 Mon Sep 17 00:00:00 2001
+From: Ken Gaillot <kgaillot@redhat.com>
+Date: Mon, 17 Jan 2022 10:18:02 -0600
+Subject: [PATCH 11/11] Fix: tools: get stonith_admin -T option working again
+
+Regression introduced in 2.0.3 by 3910b6fec
+
+This reverts commit 247eb303df934944c0b72b162bb661cee6e0ed8b
+("Refactor: tools: drop unnecessary string duplication in stonith_admin")
+and fixes a regression introduced when stonith_admin was converted to use
+GOption.
+
+The -T option is intended to override the client name passed to the fencer API.
+However, the client name was set to the default (crm_system_name) after option
+processing had already completed, so any value given via -T was overwritten by
+the default, and its memory was leaked.
+
+This commit sets the default only if -T was not used.
+---
+ tools/stonith_admin.c | 15 ++++++++++-----
+ 1 file changed, 10 insertions(+), 5 deletions(-)
+
+diff --git a/tools/stonith_admin.c b/tools/stonith_admin.c
+index 5590faf11e..54774b6fee 100644
+--- a/tools/stonith_admin.c
++++ b/tools/stonith_admin.c
+@@ -337,10 +337,10 @@ request_fencing(stonith_t *st, const char *target, const char *command,
+                 GError **error)
+ {
+     char *reason = NULL;
+-    int rc = pcmk__request_fencing(st, target, command, crm_system_name,
+-                                       options.timeout * 1000,
+-                                       options.tolerance * 1000,
+-                                       options.delay, &reason);
++    int rc = pcmk__request_fencing(st, target, command, name,
++                                   options.timeout * 1000,
++                                   options.tolerance * 1000,
++                                   options.delay, &reason);
+ 
+     if (rc != pcmk_rc_ok) {
+         const char *rc_str = pcmk_rc_str(rc);
+@@ -392,6 +392,10 @@ main(int argc, char **argv)
+ 
+     pcmk__cli_init_logging("stonith_admin", args->verbosity);
+ 
++    if (name == NULL) {
++        name = strdup(crm_system_name);
++    }
++
+     rc = pcmk__output_new(&out, args->output_ty, args->output_dest, argv);
+     if (rc != pcmk_rc_ok) {
+         exit_code = CRM_EX_ERROR;
+@@ -526,7 +530,7 @@ main(int argc, char **argv)
+     if (st == NULL) {
+         rc = -ENOMEM;
+     } else if (!no_connect) {
+-        rc = st->cmds->connect(st, crm_system_name, NULL);
++        rc = st->cmds->connect(st, name, NULL);
+     }
+     if (rc < 0) {
+         out->err(out, "Could not connect to fencer: %s", pcmk_strerror(rc));
+@@ -640,6 +644,7 @@ main(int argc, char **argv)
+         out->finish(out, exit_code, true, NULL);
+         pcmk__output_free(out);
+     }
++    free(name);
+     stonith_key_value_freeall(options.params, 1, 1);
+ 
+     if (st != NULL) {
+-- 
+2.27.0
+
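The fix amounts to applying the default client name only when -T left it
unset, instead of assigning the default unconditionally after parsing. A
condensed sketch of the ordering, with invented names and simplified option
handling:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

int
main(int argc, char **argv)
{
    char *name = NULL;

    // Option parsing may set name (stands in for the -T handler)
    if ((argc > 2) && (strcmp(argv[1], "-T") == 0)) {
        name = strdup(argv[2]);
    }

    // Apply the default only if the user did not supply one; assigning
    // unconditionally here is exactly the regression described above
    if (name == NULL) {
        name = strdup("stonith_admin");
    }

    printf("client name: %s\n", name);
    free(name);
    return 0;
}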
diff --git a/SOURCES/018-failure-messages.patch b/SOURCES/018-failure-messages.patch
new file mode 100644
index 0000000..3a2f249
--- /dev/null
+++ b/SOURCES/018-failure-messages.patch
@@ -0,0 +1,796 @@
+From 08c3420f2c857e7b27cd960f355d787af534da7d Mon Sep 17 00:00:00 2001
+From: Ken Gaillot <kgaillot@redhat.com>
+Date: Tue, 18 Jan 2022 16:04:49 -0600
+Subject: [PATCH 01/12] Log: libcrmcommon: improve description for "not
+ connected" status
+
+PCMK_EXEC_NOT_CONNECTED was originally added to represent "No executor
+connection", but it can also now mean no fencer connection, so change it to
+"Internal communication failure" which is probably less mysterious to end users
+anyway (especially since it should be accompanied by a more descriptive exit
+reason).
+---
+ include/crm/common/results.h | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/include/crm/common/results.h b/include/crm/common/results.h
+index 873faf5c43..3d322a7ce6 100644
+--- a/include/crm/common/results.h
++++ b/include/crm/common/results.h
+@@ -349,7 +349,7 @@ pcmk_exec_status_str(enum pcmk_exec_status status)
+         case PCMK_EXEC_ERROR_HARD:      return "Hard error";
+         case PCMK_EXEC_ERROR_FATAL:     return "Fatal error";
+         case PCMK_EXEC_NOT_INSTALLED:   return "Not installed";
+-        case PCMK_EXEC_NOT_CONNECTED:   return "No executor connection";
++        case PCMK_EXEC_NOT_CONNECTED:   return "Internal communication failure";
+         case PCMK_EXEC_INVALID:         return "Cannot execute now";
+         case PCMK_EXEC_NO_FENCE_DEVICE: return "No fence device";
+         case PCMK_EXEC_NO_SECRETS:      return "CIB secrets unavailable";
+-- 
+2.27.0
+
+
+From 7c345cf8cf0cb054f5634206880df035bfef7311 Mon Sep 17 00:00:00 2001
+From: Ken Gaillot <kgaillot@redhat.com>
+Date: Mon, 20 Dec 2021 15:12:36 -0600
+Subject: [PATCH 02/12] Refactor: libcrmcommon: drop unnecessary system error
+ redefinitions
+
+portability.h defines some system error codes that might not be present on
+non-Linux systems.
+
+This was a bad idea, since there's no way to ensure the defined values don't
+conflict with existing system codes. However, we use a number of them, so it's
+probably best to keep them, at least until we can make a backward compatibility
+break.
+
+However, we don't use EUNATCH, ENOSR, or ENOSTR, so we can delete those.
+---
+ include/portability.h | 12 ------------
+ lib/common/results.c  |  9 ++++++---
+ 2 files changed, 6 insertions(+), 15 deletions(-)
+
+diff --git a/include/portability.h b/include/portability.h
+index 9a60c583a7..ee065a376d 100644
+--- a/include/portability.h
++++ b/include/portability.h
+@@ -131,10 +131,6 @@ typedef union
+ #    define EREMOTEIO 193
+ #  endif
+ 
+-#  ifndef EUNATCH
+-#    define EUNATCH   194
+-#  endif
+-
+ #  ifndef ENOKEY
+ #    define ENOKEY    195
+ #  endif
+@@ -147,14 +143,6 @@ typedef union
+ #    define ETIME     197
+ #  endif
+ 
+-#  ifndef ENOSR
+-#    define ENOSR     198
+-#  endif
+-
+-#  ifndef ENOSTR
+-#    define ENOSTR    199
+-#  endif
+-
+ #  ifndef EKEYREJECTED
+ #    define EKEYREJECTED 200
+ #  endif
+diff --git a/lib/common/results.c b/lib/common/results.c
+index 6d120694cd..96cd4e5659 100644
+--- a/lib/common/results.c
++++ b/lib/common/results.c
+@@ -118,9 +118,6 @@ pcmk_strerror(int rc)
+         case EREMOTEIO:
+             return "Remote I/O error";
+             /* coverity[dead_error_condition] False positive on non-Linux */
+-        case EUNATCH:
+-            return "Protocol driver not attached";
+-            /* coverity[dead_error_condition] False positive on non-Linux */
+         case ENOKEY:
+             return "Required key not available";
+     }
+@@ -342,8 +339,12 @@ pcmk_rc_name(int rc)
+         case ENOMSG:            return "ENOMSG";
+         case ENOPROTOOPT:       return "ENOPROTOOPT";
+         case ENOSPC:            return "ENOSPC";
++#ifdef ENOSR
+         case ENOSR:             return "ENOSR";
++#endif
++#ifdef ENOSTR
+         case ENOSTR:            return "ENOSTR";
++#endif
+         case ENOSYS:            return "ENOSYS";
+         case ENOTBLK:           return "ENOTBLK";
+         case ENOTCONN:          return "ENOTCONN";
+@@ -376,7 +377,9 @@ pcmk_rc_name(int rc)
+         case ETIME:             return "ETIME";
+         case ETIMEDOUT:         return "ETIMEDOUT";
+         case ETXTBSY:           return "ETXTBSY";
++#ifdef EUNATCH
+         case EUNATCH:           return "EUNATCH";
++#endif
+         case EUSERS:            return "EUSERS";
+         /* case EWOULDBLOCK:    return "EWOULDBLOCK"; */
+         case EXDEV:             return "EXDEV";
+-- 
+2.27.0
+
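Guarding the case labels with #ifdef lets the same switch compile on platforms
whose <errno.h> lacks a given constant, now that portability.h no longer
supplies fallbacks for these three. A minimal illustration (errno_name is an
invented stand-in for pcmk_rc_name()):

#include <errno.h>
#include <stdio.h>

static const char *
errno_name(int rc)
{
    switch (rc) {
        case ENOSYS:        return "ENOSYS";
#ifdef ENOSR                // not defined on every platform
        case ENOSR:         return "ENOSR";
#endif
#ifdef ENOSTR
        case ENOSTR:        return "ENOSTR";
#endif
        default:            return "unknown";
    }
}

int
main(void)
{
    printf("%s\n", errno_name(ENOSYS));
    return 0;
}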
+
+From eac8d1ca51eac3f437e18584f7e013d976ecee2c Mon Sep 17 00:00:00 2001
+From: Ken Gaillot <kgaillot@redhat.com>
+Date: Mon, 20 Dec 2021 15:33:12 -0600
+Subject: [PATCH 03/12] Log: libcrmcommon: improve handling of portability.h
+ error codes
+
+portability.h defines some system error codes that might not be present on
+non-Linux systems.
+
+Define a constant for each one (for example, PCMK__ECOMM for ECOMM) when
+the system doesn't have the value, so we can detect that when relevant.
+
+Also, make sure pcmk_rc_name() and pcmk_rc_str() handle all of these values.
+---
+ include/portability.h |  8 ++++++++
+ lib/common/results.c  | 32 ++++++++++++++++++++++++++++++--
+ 2 files changed, 38 insertions(+), 2 deletions(-)
+
+diff --git a/include/portability.h b/include/portability.h
+index ee065a376d..5d5fbf21cb 100644
+--- a/include/portability.h
++++ b/include/portability.h
+@@ -116,34 +116,42 @@ typedef union
+ #  include <errno.h>
+ 
+ #  ifndef ENOTUNIQ
++#    define PCMK__ENOTUNIQ
+ #    define ENOTUNIQ  190
+ #  endif
+ 
+ #  ifndef ECOMM
++#    define PCMK__ECOMM
+ #    define ECOMM     191
+ #  endif
+ 
+ #  ifndef ELIBACC
++#    define PCMK__ELIBACC
+ #    define ELIBACC   192
+ #  endif
+ 
+ #  ifndef EREMOTEIO
+#    define PCMK__EREMOTEIO
+ #    define EREMOTEIO 193
+ #  endif
+ 
+ #  ifndef ENOKEY
++#    define PCMK__ENOKEY
+ #    define ENOKEY    195
+ #  endif
+ 
+ #  ifndef ENODATA
++#    define PCMK__ENODATA
+ #    define ENODATA   196
+ #  endif
+ 
+ #  ifndef ETIME
++#    define PCMK__ETIME
+ #    define ETIME     197
+ #  endif
+ 
+ #  ifndef EKEYREJECTED
++#    define PCMK__EKEYREJECTED
+ #    define EKEYREJECTED 200
+ #  endif
+ 
+diff --git a/lib/common/results.c b/lib/common/results.c
+index 96cd4e5659..bcf289d0d6 100644
+--- a/lib/common/results.c
++++ b/lib/common/results.c
+@@ -395,9 +395,9 @@ pcmk_rc_name(int rc)
+ #ifdef EISNAM // Not available on OS X, Illumos, Solaris
+         case EISNAM:            return "EISNAM";
+         case EKEYEXPIRED:       return "EKEYEXPIRED";
+-        case EKEYREJECTED:      return "EKEYREJECTED";
+         case EKEYREVOKED:       return "EKEYREVOKED";
+ #endif
++        case EKEYREJECTED:      return "EKEYREJECTED";
+         case EL2HLT:            return "EL2HLT";
+         case EL2NSYNC:          return "EL2NSYNC";
+         case EL3HLT:            return "EL3HLT";
+@@ -443,7 +443,35 @@ pcmk_rc_str(int rc)
+     if (rc < 0) {
+         return "Unknown error";
+     }
+-    return strerror(rc);
++
++    // Handle values that could be defined by system or by portability.h
++    switch (rc) {
++#ifdef PCMK__ENOTUNIQ
++        case ENOTUNIQ:      return "Name not unique on network";
++#endif
++#ifdef PCMK__ECOMM
++        case ECOMM:         return "Communication error on send";
++#endif
++#ifdef PCMK__ELIBACC
++        case ELIBACC:       return "Can not access a needed shared library";
++#endif
++#ifdef PCMK__EREMOTEIO
++        case EREMOTEIO:     return "Remote I/O error";
++#endif
++#ifdef PCMK__ENOKEY
++        case ENOKEY:        return "Required key not available";
++#endif
++#ifdef PCMK__ENODATA
++        case ENODATA:       return "No data available";
++#endif
++#ifdef PCMK__ETIME
++        case ETIME:         return "Timer expired";
++#endif
++#ifdef PCMK__EKEYREJECTED
++        case EKEYREJECTED:  return "Key was rejected by service";
++#endif
++        default:            return strerror(rc);
++    }
+ }
+ 
+ // This returns negative values for errors
+-- 
+2.27.0
+
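The marker macro records that the numeric value came from the fallback header
rather than the system, so the string lookup knows strerror() cannot describe
it. A reduced sketch of the pattern; the MYPKG__ prefix is invented here to
avoid suggesting real Pacemaker macros beyond those shown in the patch:

#include <errno.h>
#include <stdio.h>
#include <string.h>

/* If the platform lacks ENOKEY, define it ourselves and leave a marker
 * so we know strerror() cannot describe it. */
#ifndef ENOKEY
#define MYPKG__ENOKEY
#define ENOKEY 195
#endif

static const char *
describe(int rc)
{
    switch (rc) {
#ifdef MYPKG__ENOKEY        // only needed where we supplied the value
        case ENOKEY:    return "Required key not available";
#endif
        default:        return strerror(rc);
    }
}

int
main(void)
{
    printf("%s\n", describe(ENOKEY));
    return 0;
}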
+
+From 32a38ac6374f85c43e7f4051f5e519822cc481e6 Mon Sep 17 00:00:00 2001
+From: Ken Gaillot <kgaillot@redhat.com>
+Date: Mon, 20 Dec 2021 15:39:19 -0600
+Subject: [PATCH 04/12] Log: libcrmcommon: redefine pcmk_strerror() in terms of
+ pcmk_rc_str()
+
+... to reduce code duplication. This causes minor differences in the string for
+a few values.
+---
+ lib/common/results.c | 67 +-------------------------------------------
+ 1 file changed, 1 insertion(+), 66 deletions(-)
+
+diff --git a/lib/common/results.c b/lib/common/results.c
+index bcf289d0d6..b2c6e8d553 100644
+--- a/lib/common/results.c
++++ b/lib/common/results.c
+@@ -57,72 +57,7 @@ pcmk_errorname(int rc)
+ const char *
+ pcmk_strerror(int rc)
+ {
+-    if (rc == 0) {
+-        return "OK";
+-    }
+-
+-    rc = abs(rc);
+-
+-    // Of course rc > 0 ... unless someone passed INT_MIN as rc
+-    if ((rc > 0) && (rc < PCMK_ERROR_OFFSET)) {
+-        return strerror(rc);
+-    }
+-
+-    switch (rc) {
+-        case pcmk_err_generic:
+-            return "Generic Pacemaker error";
+-        case pcmk_err_no_quorum:
+-            return "Operation requires quorum";
+-        case pcmk_err_schema_validation:
+-            return "Update does not conform to the configured schema";
+-        case pcmk_err_transform_failed:
+-            return "Schema transform failed";
+-        case pcmk_err_old_data:
+-            return "Update was older than existing configuration";
+-        case pcmk_err_diff_failed:
+-            return "Application of an update diff failed";
+-        case pcmk_err_diff_resync:
+-            return "Application of an update diff failed, requesting a full refresh";
+-        case pcmk_err_cib_modified:
+-            return "The on-disk configuration was manually modified";
+-        case pcmk_err_cib_backup:
+-            return "Could not archive the previous configuration";
+-        case pcmk_err_cib_save:
+-            return "Could not save the new configuration to disk";
+-        case pcmk_err_cib_corrupt:
+-            return "Could not parse on-disk configuration";
+-        case pcmk_err_multiple:
+-            return "Resource active on multiple nodes";
+-        case pcmk_err_node_unknown:
+-            return "Node not found";
+-        case pcmk_err_already:
+-            return "Situation already as requested";
+-        case pcmk_err_bad_nvpair:
+-            return "Bad name/value pair given";
+-        case pcmk_err_schema_unchanged:
+-            return "Schema is already the latest available";
+-        case pcmk_err_unknown_format:
+-            return "Unknown output format";
+-
+-            /* The following cases will only be hit on systems for which they are non-standard */
+-            /* coverity[dead_error_condition] False positive on non-Linux */
+-        case ENOTUNIQ:
+-            return "Name not unique on network";
+-            /* coverity[dead_error_condition] False positive on non-Linux */
+-        case ECOMM:
+-            return "Communication error on send";
+-            /* coverity[dead_error_condition] False positive on non-Linux */
+-        case ELIBACC:
+-            return "Can not access a needed shared library";
+-            /* coverity[dead_error_condition] False positive on non-Linux */
+-        case EREMOTEIO:
+-            return "Remote I/O error";
+-            /* coverity[dead_error_condition] False positive on non-Linux */
+-        case ENOKEY:
+-            return "Required key not available";
+-    }
+-    crm_err("Unknown error code: %d", rc);
+-    return "Unknown error";
++    return pcmk_rc_str(pcmk_legacy2rc(rc));
+ }
+ 
+ // Standard Pacemaker API return codes
+-- 
+2.27.0
+
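With the legacy table gone, every description lives in one place: convert the
legacy code, then ask the one remaining string function. A toy version of that
shape, with invented codes and a deliberately tiny mapping:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

enum { RC_OK = 0, RC_ERROR = -1001, RC_NO_QUORUM = -1002 };

static const char *
rc_str(int rc)
{
    switch (rc) {
        case RC_OK:         return "OK";
        case RC_NO_QUORUM:  return "Operation requires quorum";
        case RC_ERROR:      return "Error";
        default:            return strerror(abs(rc));
    }
}

// Legacy codes were negative errno-style values plus private extensions;
// map them into the new space, then reuse the single string table
static int
legacy2rc(int legacy)
{
    return (legacy == -201)? RC_NO_QUORUM : ((legacy < 0)? legacy : RC_OK);
}

static const char *
legacy_strerror(int legacy)
{
    return rc_str(legacy2rc(legacy));
}

int
main(void)
{
    printf("%s\n", legacy_strerror(-201));
    return 0;
}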
+
+From 7c331d7e2275ffebbfd5e2f6432a6137a66ee5db Mon Sep 17 00:00:00 2001
+From: Ken Gaillot <kgaillot@redhat.com>
+Date: Mon, 20 Dec 2021 15:41:24 -0600
+Subject: [PATCH 05/12] Log: libcrmcommon: don't say "Unknown error"
+
+... which is unhelpful and annoying to users
+---
+ lib/common/results.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/lib/common/results.c b/lib/common/results.c
+index b2c6e8d553..5ffac76549 100644
+--- a/lib/common/results.c
++++ b/lib/common/results.c
+@@ -376,7 +376,7 @@ pcmk_rc_str(int rc)
+         return pcmk__rcs[pcmk_rc_error - rc].desc;
+     }
+     if (rc < 0) {
+-        return "Unknown error";
++        return "Error";
+     }
+ 
+     // Handle values that could be defined by system or by portability.h
+@@ -768,7 +768,7 @@ bz2_strerror(int rc)
+         case BZ_OUTBUFF_FULL:
+             return "output data will not fit into the buffer provided";
+     }
+-    return "Unknown error";
++    return "Data compression error";
+ }
+ 
+ crm_exit_t
+-- 
+2.27.0
+
+
+From 26883b4edda7d81bfcb79bd7b33bb3210beff110 Mon Sep 17 00:00:00 2001
+From: Ken Gaillot <kgaillot@redhat.com>
+Date: Mon, 20 Dec 2021 16:01:39 -0600
+Subject: [PATCH 06/12] Log: fencing: don't warn if cluster has no watchdog
+ device
+
+---
+ lib/fencing/st_client.c | 7 ++++++-
+ 1 file changed, 6 insertions(+), 1 deletion(-)
+
+diff --git a/lib/fencing/st_client.c b/lib/fencing/st_client.c
+index b1de912b2a..a0f3119f3b 100644
+--- a/lib/fencing/st_client.c
++++ b/lib/fencing/st_client.c
+@@ -187,7 +187,12 @@ stonith__watchdog_fencing_enabled_for_node_api(stonith_t *st, const char *node)
+                  * we drop in here - so as not to make remote nodes
+                  * panic on that answer
+                  */
+-                crm_warn("watchdog-fencing-query failed");
++                if (rc == -ENODEV) {
++                    crm_notice("Cluster does not have watchdog fencing device");
++                } else {
++                    crm_warn("Could not check for watchdog fencing device: %s",
++                             pcmk_strerror(rc));
++                }
+             } else if (list[0] == '\0') {
+                 rv = TRUE;
+             } else {
+-- 
+2.27.0
+
+
+From 72b3c42232deaca64ffba9582598c59331203761 Mon Sep 17 00:00:00 2001
+From: Ken Gaillot <kgaillot@redhat.com>
+Date: Mon, 20 Dec 2021 16:22:49 -0600
+Subject: [PATCH 07/12] Test: libcrmcommon: update pcmk_rc_str() unit test for
+ recent change
+
+---
+ lib/common/tests/results/pcmk__results_test.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/lib/common/tests/results/pcmk__results_test.c b/lib/common/tests/results/pcmk__results_test.c
+index 57a520c501..e08d4b6261 100644
+--- a/lib/common/tests/results/pcmk__results_test.c
++++ b/lib/common/tests/results/pcmk__results_test.c
+@@ -30,7 +30,7 @@ static void
+ test_for_pcmk_rc_str(void **state) {
+     assert_string_equal(pcmk_rc_str(pcmk_rc_error-1), "Unknown output format");
+     assert_string_equal(pcmk_rc_str(pcmk_rc_ok), "OK");
+-    assert_string_equal(pcmk_rc_str(-1), "Unknown error");
++    assert_string_equal(pcmk_rc_str(-1), "Error");
+ }
+ 
+ static void
+-- 
+2.27.0
+
+
+From c1ad3d6640f695321a83183c95fae2f105adc429 Mon Sep 17 00:00:00 2001
+From: Ken Gaillot <kgaillot@redhat.com>
+Date: Tue, 21 Dec 2021 10:20:38 -0600
+Subject: [PATCH 08/12] Test: cts-lab: update expected patterns for recent
+ changes
+
+---
+ cts/lab/CTStests.py | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/cts/lab/CTStests.py b/cts/lab/CTStests.py
+index 62c832eb45..f4be998cfb 100644
+--- a/cts/lab/CTStests.py
++++ b/cts/lab/CTStests.py
+@@ -3055,7 +3055,7 @@ class RemoteStonithd(RemoteDriver):
+             r"pacemaker-controld.*:\s+error.*: Operation remote-.*_monitor",
+             r"pacemaker-controld.*:\s+error.*: Result of monitor operation for remote-.*",
+             r"schedulerd.*:\s+Recover remote-.*\s*\(.*\)",
+-            r"error: Result of monitor operation for .* on remote-.*: No executor connection",
++            r"error: Result of monitor operation for .* on remote-.*: Internal communication failure",
+         ]
+ 
+         ignore_pats.extend(RemoteDriver.errorstoignore(self))
+-- 
+2.27.0
+
+
+From f272e2f526633c707e894b39c7c7bce3c14de898 Mon Sep 17 00:00:00 2001
+From: Ken Gaillot <kgaillot@redhat.com>
+Date: Tue, 21 Dec 2021 15:40:49 -0600
+Subject: [PATCH 09/12] Log: controller,libpacemaker: make history XML creation
+ less chatty
+
+Other messages with the same info will already be logged at higher severity
+---
+ daemons/controld/controld_execd.c      |  3 +--
+ daemons/controld/controld_te_actions.c |  7 ++-----
+ include/pcmki/pcmki_sched_utils.h      |  3 +--
+ lib/pacemaker/pcmk_sched_transition.c  |  3 +--
+ lib/pacemaker/pcmk_sched_utils.c       | 12 +++++-------
+ 5 files changed, 10 insertions(+), 18 deletions(-)
+
+diff --git a/daemons/controld/controld_execd.c b/daemons/controld/controld_execd.c
+index 15784e7687..52157fa5d4 100644
+--- a/daemons/controld/controld_execd.c
++++ b/daemons/controld/controld_execd.c
+@@ -693,9 +693,8 @@ build_operation_update(xmlNode * parent, lrmd_rsc_info_t * rsc, lrmd_event_data_
+         caller_version = CRM_FEATURE_SET;
+     }
+ 
+-    crm_trace("Building %s operation update with originator version: %s", op->rsc_id, caller_version);
+     xml_op = pcmk__create_history_xml(parent, op, caller_version, target_rc,
+-                                      fsa_our_uname, src, LOG_DEBUG);
++                                      fsa_our_uname, src);
+     if (xml_op == NULL) {
+         return TRUE;
+     }
+diff --git a/daemons/controld/controld_te_actions.c b/daemons/controld/controld_te_actions.c
+index 63b7c72359..b0bcb8b2e4 100644
+--- a/daemons/controld/controld_te_actions.c
++++ b/daemons/controld/controld_te_actions.c
+@@ -181,7 +181,6 @@ controld_record_action_timeout(crm_action_t *action)
+     lrmd_event_data_t *op = NULL;
+     xmlNode *state = NULL;
+     xmlNode *rsc = NULL;
+-    xmlNode *xml_op = NULL;
+     xmlNode *action_rsc = NULL;
+ 
+     int rc = pcmk_ok;
+@@ -245,12 +244,10 @@ controld_record_action_timeout(crm_action_t *action)
+     op->user_data = pcmk__transition_key(transition_graph->id, action->id,
+                                          target_rc, te_uuid);
+ 
+-    xml_op = pcmk__create_history_xml(rsc, op, CRM_FEATURE_SET, target_rc,
+-                                      target, __func__, LOG_INFO);
++    pcmk__create_history_xml(rsc, op, CRM_FEATURE_SET, target_rc, target,
++                             __func__);
+     lrmd_free_event(op);
+ 
+-    crm_log_xml_trace(xml_op, "Action timeout");
+-
+     rc = fsa_cib_conn->cmds->update(fsa_cib_conn, XML_CIB_TAG_STATUS, state, call_options);
+     fsa_register_cib_callback(rc, FALSE, NULL, cib_action_updated);
+     free_xml(state);
+diff --git a/include/pcmki/pcmki_sched_utils.h b/include/pcmki/pcmki_sched_utils.h
+index 68d60fc7db..144424a609 100644
+--- a/include/pcmki/pcmki_sched_utils.h
++++ b/include/pcmki/pcmki_sched_utils.h
+@@ -52,8 +52,7 @@ extern void process_utilization(pe_resource_t * rsc, pe_node_t ** prefer, pe_wor
+ 
+ xmlNode *pcmk__create_history_xml(xmlNode *parent, lrmd_event_data_t *event,
+                                  const char *caller_version, int target_rc,
+-                                 const char *node, const char *origin,
+-                                 int level);
++                                 const char *node, const char *origin);
+ 
+ #  define LOAD_STOPPED "load_stopped"
+ 
+diff --git a/lib/pacemaker/pcmk_sched_transition.c b/lib/pacemaker/pcmk_sched_transition.c
+index 678c3f5dd2..1aa90a5a0b 100644
+--- a/lib/pacemaker/pcmk_sched_transition.c
++++ b/lib/pacemaker/pcmk_sched_transition.c
+@@ -201,8 +201,7 @@ inject_op(xmlNode * cib_resource, lrmd_event_data_t * op, int target_rc)
+ inject_op(xmlNode * cib_resource, lrmd_event_data_t * op, int target_rc)
+ {
+     return pcmk__create_history_xml(cib_resource, op, CRM_FEATURE_SET,
+-                                    target_rc, NULL, crm_system_name,
+-                                    LOG_TRACE);
++                                    target_rc, NULL, crm_system_name);
+ }
+ 
+ static xmlNode *
+diff --git a/lib/pacemaker/pcmk_sched_utils.c b/lib/pacemaker/pcmk_sched_utils.c
+index f8200b0efc..4f63d3374d 100644
+--- a/lib/pacemaker/pcmk_sched_utils.c
++++ b/lib/pacemaker/pcmk_sched_utils.c
+@@ -892,14 +892,13 @@ add_op_digest_to_xml(lrmd_event_data_t *op, xmlNode *update)
+  * \param[in]     target_rc       Expected result of operation
+  * \param[in]     node            Name of node on which operation was performed
+  * \param[in]     origin          Arbitrary description of update source
+- * \param[in]     level           A log message will be logged at this level
+  *
+  * \return Newly created XML node for history update
+  */
+ xmlNode *
+ pcmk__create_history_xml(xmlNode *parent, lrmd_event_data_t *op,
+                          const char *caller_version, int target_rc,
+-                         const char *node, const char *origin, int level)
++                         const char *node, const char *origin)
+ {
+     char *key = NULL;
+     char *magic = NULL;
+@@ -912,11 +911,10 @@ pcmk__create_history_xml(xmlNode *parent, lrmd_event_data_t *op,
+     const char *task = NULL;
+ 
+     CRM_CHECK(op != NULL, return NULL);
+-    do_crm_log(level, "%s: Updating resource %s after %s op %s (interval=%u)",
+-               origin, op->rsc_id, op->op_type,
+-               pcmk_exec_status_str(op->op_status), op->interval_ms);
+-
+-    crm_trace("DC version: %s", caller_version);
++    crm_trace("Creating history XML for %s-interval %s action for %s on %s "
++              "(DC version: %s, origin: %s)",
++              pcmk__readable_interval(op->interval_ms), op->op_type, op->rsc_id,
++              ((node == NULL)? "no node" : node), caller_version, origin);
+ 
+     task = op->op_type;
+ 
+-- 
+2.27.0
+
+
+From 06b1da9e5345e0d1571042c11646fd7157961279 Mon Sep 17 00:00:00 2001
+From: Ken Gaillot <kgaillot@redhat.com>
+Date: Tue, 21 Dec 2021 17:09:44 -0600
+Subject: [PATCH 10/12] Feature: controller: improve exit reason for internal
+ timeouts
+
+Functionize the part of controld_record_action_timeout() that creates a fake
+executor event, into a new function synthesize_timeout_event(), and have it set
+a more detailed exit reason describing what timed out.
+---
+ daemons/controld/controld_te_actions.c | 61 ++++++++++++++++++++------
+ 1 file changed, 48 insertions(+), 13 deletions(-)
+
+diff --git a/daemons/controld/controld_te_actions.c b/daemons/controld/controld_te_actions.c
+index b0bcb8b2e4..de2fbb82bf 100644
+--- a/daemons/controld/controld_te_actions.c
++++ b/daemons/controld/controld_te_actions.c
+@@ -175,6 +175,53 @@ te_crm_command(crm_graph_t * graph, crm_action_t * action)
+     return TRUE;
+ }
+ 
++/*!
++ * \internal
++ * \brief Synthesize an executor event for a resource action timeout
++ *
++ * \param[in] action     Resource action that timed out
++ * \param[in] target_rc  Expected result of action that timed out
++ *
++ * Synthesize an executor event for a resource action timeout. (If the executor
++ * gets a timeout while waiting for a resource action to complete, that will be
++ * reported via the usual callback. This timeout means we didn't hear from the
++ * executor itself or the controller that relayed the action to the executor.)
++ *
++ * \return Newly created executor event for result of \p action
++ * \note The caller is responsible for freeing the return value using
++ *       lrmd_free_event().
++ */
++static lrmd_event_data_t *
++synthesize_timeout_event(crm_action_t *action, int target_rc)
++{
++    lrmd_event_data_t *op = NULL;
++    const char *target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
++    const char *reason = NULL;
++    char *dynamic_reason = NULL;
++
++    if (pcmk__str_eq(target, get_local_node_name(), pcmk__str_casei)) {
++        reason = "Local executor did not return result in time";
++    } else {
++        const char *router_node = NULL;
++
++        router_node = crm_element_value(action->xml, XML_LRM_ATTR_ROUTER_NODE);
++        if (router_node == NULL) {
++            router_node = target;
++        }
++        dynamic_reason = crm_strdup_printf("Controller on %s did not return "
++                                           "result in time", router_node);
++        reason = dynamic_reason;
++    }
++
++    op = pcmk__event_from_graph_action(NULL, action, PCMK_EXEC_TIMEOUT,
++                                       PCMK_OCF_UNKNOWN_ERROR, reason);
++    op->call_id = -1;
++    op->user_data = pcmk__transition_key(transition_graph->id, action->id,
++                                         target_rc, te_uuid);
++    free(dynamic_reason);
++    return op;
++}
++
+ void
+ controld_record_action_timeout(crm_action_t *action)
+ {
+@@ -231,19 +278,7 @@ controld_record_action_timeout(crm_action_t *action)
+     crm_copy_xml_element(action_rsc, rsc, XML_AGENT_ATTR_CLASS);
+     crm_copy_xml_element(action_rsc, rsc, XML_AGENT_ATTR_PROVIDER);
+ 
+-    /* If the executor gets a timeout while waiting for the action to complete,
+-     * that will be reported via the usual callback. This timeout means that we
+-     * didn't hear from the executor or the controller that relayed the action
+-     * to the executor.
+-     */
+-    op = pcmk__event_from_graph_action(NULL, action, PCMK_EXEC_TIMEOUT,
+-                                       PCMK_OCF_UNKNOWN_ERROR,
+-                                       "Cluster communication timeout "
+-                                       "(no response from executor)");
+-    op->call_id = -1;
+-    op->user_data = pcmk__transition_key(transition_graph->id, action->id,
+-                                         target_rc, te_uuid);
+-
++    op = synthesize_timeout_event(action, target_rc);
+     pcmk__create_history_xml(rsc, op, CRM_FEATURE_SET, target_rc, target,
+                              __func__);
+     lrmd_free_event(op);
+-- 
+2.27.0
+
+
+From be620d206faefab967d4c8567d6554d10c9e72ba Mon Sep 17 00:00:00 2001
+From: Ken Gaillot <kgaillot@redhat.com>
+Date: Wed, 22 Dec 2021 16:35:06 -0600
+Subject: [PATCH 11/12] Feature: fencing: improve exit reason for fencing
+ timeouts
+
+Troubleshooting timeouts is one of the more difficult aspects of cluster
+maintenance. We want to give as much of a hint as possible, but fencing is
+especially tricky because a single operation might involve multiple retries
+of multiple devices.
+
+Barring another major project to track exactly which devices, retries, etc.,
+were used in a given operation, these changes in wording are probably the best
+we can do.
+---
+ daemons/fenced/fenced_remote.c | 8 +++++---
+ lib/fencing/st_client.c        | 2 +-
+ 2 files changed, 6 insertions(+), 4 deletions(-)
+
+diff --git a/daemons/fenced/fenced_remote.c b/daemons/fenced/fenced_remote.c
+index 1e237150c5..6eebb7381e 100644
+--- a/daemons/fenced/fenced_remote.c
++++ b/daemons/fenced/fenced_remote.c
+@@ -1,5 +1,5 @@
+ /*
+- * Copyright 2009-2021 the Pacemaker project contributors
++ * Copyright 2009-2022 the Pacemaker project contributors
+  *
+  * The version control history for this file may have further details.
+  *
+@@ -715,8 +715,10 @@ remote_op_timeout(gpointer userdata)
+                   CRM_XS " id=%.8s",
+                   op->action, op->target, op->client_name, op->id);
+     } else {
+-        finalize_timed_out_op(userdata, "Fencing could not be completed "
+-                                        "within overall timeout");
++        finalize_timed_out_op(userdata, "Fencing did not complete within a "
++                                        "total timeout based on the "
++                                        "configured timeout and retries for "
++                                        "any devices attempted");
+     }
+     return G_SOURCE_REMOVE;
+ }
+diff --git a/lib/fencing/st_client.c b/lib/fencing/st_client.c
+index a0f3119f3b..718739b321 100644
+--- a/lib/fencing/st_client.c
++++ b/lib/fencing/st_client.c
+@@ -906,7 +906,7 @@ invoke_registered_callbacks(stonith_t *stonith, xmlNode *msg, int call_id)
+     if (msg == NULL) {
+         // Fencer didn't reply in time
+         pcmk__set_result(&result, CRM_EX_ERROR, PCMK_EXEC_TIMEOUT,
+-                         "Timeout waiting for reply from fencer");
++                         "Fencer accepted request but did not reply in time");
+         CRM_LOG_ASSERT(call_id > 0);
+ 
+     } else {
+-- 
+2.27.0
+
+
+From 0fe8ede2f8e838e335fe42846bdf147111ce9955 Mon Sep 17 00:00:00 2001
+From: Ken Gaillot <kgaillot@redhat.com>
+Date: Wed, 22 Dec 2021 17:09:09 -0600
+Subject: [PATCH 12/12] Feature: libcrmservice: improve exit reason for
+ timeouts
+
+The services library doesn't have enough information about an action to say
+(for example) what configuration parameters might be relevant, but we can at
+least distinguish what kind of agent timed out.
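+
+Condensed, the new classification amounts to the following (sketch only; the
+services_linux.c hunk below is authoritative):
+
+    const char *reason = "Process did not complete in time"; // generic default
+
+    if (op->rsc != NULL) {                  // action belongs to a resource
+        reason = "Resource agent did not complete in time";
+    } else if (pcmk__str_eq(op->standard, PCMK_RESOURCE_CLASS_STONITH,
+                            pcmk__str_none)) {
+        reason = "Fence agent did not complete in time";
+    }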
+---
+ lib/services/services_linux.c | 12 +++++++++++-
+ lib/services/systemd.c        |  2 +-
+ 2 files changed, 12 insertions(+), 2 deletions(-)
+
+diff --git a/lib/services/services_linux.c b/lib/services/services_linux.c
+index f15eee860e..d6aafcfe46 100644
+--- a/lib/services/services_linux.c
++++ b/lib/services/services_linux.c
+@@ -677,9 +677,19 @@ async_action_complete(mainloop_child_t *p, pid_t pid, int core, int signo,
+         parse_exit_reason_from_stderr(op);
+ 
+     } else if (mainloop_child_timeout(p)) {
++        const char *reason = NULL;
++
++        if (op->rsc != NULL) {
++            reason = "Resource agent did not complete in time";
++        } else if (pcmk__str_eq(op->standard, PCMK_RESOURCE_CLASS_STONITH,
++                                pcmk__str_none)) {
++            reason = "Fence agent did not complete in time";
++        } else {
++            reason = "Process did not complete in time";
++        }
+         crm_info("%s[%d] timed out after %dms", op->id, op->pid, op->timeout);
+         services__set_result(op, services__generic_error(op), PCMK_EXEC_TIMEOUT,
+-                             "Process did not exit within specified timeout");
++                             reason);
+ 
+     } else if (op->cancel) {
+         /* If an in-flight recurring operation was killed because it was
+diff --git a/lib/services/systemd.c b/lib/services/systemd.c
+index 27a3b376db..d87b287424 100644
+--- a/lib/services/systemd.c
++++ b/lib/services/systemd.c
+@@ -995,7 +995,7 @@ systemd_timeout_callback(gpointer p)
+     crm_info("%s action for systemd unit %s named '%s' timed out",
+              op->action, op->agent, op->rsc);
+     services__set_result(op, PCMK_OCF_UNKNOWN_ERROR, PCMK_EXEC_TIMEOUT,
+-                         "Systemd action did not complete within specified timeout");
++                         "Systemd unit action did not complete in time");
+     services__finalize_async_op(op);
+     return FALSE;
+ }
+-- 
+2.27.0
+
diff --git a/SOURCES/019-corosync-tracking.patch b/SOURCES/019-corosync-tracking.patch
new file mode 100644
index 0000000..ac3ca96
--- /dev/null
+++ b/SOURCES/019-corosync-tracking.patch
@@ -0,0 +1,29 @@
+From e8bf0161b872267f1bb7143a9866fdc15ec218f2 Mon Sep 17 00:00:00 2001
+From: Jan Friesse <jfriesse@redhat.com>
+Date: Tue, 18 Jan 2022 16:35:24 +0100
+Subject: [PATCH] Fix: corosync: Repeat corosync_cfg_trackstart
+
+corosync_cfg_trackstart can fail with CS_ERR_TRY_AGAIN, so handle that
+failure by retrying with the cs_repeat macro (as is already done for
+corosync_cfg_local_get, ...).
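+
+For reference, cs_repeat implements a bounded retry loop along these lines
+(a paraphrased sketch; the in-tree definition in pcmkd_corosync.c is
+authoritative, and the code argument is expected to assign rc):
+
+    #define cs_repeat(counter, max, code) do {                            \
+            code;                                                         \
+            if ((rc == CS_ERR_TRY_AGAIN) || (rc == CS_ERR_QUEUE_FULL)) {  \
+                counter++;          /* back off, then retry */            \
+                sleep(counter);                                           \
+            } else {                                                      \
+                break;                                                    \
+            }                                                             \
+        } while (counter < max)
+
+With this change, the call becomes:
+
+    retries = 0;
+    cs_repeat(retries, 30, rc = corosync_cfg_trackstart(cfg_handle, 0));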
+---
+ daemons/pacemakerd/pcmkd_corosync.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/daemons/pacemakerd/pcmkd_corosync.c b/daemons/pacemakerd/pcmkd_corosync.c
+index 7990bc43c5..cd7a40321d 100644
+--- a/daemons/pacemakerd/pcmkd_corosync.c
++++ b/daemons/pacemakerd/pcmkd_corosync.c
+@@ -186,7 +186,8 @@ cluster_connect_cfg(void)
+     crm_debug("Corosync reports local node ID is %lu", (unsigned long) nodeid);
+ 
+ #ifdef HAVE_COROSYNC_CFG_TRACKSTART
+-    rc = corosync_cfg_trackstart(cfg_handle, 0);
++    retries = 0;
++    cs_repeat(retries, 30, rc = corosync_cfg_trackstart(cfg_handle, 0));
+     if (rc != CS_OK) {
+         crm_crit("Could not enable Corosync CFG shutdown tracker: %s " CRM_XS " rc=%d",
+                  cs_strerror(rc), rc);
+-- 
+2.27.0
+
diff --git a/SOURCES/020-systemd-unit.patch b/SOURCES/020-systemd-unit.patch
new file mode 100644
index 0000000..a425ae3
--- /dev/null
+++ b/SOURCES/020-systemd-unit.patch
@@ -0,0 +1,41 @@
+From e316840a7e1d2a72e3089ee194334244c959905a Mon Sep 17 00:00:00 2001
+From: Ken Gaillot <kgaillot@redhat.com>
+Date: Wed, 19 Jan 2022 09:53:53 -0600
+Subject: [PATCH] Fix: pacemakerd: tweak systemd unit respawn settings
+
+If pacemaker exits immediately after starting, wait 1 second before trying to
+respawn, since the default of 100ms is a bit aggressive for a Pacemaker
+cluster.
+
+Also, allow 5 attempts in 25 seconds before giving up.
+---
+ daemons/pacemakerd/pacemaker.service.in | 6 ++++++
+ 1 file changed, 6 insertions(+)
+
+diff --git a/daemons/pacemakerd/pacemaker.service.in b/daemons/pacemakerd/pacemaker.service.in
+index 0363a2259c..3fd53d9ffb 100644
+--- a/daemons/pacemakerd/pacemaker.service.in
++++ b/daemons/pacemakerd/pacemaker.service.in
+@@ -31,6 +31,9 @@ After=rsyslog.service
+ After=corosync.service
+ Requires=corosync.service
+ 
++# If Pacemaker respawns repeatedly, give up after this many tries in this time
++StartLimitBurst=5
++StartLimitIntervalSec=25s
+ 
+ [Install]
+ WantedBy=multi-user.target
+@@ -57,6 +60,9 @@ TasksMax=infinity
+ # resource.  Sending -KILL will just get the node fenced
+ SendSIGKILL=no
+ 
++# Systemd's default of respawning a failed service after 100ms is too aggressive
++RestartSec=1s
++
+ # If we ever hit the StartLimitInterval/StartLimitBurst limit, and the
+ # admin wants to stop the cluster while pacemakerd is not running, it
+ # might be a good idea to enable the ExecStopPost directive below.
+-- 
+2.27.0
+
diff --git a/SOURCES/021-failure-messages.patch b/SOURCES/021-failure-messages.patch
new file mode 100644
index 0000000..fab1013
--- /dev/null
+++ b/SOURCES/021-failure-messages.patch
@@ -0,0 +1,1338 @@
+From 9ee3d6c9b0aba6aae022cc152a3b3472fe388fa3 Mon Sep 17 00:00:00 2001
+From: Ken Gaillot <kgaillot@redhat.com>
+Date: Thu, 6 Jan 2022 16:44:32 -0600
+Subject: [PATCH 01/15] Refactor: fencer: add exit reason to fencing operation
+ object
+
+In order to pass a fencing action's exit reason with the action history,
+we need the exit reason in remote_fencing_op_t. Nothing sets or uses it as of
+this commit.
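+
+A sketch of the intended lifecycle, using helpers this patch set relies on
+(illustration, not code added here):
+
+    remote_fencing_op_t *op = calloc(1, sizeof(remote_fencing_op_t));
+
+    /* ... once an outcome is known, set it in place ... */
+    pcmk__set_result(&op->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
+
+    /* ... and in free_remote_op(), release any allocated exit reason ... */
+    pcmk__reset_result(&op->result);
+    free(op);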
+---
+ daemons/fenced/fenced_remote.c    | 2 ++
+ daemons/fenced/pacemaker-fenced.h | 4 +++-
+ 2 files changed, 5 insertions(+), 1 deletion(-)
+
+diff --git a/daemons/fenced/fenced_remote.c b/daemons/fenced/fenced_remote.c
+index 6eebb7381e..0fa9706140 100644
+--- a/daemons/fenced/fenced_remote.c
++++ b/daemons/fenced/fenced_remote.c
+@@ -260,6 +260,8 @@ free_remote_op(gpointer data)
+     }
+     g_list_free_full(op->automatic_list, free);
+     g_list_free(op->duplicates);
++
++    pcmk__reset_result(&op->result);
+     free(op);
+ }
+ 
+diff --git a/daemons/fenced/pacemaker-fenced.h b/daemons/fenced/pacemaker-fenced.h
+index 502fcc9a29..1a5c933ea7 100644
+--- a/daemons/fenced/pacemaker-fenced.h
++++ b/daemons/fenced/pacemaker-fenced.h
+@@ -1,5 +1,5 @@
+ /*
+- * Copyright 2009-2021 the Pacemaker project contributors
++ * Copyright 2009-2022 the Pacemaker project contributors
+  *
+  * This source code is licensed under the GNU General Public License version 2
+  * or later (GPLv2+) WITHOUT ANY WARRANTY.
+@@ -151,6 +151,8 @@ typedef struct remote_fencing_op_s {
+     /*! The point at which the remote operation completed(nsec) */
+     long long completed_nsec;
+ 
++    /*! The (potentially intermediate) result of the operation */
++    pcmk__action_result_t result;
+ } remote_fencing_op_t;
+ 
+ void fenced_broadcast_op_result(remote_fencing_op_t *op,
+-- 
+2.27.0
+
+
+From 97a2c318866adc5ef5e426c5c3b753df1fa3ab66 Mon Sep 17 00:00:00 2001
+From: Ken Gaillot <kgaillot@redhat.com>
+Date: Thu, 6 Jan 2022 17:08:42 -0600
+Subject: [PATCH 02/15] Refactor: fencer: track full result in
+ remote_fencing_op_t
+
+Now that remote_fencing_op_t has a place for the full result, set it there
+before calling finalize_op(), instead of passing a separate result object
+to finalize_op().
+
+As a bonus, this simplifies the memory management, reducing the chance of
+mistakes.
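+
+Condensed, the calling convention changes from using a stack-local result:
+
+    pcmk__action_result_t result = PCMK__UNKNOWN_RESULT;
+
+    pcmk__set_result(&result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
+    finalize_op(op, msg, &result, false);
+    pcmk__reset_result(&result);
+
+to setting the result in the op itself:
+
+    pcmk__set_result(&op->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
+    finalize_op(op, msg, false);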
+---
+ daemons/fenced/fenced_remote.c | 161 ++++++++++++++++-----------------
+ 1 file changed, 77 insertions(+), 84 deletions(-)
+
+diff --git a/daemons/fenced/fenced_remote.c b/daemons/fenced/fenced_remote.c
+index 0fa9706140..30edbff890 100644
+--- a/daemons/fenced/fenced_remote.c
++++ b/daemons/fenced/fenced_remote.c
+@@ -82,8 +82,7 @@ extern xmlNode *stonith_create_op(int call_id, const char *token, const char *op
+ static void request_peer_fencing(remote_fencing_op_t *op,
+                                 peer_device_info_t *peer,
+                                 pcmk__action_result_t *result);
+-static void finalize_op(remote_fencing_op_t *op, xmlNode *data,
+-                        pcmk__action_result_t *result, bool dup);
++static void finalize_op(remote_fencing_op_t *op, xmlNode *data, bool dup);
+ static void report_timeout_period(remote_fencing_op_t * op, int op_timeout);
+ static int get_op_total_timeout(const remote_fencing_op_t *op,
+                                 const peer_device_info_t *chosen_peer);
+@@ -485,7 +484,9 @@ finalize_op_duplicates(remote_fencing_op_t *op, xmlNode *data,
+                       other->client_name, other->originator,
+                       pcmk_exec_status_str(result->execution_status),
+                       other->id);
+-            finalize_op(other, data, result, true);
++            pcmk__set_result(&other->result, result->exit_status,
++                             result->execution_status, result->exit_reason);
++            finalize_op(other, data, true);
+ 
+         } else {
+             // Possible if (for example) it timed out already
+@@ -520,20 +521,20 @@ delegate_from_xml(xmlNode *xml)
+  *
+  * \param[in] op      Fencer operation that completed
+  * \param[in] data    If not NULL, XML reply of last delegated fencing operation
+- * \param[in] result  Full operation result
+  * \param[in] dup     Whether this operation is a duplicate of another
+  *                    (in which case, do not broadcast the result)
++ *
++ *  \note The operation result should be set before calling this function.
+  */
+ static void
+-finalize_op(remote_fencing_op_t *op, xmlNode *data,
+-            pcmk__action_result_t *result, bool dup)
++finalize_op(remote_fencing_op_t *op, xmlNode *data, bool dup)
+ {
+     int level = LOG_ERR;
+     const char *subt = NULL;
+     xmlNode *local_data = NULL;
+     gboolean op_merged = FALSE;
+ 
+-    CRM_CHECK((op != NULL) && (result != NULL), return);
++    CRM_CHECK((op != NULL), return);
+ 
+     if (op->notify_sent) {
+         // Most likely, this is a timed-out action that eventually completed
+@@ -557,11 +558,11 @@ finalize_op(remote_fencing_op_t *op, xmlNode *data,
+         local_data = data;
+ 
+     } else if (op->delegate == NULL) {
+-        switch (result->execution_status) {
++        switch (op->result.execution_status) {
+             case PCMK_EXEC_NO_FENCE_DEVICE:
+                 break;
+             case PCMK_EXEC_INVALID:
+-                if (result->exit_status == CRM_EX_EXPIRED) {
++                if (op->result.exit_status == CRM_EX_EXPIRED) {
+                     break;
+                 }
+                 // else fall through
+@@ -581,12 +582,12 @@ finalize_op(remote_fencing_op_t *op, xmlNode *data,
+     subt = crm_element_value(data, F_SUBTYPE);
+     if (!dup && !pcmk__str_eq(subt, "broadcast", pcmk__str_casei)) {
+         /* Defer notification until the bcast message arrives */
+-        fenced_broadcast_op_result(op, result, op_merged);
++        fenced_broadcast_op_result(op, &op->result, op_merged);
+         free_xml(local_data);
+         return;
+     }
+ 
+-    if (pcmk__result_ok(result) || dup
++    if (pcmk__result_ok(&op->result) || dup
+         || !pcmk__str_eq(op->originator, stonith_our_uname, pcmk__str_casei)) {
+         level = LOG_NOTICE;
+     }
+@@ -595,16 +596,17 @@ finalize_op(remote_fencing_op_t *op, xmlNode *data,
+                (op->target? op->target : ""),
+                (op->delegate? op->delegate : "unknown node"),
+                op->client_name, op->originator,
+-               (op_merged? " (merged)" : ""), crm_exit_str(result->exit_status),
+-               pcmk_exec_status_str(result->execution_status),
+-               ((result->exit_reason == NULL)? "" : ": "),
+-               ((result->exit_reason == NULL)? "" : result->exit_reason),
++               (op_merged? " (merged)" : ""),
++               crm_exit_str(op->result.exit_status),
++               pcmk_exec_status_str(op->result.execution_status),
++               ((op->result.exit_reason == NULL)? "" : ": "),
++               ((op->result.exit_reason == NULL)? "" : op->result.exit_reason),
+                op->id);
+ 
+-    handle_local_reply_and_notify(op, data, result);
++    handle_local_reply_and_notify(op, data, &op->result);
+ 
+     if (!dup) {
+-        finalize_op_duplicates(op, data, result);
++        finalize_op_duplicates(op, data, &op->result);
+     }
+ 
+     /* Free non-essential parts of the record
+@@ -634,7 +636,6 @@ static gboolean
+ remote_op_watchdog_done(gpointer userdata)
+ {
+     remote_fencing_op_t *op = userdata;
+-    pcmk__action_result_t result = PCMK__UNKNOWN_RESULT;
+ 
+     op->op_timer_one = 0;
+ 
+@@ -642,8 +643,8 @@ remote_op_watchdog_done(gpointer userdata)
+                CRM_XS " id=%.8s",
+                op->action, op->target, op->client_name, op->id);
+     op->state = st_done;
+-    pcmk__set_result(&result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
+-    finalize_op(op, NULL, &result, false);
++    pcmk__set_result(&op->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
++    finalize_op(op, NULL, false);
+     return G_SOURCE_REMOVE;
+ }
+ 
+@@ -676,8 +677,6 @@ remote_op_timeout_one(gpointer userdata)
+ static void
+ finalize_timed_out_op(remote_fencing_op_t *op, const char *reason)
+ {
+-    pcmk__action_result_t result = PCMK__UNKNOWN_RESULT;
+-
+     op->op_timer_total = 0;
+ 
+     crm_debug("Action '%s' targeting %s for client %s timed out "
+@@ -690,13 +689,12 @@ finalize_timed_out_op(remote_fencing_op_t *op, const char *reason)
+          * devices, and return success.
+          */
+         op->state = st_done;
+-        pcmk__set_result(&result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
++        pcmk__set_result(&op->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
+     } else {
+         op->state = st_failed;
+-        pcmk__set_result(&result, CRM_EX_ERROR, PCMK_EXEC_TIMEOUT, reason);
++        pcmk__set_result(&op->result, CRM_EX_ERROR, PCMK_EXEC_TIMEOUT, reason);
+     }
+-    finalize_op(op, NULL, &result, false);
+-    pcmk__reset_result(&result);
++    finalize_op(op, NULL, false);
+ }
+ 
+ /*!
+@@ -1094,13 +1092,9 @@ fenced_handle_manual_confirmation(pcmk__client_t *client, xmlNode *msg)
+     set_fencing_completed(op);
+     op->delegate = strdup("a human");
+ 
+-    {
+-        // For the fencer's purposes, the fencing operation is done
+-        pcmk__action_result_t result = PCMK__UNKNOWN_RESULT;
+-
+-        pcmk__set_result(&result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
+-        finalize_op(op, msg, &result, false);
+-    }
++    // For the fencer's purposes, the fencing operation is done
++    pcmk__set_result(&op->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
++    finalize_op(op, msg, false);
+ 
+     /* For the requester's purposes, the operation is still pending. The
+      * actual result will be sent asynchronously via the operation's done_cb().
+@@ -1279,16 +1273,11 @@ initiate_remote_stonith_op(pcmk__client_t *client, xmlNode *request,
+     switch (op->state) {
+         case st_failed:
+             // advance_topology_level() exhausted levels
+-            {
+-                pcmk__action_result_t result = PCMK__UNKNOWN_RESULT;
+-
+-                pcmk__set_result(&result, CRM_EX_ERROR, PCMK_EXEC_ERROR,
+-                                 "All topology levels failed");
+-                crm_warn("Could not request peer fencing (%s) targeting %s "
+-                         CRM_XS " id=%.8s", op->action, op->target, op->id);
+-                finalize_op(op, NULL, &result, false);
+-                pcmk__reset_result(&result);
+-            }
++            pcmk__set_result(&op->result, CRM_EX_ERROR, PCMK_EXEC_ERROR,
++                             "All topology levels failed");
++            crm_warn("Could not request peer fencing (%s) targeting %s "
++                     CRM_XS " id=%.8s", op->action, op->target, op->id);
++            finalize_op(op, NULL, false);
+             return op;
+ 
+         case st_duplicate:
+@@ -1613,10 +1602,6 @@ static void
+ advance_topology_device_in_level(remote_fencing_op_t *op, const char *device,
+                                  xmlNode *msg)
+ {
+-    pcmk__action_result_t result = PCMK__UNKNOWN_RESULT;
+-
+-    pcmk__set_result(&result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
+-
+     /* Advance to the next device at this topology level, if any */
+     if (op->devices) {
+         op->devices = op->devices->next;
+@@ -1644,6 +1629,10 @@ advance_topology_device_in_level(remote_fencing_op_t *op, const char *device,
+     }
+ 
+     if (op->devices) {
++        pcmk__action_result_t result = PCMK__UNKNOWN_RESULT;
++
++        pcmk__set_result(&result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
++
+         /* Necessary devices remain, so execute the next one */
+         crm_trace("Next targeting %s on behalf of %s@%s",
+                   op->target, op->client_name, op->originator);
+@@ -1659,7 +1648,8 @@ advance_topology_device_in_level(remote_fencing_op_t *op, const char *device,
+         crm_trace("Marking complex fencing op targeting %s as complete",
+                   op->target);
+         op->state = st_done;
+-        finalize_op(op, msg, &result, false);
++        pcmk__set_result(&op->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
++        finalize_op(op, msg, false);
+     }
+ }
+ 
+@@ -1868,7 +1858,9 @@ request_peer_fencing(remote_fencing_op_t *op, peer_device_info_t *peer,
+         }
+ 
+         op->state = st_failed;
+-        finalize_op(op, NULL, result, false);
++        pcmk__set_result(&op->result, result->exit_status,
++                         result->execution_status, result->exit_reason);
++        finalize_op(op, NULL, false);
+ 
+     } else {
+         crm_info("Waiting for additional peers capable of fencing (%s) %s%s%s "
+@@ -2245,31 +2237,34 @@ fenced_process_fencing_reply(xmlNode *msg)
+         /* Could be for an event that began before we started */
+         /* TODO: Record the op for later querying */
+         crm_info("Received peer result of unknown or expired operation %s", id);
+-        goto done;
++        pcmk__reset_result(&result);
++        return;
+     }
+ 
++    op->result = result; // The operation takes ownership of the result
++
+     if (op->devices && device && !pcmk__str_eq(op->devices->data, device, pcmk__str_casei)) {
+         crm_err("Received outdated reply for device %s (instead of %s) to "
+                 "fence (%s) %s. Operation already timed out at peer level.",
+                 device, (const char *) op->devices->data, op->action, op->target);
+-        goto done;
++        return;
+     }
+ 
+     if (pcmk__str_eq(crm_element_value(msg, F_SUBTYPE), "broadcast", pcmk__str_casei)) {
+         crm_debug("Finalizing action '%s' targeting %s on behalf of %s@%s: %s%s%s%s "
+                   CRM_XS " id=%.8s",
+                   op->action, op->target, op->client_name, op->originator,
+-                  pcmk_exec_status_str(result.execution_status),
+-                  (result.exit_reason == NULL)? "" : " (",
+-                  (result.exit_reason == NULL)? "" : result.exit_reason,
+-                  (result.exit_reason == NULL)? "" : ")", op->id);
+-        if (pcmk__result_ok(&result)) {
++                  pcmk_exec_status_str(op->result.execution_status),
++                  (op->result.exit_reason == NULL)? "" : " (",
++                  (op->result.exit_reason == NULL)? "" : op->result.exit_reason,
++                  (op->result.exit_reason == NULL)? "" : ")", op->id);
++        if (pcmk__result_ok(&op->result)) {
+             op->state = st_done;
+         } else {
+             op->state = st_failed;
+         }
+-        finalize_op(op, msg, &result, false);
+-        goto done;
++        finalize_op(op, msg, false);
++        return;
+ 
+     } else if (!pcmk__str_eq(op->originator, stonith_our_uname, pcmk__str_casei)) {
+         /* If this isn't a remote level broadcast, and we are not the
+@@ -2277,7 +2272,7 @@ fenced_process_fencing_reply(xmlNode *msg)
+         crm_err("Received non-broadcast fencing result for operation %.8s "
+                 "we do not own (device %s targeting %s)",
+                 op->id, device, op->target);
+-        goto done;
++        return;
+     }
+ 
+     if (pcmk_is_set(op->call_options, st_opt_topology)) {
+@@ -2286,58 +2281,58 @@ fenced_process_fencing_reply(xmlNode *msg)
+         crm_notice("Action '%s' targeting %s using %s on behalf of %s@%s: %s%s%s%s",
+                    op->action, op->target, device, op->client_name,
+                    op->originator,
+-                   pcmk_exec_status_str(result.execution_status),
+-                  (result.exit_reason == NULL)? "" : " (",
+-                  (result.exit_reason == NULL)? "" : result.exit_reason,
+-                  (result.exit_reason == NULL)? "" : ")");
++                   pcmk_exec_status_str(op->result.execution_status),
++                  (op->result.exit_reason == NULL)? "" : " (",
++                  (op->result.exit_reason == NULL)? "" : op->result.exit_reason,
++                  (op->result.exit_reason == NULL)? "" : ")");
+ 
+         /* We own the op, and it is complete. broadcast the result to all nodes
+          * and notify our local clients. */
+         if (op->state == st_done) {
+-            finalize_op(op, msg, &result, false);
+-            goto done;
++            finalize_op(op, msg, false);
++            return;
+         }
+ 
+-        if ((op->phase == 2) && !pcmk__result_ok(&result)) {
++        if ((op->phase == 2) && !pcmk__result_ok(&op->result)) {
+             /* A remapped "on" failed, but the node was already turned off
+              * successfully, so ignore the error and continue.
+              */
+             crm_warn("Ignoring %s 'on' failure (%s%s%s) targeting %s "
+                      "after successful 'off'",
+-                     device, pcmk_exec_status_str(result.execution_status),
+-                     (result.exit_reason == NULL)? "" : ": ",
+-                     (result.exit_reason == NULL)? "" : result.exit_reason,
++                     device, pcmk_exec_status_str(op->result.execution_status),
++                     (op->result.exit_reason == NULL)? "" : ": ",
++                     (op->result.exit_reason == NULL)? "" : op->result.exit_reason,
+                      op->target);
+-            pcmk__set_result(&result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
++            pcmk__set_result(&op->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
+         }
+ 
+-        if (pcmk__result_ok(&result)) {
++        if (pcmk__result_ok(&op->result)) {
+             /* An operation completed successfully. Try another device if
+              * necessary, otherwise mark the operation as done. */
+             advance_topology_device_in_level(op, device, msg);
+-            goto done;
++            return;
+         } else {
+             /* This device failed, time to try another topology level. If no other
+              * levels are available, mark this operation as failed and report results. */
+             if (advance_topology_level(op, false) != pcmk_rc_ok) {
+                 op->state = st_failed;
+-                finalize_op(op, msg, &result, false);
+-                goto done;
++                finalize_op(op, msg, false);
++                return;
+             }
+         }
+ 
+-    } else if (pcmk__result_ok(&result) && (op->devices == NULL)) {
++    } else if (pcmk__result_ok(&op->result) && (op->devices == NULL)) {
+         crm_trace("All done for %s", op->target);
+         op->state = st_done;
+-        finalize_op(op, msg, &result, false);
+-        goto done;
++        finalize_op(op, msg, false);
++        return;
+ 
+-    } else if ((result.execution_status == PCMK_EXEC_TIMEOUT)
++    } else if ((op->result.execution_status == PCMK_EXEC_TIMEOUT)
+                && (op->devices == NULL)) {
+         /* If the operation timed out don't bother retrying other peers. */
+         op->state = st_failed;
+-        finalize_op(op, msg, &result, false);
+-        goto done;
++        finalize_op(op, msg, false);
++        return;
+ 
+     } else {
+         /* fall-through and attempt other fencing action using another peer */
+@@ -2346,10 +2341,8 @@ fenced_process_fencing_reply(xmlNode *msg)
+     /* Retry on failure */
+     crm_trace("Next for %s on behalf of %s@%s (result was: %s)",
+               op->target, op->originator, op->client_name,
+-              pcmk_exec_status_str(result.execution_status));
+-    request_peer_fencing(op, NULL, &result);
+-done:
+-    pcmk__reset_result(&result);
++              pcmk_exec_status_str(op->result.execution_status));
++    request_peer_fencing(op, NULL, &op->result);
+ }
+ 
+ gboolean
+-- 
+2.27.0
+
+
+From c59d062154f7c9e15e90929a20ea244d7efd7247 Mon Sep 17 00:00:00 2001
+From: Ken Gaillot <kgaillot@redhat.com>
+Date: Thu, 6 Jan 2022 17:11:12 -0600
+Subject: [PATCH 03/15] Refactor: fencer: drop redundant argument from
+ finalize_op_duplicates()
+
+... now that the result is in the op
+---
+ daemons/fenced/fenced_remote.c | 13 ++++++-------
+ 1 file changed, 6 insertions(+), 7 deletions(-)
+
+diff --git a/daemons/fenced/fenced_remote.c b/daemons/fenced/fenced_remote.c
+index 30edbff890..8b496e1042 100644
+--- a/daemons/fenced/fenced_remote.c
++++ b/daemons/fenced/fenced_remote.c
+@@ -468,11 +468,9 @@ handle_local_reply_and_notify(remote_fencing_op_t *op, xmlNode *data,
+  *
+  * \param[in] op         Fencer operation that completed
+  * \param[in] data       Top-level XML to add notification to
+- * \param[in] result     Full operation result
+  */
+ static void
+-finalize_op_duplicates(remote_fencing_op_t *op, xmlNode *data,
+-                       pcmk__action_result_t *result)
++finalize_op_duplicates(remote_fencing_op_t *op, xmlNode *data)
+ {
+     for (GList *iter = op->duplicates; iter != NULL; iter = iter->next) {
+         remote_fencing_op_t *other = iter->data;
+@@ -482,10 +480,11 @@ finalize_op_duplicates(remote_fencing_op_t *op, xmlNode *data,
+             crm_debug("Performing duplicate notification for %s@%s: %s "
+                       CRM_XS " id=%.8s",
+                       other->client_name, other->originator,
+-                      pcmk_exec_status_str(result->execution_status),
++                      pcmk_exec_status_str(op->result.execution_status),
+                       other->id);
+-            pcmk__set_result(&other->result, result->exit_status,
+-                             result->execution_status, result->exit_reason);
++            pcmk__set_result(&other->result, op->result.exit_status,
++                             op->result.execution_status,
++                             op->result.exit_reason);
+             finalize_op(other, data, true);
+ 
+         } else {
+@@ -606,7 +605,7 @@ finalize_op(remote_fencing_op_t *op, xmlNode *data, bool dup)
+     handle_local_reply_and_notify(op, data, &op->result);
+ 
+     if (!dup) {
+-        finalize_op_duplicates(op, data, &op->result);
++        finalize_op_duplicates(op, data);
+     }
+ 
+     /* Free non-essential parts of the record
+-- 
+2.27.0
+
+
+From 6c49675855323a52a534afa112a0861ba2e3b1ad Mon Sep 17 00:00:00 2001
+From: Ken Gaillot <kgaillot@redhat.com>
+Date: Thu, 6 Jan 2022 17:15:17 -0600
+Subject: [PATCH 04/15] Refactor: fencer: drop redundant argument from
+ fenced_broadcast_op_result()
+
+... now that the op includes the result
+---
+ daemons/fenced/fenced_history.c   | 9 +++------
+ daemons/fenced/fenced_remote.c    | 8 +++-----
+ daemons/fenced/pacemaker-fenced.h | 3 +--
+ 3 files changed, 7 insertions(+), 13 deletions(-)
+
+diff --git a/daemons/fenced/fenced_history.c b/daemons/fenced/fenced_history.c
+index 0157deadb3..5cacf36ca8 100644
+--- a/daemons/fenced/fenced_history.c
++++ b/daemons/fenced/fenced_history.c
+@@ -1,5 +1,5 @@
+ /*
+- * Copyright 2009-2021 the Pacemaker project contributors
++ * Copyright 2009-2022 the Pacemaker project contributors
+  *
+  * The version control history for this file may have further details.
+  *
+@@ -359,8 +359,6 @@ stonith_local_history_diff_and_merge(GHashTable *remote_history,
+     }
+ 
+     if (remote_history) {
+-        pcmk__action_result_t result = PCMK__UNKNOWN_RESULT;
+-
+         init_stonith_remote_op_hash_table(&stonith_remote_op_list);
+ 
+         updated |= g_hash_table_size(remote_history);
+@@ -378,10 +376,10 @@ stonith_local_history_diff_and_merge(GHashTable *remote_history,
+                 /* CRM_EX_EXPIRED + PCMK_EXEC_INVALID prevents finalize_op()
+                  * from setting a delegate
+                  */
+-                pcmk__set_result(&result, CRM_EX_EXPIRED, PCMK_EXEC_INVALID,
++                pcmk__set_result(&op->result, CRM_EX_EXPIRED, PCMK_EXEC_INVALID,
+                                  "Initiated by earlier fencer "
+                                  "process and presumed failed");
+-                fenced_broadcast_op_result(op, &result, false);
++                fenced_broadcast_op_result(op, false);
+             }
+ 
+             g_hash_table_iter_steal(&iter);
+@@ -396,7 +394,6 @@ stonith_local_history_diff_and_merge(GHashTable *remote_history,
+              */
+         }
+ 
+-        pcmk__reset_result(&result);
+         g_hash_table_destroy(remote_history); /* remove what is left */
+     }
+ 
+diff --git a/daemons/fenced/fenced_remote.c b/daemons/fenced/fenced_remote.c
+index 8b496e1042..fb5a5e980e 100644
+--- a/daemons/fenced/fenced_remote.c
++++ b/daemons/fenced/fenced_remote.c
+@@ -390,16 +390,14 @@ fencing_result2xml(remote_fencing_op_t *op, pcmk__action_result_t *result)
+  * \brief Broadcast a fence result notification to all CPG peers
+  *
+  * \param[in] op         Fencer operation that completed
+- * \param[in] result     Full operation result
+  * \param[in] op_merged  Whether this operation is a duplicate of another
+  */
+ void
+-fenced_broadcast_op_result(remote_fencing_op_t *op,
+-                           pcmk__action_result_t *result, bool op_merged)
++fenced_broadcast_op_result(remote_fencing_op_t *op, bool op_merged)
+ {
+     static int count = 0;
+     xmlNode *bcast = create_xml_node(NULL, T_STONITH_REPLY);
+-    xmlNode *notify_data = fencing_result2xml(op, result);
++    xmlNode *notify_data = fencing_result2xml(op, &op->result);
+ 
+     count++;
+     crm_trace("Broadcasting result to peers");
+@@ -581,7 +579,7 @@ finalize_op(remote_fencing_op_t *op, xmlNode *data, bool dup)
+     subt = crm_element_value(data, F_SUBTYPE);
+     if (!dup && !pcmk__str_eq(subt, "broadcast", pcmk__str_casei)) {
+         /* Defer notification until the bcast message arrives */
+-        fenced_broadcast_op_result(op, &op->result, op_merged);
++        fenced_broadcast_op_result(op, op_merged);
+         free_xml(local_data);
+         return;
+     }
+diff --git a/daemons/fenced/pacemaker-fenced.h b/daemons/fenced/pacemaker-fenced.h
+index 1a5c933ea7..6213407da3 100644
+--- a/daemons/fenced/pacemaker-fenced.h
++++ b/daemons/fenced/pacemaker-fenced.h
+@@ -155,8 +155,7 @@ typedef struct remote_fencing_op_s {
+     pcmk__action_result_t result;
+ } remote_fencing_op_t;
+ 
+-void fenced_broadcast_op_result(remote_fencing_op_t *op,
+-                                pcmk__action_result_t *result, bool op_merged);
++void fenced_broadcast_op_result(remote_fencing_op_t *op, bool op_merged);
+ 
+ // Fencer-specific client flags
+ enum st_client_flags {
+-- 
+2.27.0
+
+
+From 73994fc740b8833457b130368db479502d49f285 Mon Sep 17 00:00:00 2001
+From: Ken Gaillot <kgaillot@redhat.com>
+Date: Thu, 6 Jan 2022 17:17:33 -0600
+Subject: [PATCH 05/15] Refactor: fencer: drop redundant argument from
+ handle_local_reply_and_notify()
+
+... now that the op includes the result
+---
+ daemons/fenced/fenced_remote.c | 12 +++++-------
+ 1 file changed, 5 insertions(+), 7 deletions(-)
+
+diff --git a/daemons/fenced/fenced_remote.c b/daemons/fenced/fenced_remote.c
+index fb5a5e980e..2621cb2f19 100644
+--- a/daemons/fenced/fenced_remote.c
++++ b/daemons/fenced/fenced_remote.c
+@@ -424,11 +424,9 @@ fenced_broadcast_op_result(remote_fencing_op_t *op, bool op_merged)
+  *
+  * \param[in] op         Fencer operation that completed
+  * \param[in] data       Top-level XML to add notification to
+- * \param[in] result     Full operation result
+  */
+ static void
+-handle_local_reply_and_notify(remote_fencing_op_t *op, xmlNode *data,
+-                              pcmk__action_result_t *result)
++handle_local_reply_and_notify(remote_fencing_op_t *op, xmlNode *data)
+ {
+     xmlNode *notify_data = NULL;
+     xmlNode *reply = NULL;
+@@ -443,15 +441,15 @@ handle_local_reply_and_notify(remote_fencing_op_t *op, xmlNode *data,
+     crm_xml_add(data, F_STONITH_TARGET, op->target);
+     crm_xml_add(data, F_STONITH_OPERATION, op->action);
+ 
+-    reply = fenced_construct_reply(op->request, data, result);
++    reply = fenced_construct_reply(op->request, data, &op->result);
+     crm_xml_add(reply, F_STONITH_DELEGATE, op->delegate);
+ 
+     /* Send fencing OP reply to local client that initiated fencing */
+     do_local_reply(reply, op->client_id, op->call_options & st_opt_sync_call, FALSE);
+ 
+     /* bcast to all local clients that the fencing operation happend */
+-    notify_data = fencing_result2xml(op, result);
+-    fenced_send_notification(T_STONITH_NOTIFY_FENCE, result, notify_data);
++    notify_data = fencing_result2xml(op, &op->result);
++    fenced_send_notification(T_STONITH_NOTIFY_FENCE, &op->result, notify_data);
+     free_xml(notify_data);
+     fenced_send_notification(T_STONITH_NOTIFY_HISTORY, NULL, NULL);
+ 
+@@ -600,7 +598,7 @@ finalize_op(remote_fencing_op_t *op, xmlNode *data, bool dup)
+                ((op->result.exit_reason == NULL)? "" : op->result.exit_reason),
+                op->id);
+ 
+-    handle_local_reply_and_notify(op, data, &op->result);
++    handle_local_reply_and_notify(op, data);
+ 
+     if (!dup) {
+         finalize_op_duplicates(op, data);
+-- 
+2.27.0
+
+
+From 194056d18d3b550d3a53b94d558ceed03b5e5442 Mon Sep 17 00:00:00 2001
+From: Ken Gaillot <kgaillot@redhat.com>
+Date: Thu, 6 Jan 2022 17:18:27 -0600
+Subject: [PATCH 06/15] Refactor: fencer: drop redundant argument from
+ fencing_result2xml()
+
+... now that the op includes the result
+---
+ daemons/fenced/fenced_remote.c | 9 ++++-----
+ 1 file changed, 4 insertions(+), 5 deletions(-)
+
+diff --git a/daemons/fenced/fenced_remote.c b/daemons/fenced/fenced_remote.c
+index 2621cb2f19..8d4f53eef6 100644
+--- a/daemons/fenced/fenced_remote.c
++++ b/daemons/fenced/fenced_remote.c
+@@ -362,13 +362,12 @@ undo_op_remap(remote_fencing_op_t *op)
+  * \brief Create notification data XML for a fencing operation result
+  *
+  * \param[in] op      Fencer operation that completed
+- * \param[in] result  Full operation result
+  *
+  * \return Newly created XML to add as notification data
+  * \note The caller is responsible for freeing the result.
+  */
+ static xmlNode *
+-fencing_result2xml(remote_fencing_op_t *op, pcmk__action_result_t *result)
++fencing_result2xml(remote_fencing_op_t *op)
+ {
+     xmlNode *notify_data = create_xml_node(NULL, T_STONITH_NOTIFY_FENCE);
+ 
+@@ -381,7 +380,7 @@ fencing_result2xml(remote_fencing_op_t *op, pcmk__action_result_t *result)
+     crm_xml_add(notify_data, F_STONITH_CLIENTID, op->client_id);
+     crm_xml_add(notify_data, F_STONITH_CLIENTNAME, op->client_name);
+ 
+-    stonith__xe_set_result(notify_data, result);
++    stonith__xe_set_result(notify_data, &op->result);
+     return notify_data;
+ }
+ 
+@@ -397,7 +396,7 @@ fenced_broadcast_op_result(remote_fencing_op_t *op, bool op_merged)
+ {
+     static int count = 0;
+     xmlNode *bcast = create_xml_node(NULL, T_STONITH_REPLY);
+-    xmlNode *notify_data = fencing_result2xml(op, &op->result);
++    xmlNode *notify_data = fencing_result2xml(op);
+ 
+     count++;
+     crm_trace("Broadcasting result to peers");
+@@ -448,7 +447,7 @@ handle_local_reply_and_notify(remote_fencing_op_t *op, xmlNode *data)
+     do_local_reply(reply, op->client_id, op->call_options & st_opt_sync_call, FALSE);
+ 
+     /* bcast to all local clients that the fencing operation happend */
+-    notify_data = fencing_result2xml(op, &op->result);
++    notify_data = fencing_result2xml(op);
+     fenced_send_notification(T_STONITH_NOTIFY_FENCE, &op->result, notify_data);
+     free_xml(notify_data);
+     fenced_send_notification(T_STONITH_NOTIFY_HISTORY, NULL, NULL);
+-- 
+2.27.0
+
+
+From c5d38cb201a1219ca95127cba9c3a778e31966a2 Mon Sep 17 00:00:00 2001
+From: Ken Gaillot <kgaillot@redhat.com>
+Date: Thu, 6 Jan 2022 17:35:43 -0600
+Subject: [PATCH 07/15] Refactor: fencer: drop redundant argument from
+ request_peer_fencing()
+
+... now that the op includes the result
+---
+ daemons/fenced/fenced_remote.c | 66 +++++++++++++---------------------
+ 1 file changed, 25 insertions(+), 41 deletions(-)
+
+diff --git a/daemons/fenced/fenced_remote.c b/daemons/fenced/fenced_remote.c
+index 8d4f53eef6..7fb7695fba 100644
+--- a/daemons/fenced/fenced_remote.c
++++ b/daemons/fenced/fenced_remote.c
+@@ -80,8 +80,7 @@ extern xmlNode *stonith_create_op(int call_id, const char *token, const char *op
+                                   int call_options);
+ 
+ static void request_peer_fencing(remote_fencing_op_t *op,
+-                                peer_device_info_t *peer,
+-                                pcmk__action_result_t *result);
++                                 peer_device_info_t *peer);
+ static void finalize_op(remote_fencing_op_t *op, xmlNode *data, bool dup);
+ static void report_timeout_period(remote_fencing_op_t * op, int op_timeout);
+ static int get_op_total_timeout(const remote_fencing_op_t *op,
+@@ -646,18 +645,16 @@ static gboolean
+ remote_op_timeout_one(gpointer userdata)
+ {
+     remote_fencing_op_t *op = userdata;
+-    pcmk__action_result_t result = PCMK__UNKNOWN_RESULT;
+ 
+     op->op_timer_one = 0;
+ 
+     crm_notice("Peer's '%s' action targeting %s for client %s timed out " CRM_XS
+                " id=%.8s", op->action, op->target, op->client_name, op->id);
+-    pcmk__set_result(&result, CRM_EX_ERROR, PCMK_EXEC_TIMEOUT,
++    pcmk__set_result(&op->result, CRM_EX_ERROR, PCMK_EXEC_TIMEOUT,
+                      "Peer did not return fence result within timeout");
+ 
+-
+     // Try another device, if appropriate
+-    request_peer_fencing(op, NULL, &result);
++    request_peer_fencing(op, NULL);
+     return FALSE;
+ }
+ 
+@@ -730,13 +727,10 @@ remote_op_query_timeout(gpointer data)
+         crm_debug("Operation %.8s targeting %s already in progress",
+                   op->id, op->target);
+     } else if (op->query_results) {
+-        // Result won't be used in this case, but we need to pass something
+-        pcmk__action_result_t result = PCMK__UNKNOWN_RESULT;
+-
+         // Query succeeded, so attempt the actual fencing
+         crm_debug("Query %.8s targeting %s complete (state=%s)",
+                   op->id, op->target, stonith_op_state_str(op->state));
+-        request_peer_fencing(op, NULL, &result);
++        request_peer_fencing(op, NULL);
+     } else {
+         crm_debug("Query %.8s targeting %s timed out (state=%s)",
+                   op->id, op->target, stonith_op_state_str(op->state));
+@@ -1622,11 +1616,10 @@ advance_topology_device_in_level(remote_fencing_op_t *op, const char *device,
+         op_phase_on(op);
+     }
+ 
+-    if (op->devices) {
+-        pcmk__action_result_t result = PCMK__UNKNOWN_RESULT;
+-
+-        pcmk__set_result(&result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
++    // This function is only called if the previous device succeeded
++    pcmk__set_result(&op->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
+ 
++    if (op->devices) {
+         /* Necessary devices remain, so execute the next one */
+         crm_trace("Next targeting %s on behalf of %s@%s",
+                   op->target, op->client_name, op->originator);
+@@ -1636,13 +1629,12 @@ advance_topology_device_in_level(remote_fencing_op_t *op, const char *device,
+             op->delay = 0;
+         }
+ 
+-        request_peer_fencing(op, NULL, &result);
++        request_peer_fencing(op, NULL);
+     } else {
+         /* We're done with all devices and phases, so finalize operation */
+         crm_trace("Marking complex fencing op targeting %s as complete",
+                   op->target);
+         op->state = st_done;
+-        pcmk__set_result(&op->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
+         finalize_op(op, msg, false);
+     }
+ }
+@@ -1673,13 +1665,9 @@ check_watchdog_fencing_and_wait(remote_fencing_op_t * op)
+  * \param[in] op      Fencing operation to be executed
+  * \param[in] peer    If NULL or topology is in use, choose best peer to execute
+  *                    the fencing, otherwise use this peer
+- * \param[in] result  Full result of previous failed attempt, if any (used as
+- *                    final result only if a previous attempt failed, topology
+- *                    is not in use, and no devices remain to be attempted)
+  */
+ static void
+-request_peer_fencing(remote_fencing_op_t *op, peer_device_info_t *peer,
+-                    pcmk__action_result_t *result)
++request_peer_fencing(remote_fencing_op_t *op, peer_device_info_t *peer)
+ {
+     const char *device = NULL;
+     int timeout;
+@@ -1822,27 +1810,26 @@ request_peer_fencing(remote_fencing_op_t *op, peer_device_info_t *peer,
+             }
+         }
+ 
+-        // This is the only case in which result will be used
+-        CRM_CHECK(result != NULL, return);
+-
+         if (op->state == st_query) {
+             crm_info("No peers (out of %d) have devices capable of fencing "
+                      "(%s) %s for client %s " CRM_XS " state=%s",
+                      op->replies, op->action, op->target, op->client_name,
+                      stonith_op_state_str(op->state));
+ 
+-            pcmk__reset_result(result);
+-            pcmk__set_result(result, CRM_EX_ERROR, PCMK_EXEC_NO_FENCE_DEVICE,
+-                             NULL);
++            pcmk__reset_result(&op->result);
++            pcmk__set_result(&op->result, CRM_EX_ERROR,
++                             PCMK_EXEC_NO_FENCE_DEVICE, NULL);
+         } else {
+             if (pcmk_is_set(op->call_options, st_opt_topology)) {
+-                pcmk__reset_result(result);
+-                pcmk__set_result(result, CRM_EX_ERROR,
++                pcmk__reset_result(&op->result);
++                pcmk__set_result(&op->result, CRM_EX_ERROR,
+                                  PCMK_EXEC_NO_FENCE_DEVICE, NULL);
+             }
+-            /* ... else use result provided by caller -- overwriting it with
+-               PCMK_EXEC_NO_FENCE_DEVICE would prevent finalize_op() from
+-               setting the correct delegate if needed.
++            /* ... else use existing result from previous failed attempt
++             * (topology is not in use, and no devices remain to be attempted).
++             * Overwriting the result with PCMK_EXEC_NO_FENCE_DEVICE would
++             * prevent finalize_op() from setting the correct delegate if
++             * needed.
+              */
+ 
+             crm_info("No peers (out of %d) are capable of fencing (%s) %s "
+@@ -1852,8 +1839,6 @@ request_peer_fencing(remote_fencing_op_t *op, peer_device_info_t *peer,
+         }
+ 
+         op->state = st_failed;
+-        pcmk__set_result(&op->result, result->exit_status,
+-                         result->execution_status, result->exit_reason);
+         finalize_op(op, NULL, false);
+ 
+     } else {
+@@ -2104,7 +2089,6 @@ process_remote_stonith_query(xmlNode * msg)
+     peer_device_info_t *peer = NULL;
+     uint32_t replies_expected;
+     xmlNode *dev = get_xpath_object("//@" F_STONITH_REMOTE_OP_ID, msg, LOG_ERR);
+-    pcmk__action_result_t result = PCMK__UNKNOWN_RESULT;
+ 
+     CRM_CHECK(dev != NULL, return -EPROTO);
+ 
+@@ -2139,7 +2123,7 @@ process_remote_stonith_query(xmlNode * msg)
+         peer = add_result(op, host, ndevices, dev);
+     }
+ 
+-    pcmk__set_result(&result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
++    pcmk__set_result(&op->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
+ 
+     if (pcmk_is_set(op->call_options, st_opt_topology)) {
+         /* If we start the fencing before all the topology results are in,
+@@ -2148,12 +2132,12 @@ process_remote_stonith_query(xmlNode * msg)
+         if (op->state == st_query && all_topology_devices_found(op)) {
+             /* All the query results are in for the topology, start the fencing ops. */
+             crm_trace("All topology devices found");
+-            request_peer_fencing(op, peer, &result);
++            request_peer_fencing(op, peer);
+ 
+         } else if (have_all_replies) {
+             crm_info("All topology query replies have arrived, continuing (%d expected/%d received) ",
+                      replies_expected, op->replies);
+-            request_peer_fencing(op, NULL, &result);
++            request_peer_fencing(op, NULL);
+         }
+ 
+     } else if (op->state == st_query) {
+@@ -2165,12 +2149,12 @@ process_remote_stonith_query(xmlNode * msg)
+             /* we have a verified device living on a peer that is not the target */
+             crm_trace("Found %d verified device%s",
+                       nverified, pcmk__plural_s(nverified));
+-            request_peer_fencing(op, peer, &result);
++            request_peer_fencing(op, peer);
+ 
+         } else if (have_all_replies) {
+             crm_info("All query replies have arrived, continuing (%d expected/%d received) ",
+                      replies_expected, op->replies);
+-            request_peer_fencing(op, NULL, &result);
++            request_peer_fencing(op, NULL);
+ 
+         } else {
+             crm_trace("Waiting for more peer results before launching fencing operation");
+@@ -2336,7 +2320,7 @@ fenced_process_fencing_reply(xmlNode *msg)
+     crm_trace("Next for %s on behalf of %s@%s (result was: %s)",
+               op->target, op->originator, op->client_name,
+               pcmk_exec_status_str(op->result.execution_status));
+-    request_peer_fencing(op, NULL, &op->result);
++    request_peer_fencing(op, NULL);
+ }
+ 
+ gboolean
+-- 
+2.27.0
+
+
+From be0a0b652c13161a82b05d3104449b7bfc06e8ac Mon Sep 17 00:00:00 2001
+From: Ken Gaillot <kgaillot@redhat.com>
+Date: Thu, 6 Jan 2022 17:56:24 -0600
+Subject: [PATCH 08/15] Feature: fencer: track full result in fencing history
+
+Add fencing operation results when creating XML in
+stonith_local_history_diff_and_merge(), and parse the results from the received
+XML in stonith_xml_history_to_list().
+
+With this, the fencer now always has full results in its op list, and returns
+them in the reply for STONITH_OP_FENCE_HISTORY requests (though nothing uses
+that as of this commit).
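+
+The result travels as a simple round trip through the history XML (sketch;
+only the two helper calls are taken from the hunks below):
+
+    /* Sender: attach the op's full result to its history entry */
+    stonith__xe_set_result(entry, &op->result);
+
+    /* Receiver: recover the result from the same entry */
+    stonith__xe_get_result(xml_op, &op->result);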
+---
+ daemons/fenced/fenced_history.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+diff --git a/daemons/fenced/fenced_history.c b/daemons/fenced/fenced_history.c
+index 5cacf36ca8..3ebf016e67 100644
+--- a/daemons/fenced/fenced_history.c
++++ b/daemons/fenced/fenced_history.c
+@@ -257,6 +257,7 @@ stonith_xml_history_to_list(xmlNode *history)
+         op->completed_nsec = completed_nsec;
+         crm_element_value_int(xml_op, F_STONITH_STATE, &state);
+         op->state = (enum op_state) state;
++        stonith__xe_get_result(xml_op, &op->result);
+ 
+         g_hash_table_replace(rv, id, op);
+         CRM_LOG_ASSERT(g_hash_table_lookup(rv, id) != NULL);
+@@ -355,6 +356,7 @@ stonith_local_history_diff_and_merge(GHashTable *remote_history,
+                 crm_xml_add_ll(entry, F_STONITH_DATE, op->completed);
+                 crm_xml_add_ll(entry, F_STONITH_DATE_NSEC, op->completed_nsec);
+                 crm_xml_add_int(entry, F_STONITH_STATE, op->state);
++                stonith__xe_set_result(entry, &op->result);
+             }
+     }
+ 
+-- 
+2.27.0
+
+
+From afc5292036e212bcfc7475893e0b326b2a69ac58 Mon Sep 17 00:00:00 2001
+From: Ken Gaillot <kgaillot@redhat.com>
+Date: Wed, 22 Dec 2021 17:17:21 -0600
+Subject: [PATCH 09/15] API: libstonithd: add exit_reason member to
+ stonith_history_t
+
+not yet used, but will be
+---
+ include/crm/stonith-ng.h | 3 ++-
+ lib/fencing/st_client.c  | 3 ++-
+ 2 files changed, 4 insertions(+), 2 deletions(-)
+
+diff --git a/include/crm/stonith-ng.h b/include/crm/stonith-ng.h
+index 3fe9cf54f8..2c79bfa579 100644
+--- a/include/crm/stonith-ng.h
++++ b/include/crm/stonith-ng.h
+@@ -1,5 +1,5 @@
+ /*
+- * Copyright 2004-2021 the Pacemaker project contributors
++ * Copyright 2004-2022 the Pacemaker project contributors
+  *
+  * The version control history for this file may have further details.
+  *
+@@ -111,6 +111,7 @@ typedef struct stonith_history_s {
+     time_t completed;
+     struct stonith_history_s *next;
+     long completed_nsec;
++    char *exit_reason;
+ } stonith_history_t;
+ 
+ typedef struct stonith_s stonith_t;
+diff --git a/lib/fencing/st_client.c b/lib/fencing/st_client.c
+index 718739b321..57a2e03361 100644
+--- a/lib/fencing/st_client.c
++++ b/lib/fencing/st_client.c
+@@ -1,5 +1,5 @@
+ /*
+- * Copyright 2004-2021 the Pacemaker project contributors
++ * Copyright 2004-2022 the Pacemaker project contributors
+  *
+  * The version control history for this file may have further details.
+  *
+@@ -735,6 +735,7 @@ void stonith_history_free(stonith_history_t *history)
+         free(hp->origin);
+         free(hp->delegate);
+         free(hp->client);
++        free(hp->exit_reason);
+     }
+ }
+ 
+-- 
+2.27.0
+
+
+From 1b9e2896322849002a5c0a3a34c9375ea32571d6 Mon Sep 17 00:00:00 2001
+From: Ken Gaillot <kgaillot@redhat.com>
+Date: Thu, 6 Jan 2022 18:04:15 -0600
+Subject: [PATCH 10/15] Feature: fencing: return exit reason with fencing
+ history
+
+libstonithd's stonith_t:cmds->history() method now parses exit reasons from the
+fencer reply, and returns them in the stonith_history_t results.
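+
+Ownership of the exit reason string is transferred from the parsed result to
+the history entry by stealing it before the reset (condensed from the hunk
+below):
+
+    pcmk__action_result_t result = PCMK__UNKNOWN_RESULT;
+
+    stonith__xe_get_result(op, &result);
+    kvp->exit_reason = result.exit_reason;  // take ownership of the string
+    result.exit_reason = NULL;              // so the reset won't free it
+    pcmk__reset_result(&result);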
+---
+ lib/fencing/st_client.c | 6 ++++++
+ 1 file changed, 6 insertions(+)
+
+diff --git a/lib/fencing/st_client.c b/lib/fencing/st_client.c
+index 57a2e03361..d229b34805 100644
+--- a/lib/fencing/st_client.c
++++ b/lib/fencing/st_client.c
+@@ -698,6 +698,7 @@ stonith_api_history(stonith_t * stonith, int call_options, const char *node,
+             stonith_history_t *kvp;
+             long long completed;
+             long long completed_nsec = 0L;
++            pcmk__action_result_t result = PCMK__UNKNOWN_RESULT;
+ 
+             kvp = calloc(1, sizeof(stonith_history_t));
+             kvp->target = crm_element_value_copy(op, F_STONITH_TARGET);
+@@ -711,6 +712,11 @@ stonith_api_history(stonith_t * stonith, int call_options, const char *node,
+             kvp->completed_nsec = completed_nsec;
+             crm_element_value_int(op, F_STONITH_STATE, &kvp->state);
+ 
++            stonith__xe_get_result(op, &result);
++            kvp->exit_reason = result.exit_reason;
++            result.exit_reason = NULL;
++            pcmk__reset_result(&result);
++
+             if (last) {
+                 last->next = kvp;
+             } else {
+-- 
+2.27.0
+
+
+From ba4e77242e9be4ebeb2843b444ee4afad43c29f3 Mon Sep 17 00:00:00 2001
+From: Ken Gaillot <kgaillot@redhat.com>
+Date: Fri, 7 Jan 2022 09:44:39 -0600
+Subject: [PATCH 11/15] Feature: fencing: display exit reasons with failed
+ fencing events
+
+... when available
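+
+Each output format uses the same "optional parenthesized reason" idiom
+(sketch with illustrative variables):
+
+    printf("%s of %s failed%s%s%s\n", action, target,
+           (reason == NULL)? "" : " (",
+           (reason == NULL)? "" : reason,
+           (reason == NULL)? "" : ")");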
+---
+ lib/fencing/st_output.c | 20 ++++++++++++++++----
+ tools/crm_mon_curses.c  |  9 +++++++--
+ 2 files changed, 23 insertions(+), 6 deletions(-)
+
+diff --git a/lib/fencing/st_output.c b/lib/fencing/st_output.c
+index e484278867..18924d795d 100644
+--- a/lib/fencing/st_output.c
++++ b/lib/fencing/st_output.c
+@@ -1,5 +1,5 @@
+ /*
+- * Copyright 2019-2021 the Pacemaker project contributors
++ * Copyright 2019-2022 the Pacemaker project contributors
+  *
+  * The version control history for this file may have further details.
+  *
+@@ -11,6 +11,7 @@
+ #include <stdarg.h>
+ 
+ #include <crm/stonith-ng.h>
++#include <crm/msg_xml.h>
+ #include <crm/common/iso8601.h>
+ #include <crm/common/util.h>
+ #include <crm/common/xml.h>
+@@ -263,8 +264,12 @@ stonith_event_html(pcmk__output_t *out, va_list args) {
+             char *failed_s = time_t_string(event->completed);
+ 
+             out->list_item(out, "failed-stonith-event",
+-                           "%s of %s failed : delegate=%s, client=%s, origin=%s, %s='%s' %s",
++                           "%s of %s failed%s%s%s: "
++                           "delegate=%s, client=%s, origin=%s, %s='%s' %s",
+                            stonith_action_str(event->action), event->target,
++                           (event->exit_reason == NULL)? "" : " (",
++                           (event->exit_reason == NULL)? "" : event->exit_reason,
++                           (event->exit_reason == NULL)? "" : ")",
+                            event->delegate ? event->delegate : "",
+                            event->client, event->origin,
+                            full_history ? "completed" : "last-failed",
+@@ -296,8 +301,13 @@ stonith_event_text(pcmk__output_t *out, va_list args) {
+ 
+     switch (event->state) {
+         case st_failed:
+-            pcmk__indented_printf(out, "%s of %s failed: delegate=%s, client=%s, origin=%s, %s='%s' %s\n",
++            pcmk__indented_printf(out,
++                                  "%s of %s failed%s%s%s: "
++                                  "delegate=%s, client=%s, origin=%s, %s='%s' %s\n",
+                                   stonith_action_str(event->action), event->target,
++                                  (event->exit_reason == NULL)? "" : " (",
++                                  (event->exit_reason == NULL)? "" : event->exit_reason,
++                                  (event->exit_reason == NULL)? "" : ")",
+                                   event->delegate ? event->delegate : "",
+                                   event->client, event->origin,
+                                   full_history ? "completed" : "last-failed", buf,
+@@ -341,7 +351,9 @@ stonith_event_xml(pcmk__output_t *out, va_list args) {
+ 
+     switch (event->state) {
+         case st_failed:
+-            crm_xml_add(node, "status", "failed");
++            pcmk__xe_set_props(node, "status", "failed",
++                               XML_LRM_ATTR_EXIT_REASON, event->exit_reason,
++                               NULL);
+             break;
+ 
+         case st_done:
+diff --git a/tools/crm_mon_curses.c b/tools/crm_mon_curses.c
+index bae3710c44..73c8516a8c 100644
+--- a/tools/crm_mon_curses.c
++++ b/tools/crm_mon_curses.c
+@@ -1,5 +1,5 @@
+ /*
+- * Copyright 2019-2021 the Pacemaker project contributors
++ * Copyright 2019-2022 the Pacemaker project contributors
+  *
+  * The version control history for this file may have further details.
+  *
+@@ -463,8 +463,13 @@ stonith_event_console(pcmk__output_t *out, va_list args) {
+ 
+     switch (event->state) {
+         case st_failed:
+-            curses_indented_printf(out, "%s of %s failed: delegate=%s, client=%s, origin=%s, %s='%s'%s\n",
++            curses_indented_printf(out,
++                                   "%s of %s failed%s%s%s: "
++                                   "delegate=%s, client=%s, origin=%s, %s='%s' %s\n",
+                                    stonith_action_str(event->action), event->target,
++                                   (event->exit_reason == NULL)? "" : " (",
++                                   (event->exit_reason == NULL)? "" : event->exit_reason,
++                                   (event->exit_reason == NULL)? "" : ")",
+                                    event->delegate ? event->delegate : "",
+                                    event->client, event->origin,
+                                    full_history ? "completed" : "last-failed", buf,
+-- 
+2.27.0
+
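+The three parallel ternaries in each format call implement an optional
+parenthesized suffix without needing a second format string: every ternary
+degrades to "" when there is no exit reason. A standalone sketch of the
+idiom (function name and sample values are hypothetical):
+
+    #include <stdio.h>
+
+    static void show_failure(const char *action, const char *target,
+                             const char *exit_reason)
+    {
+        printf("%s of %s failed%s%s%s\n", action, target,
+               (exit_reason == NULL)? "" : " (",
+               (exit_reason == NULL)? "" : exit_reason,
+               (exit_reason == NULL)? "" : ")");
+    }
+
+    int main(void)
+    {
+        show_failure("reboot", "node1", NULL);        /* ...failed */
+        show_failure("reboot", "node1", "timed out"); /* ...failed (timed out) */
+        return 0;
+    }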
+
+From 8105fb4a3a786780fdf85b3d0308eaf6df1ea434 Mon Sep 17 00:00:00 2001
+From: Ken Gaillot <kgaillot@redhat.com>
+Date: Fri, 7 Jan 2022 09:45:22 -0600
+Subject: [PATCH 12/15] Low: schemas: copy fence-event API schema in
+ preparation for changes
+
+---
+ include/crm/common/output_internal.h |  2 +-
+ xml/api/fence-event-2.15.rng         | 33 ++++++++++++++++++++++++++++
+ 2 files changed, 34 insertions(+), 1 deletion(-)
+ create mode 100644 xml/api/fence-event-2.15.rng
+
+diff --git a/include/crm/common/output_internal.h b/include/crm/common/output_internal.h
+index 479f0e4b43..8c5dcee17c 100644
+--- a/include/crm/common/output_internal.h
++++ b/include/crm/common/output_internal.h
+@@ -27,7 +27,7 @@ extern "C" {
+ #  include <glib.h>
+ #  include <crm/common/results.h>
+ 
+-#  define PCMK__API_VERSION "2.14"
++#  define PCMK__API_VERSION "2.15"
+ 
+ #if defined(PCMK__WITH_ATTRIBUTE_OUTPUT_ARGS)
+ #  define PCMK__OUTPUT_ARGS(ARGS...) __attribute__((output_args(ARGS)))
+diff --git a/xml/api/fence-event-2.15.rng b/xml/api/fence-event-2.15.rng
+new file mode 100644
+index 0000000000..e54687cd25
+--- /dev/null
++++ b/xml/api/fence-event-2.15.rng
+@@ -0,0 +1,33 @@
++<?xml version="1.0" encoding="UTF-8"?>
++<grammar xmlns="http://relaxng.org/ns/structure/1.0"
++         datatypeLibrary="http://www.w3.org/2001/XMLSchema-datatypes">
++
++    <start>
++        <ref name="fencing-history-event"/>
++    </start>
++
++    <define name="fencing-history-event">
++        <element name="fence_event">
++            <attribute name="status">
++                <choice>
++                    <value>failed</value>
++                    <value>success</value>
++                    <value>pending</value>
++                </choice>
++            </attribute>
++            <optional>
++                <attribute name="extended-status"> <text /> </attribute>
++            </optional>
++            <optional>
++                <attribute name="delegate"> <text /> </attribute>
++            </optional>
++            <attribute name="action"> <text /> </attribute>
++            <attribute name="target"> <text /> </attribute>
++            <attribute name="client"> <text /> </attribute>
++            <attribute name="origin"> <text /> </attribute>
++            <optional>
++                <attribute name="completed"> <text /> </attribute>
++            </optional>
++        </element>
++    </define>
++</grammar>
+-- 
+2.27.0
+
+
+From 46dd9b74d2ee8f7ab70a0c7fe3a998954d4029e8 Mon Sep 17 00:00:00 2001
+From: Ken Gaillot <kgaillot@redhat.com>
+Date: Fri, 7 Jan 2022 09:47:16 -0600
+Subject: [PATCH 13/15] Low: schemas: update fence-event API schema for recent
+ change
+
+---
+ xml/api/fence-event-2.15.rng | 3 +++
+ 1 file changed, 3 insertions(+)
+
+diff --git a/xml/api/fence-event-2.15.rng b/xml/api/fence-event-2.15.rng
+index e54687cd25..8e000cafa5 100644
+--- a/xml/api/fence-event-2.15.rng
++++ b/xml/api/fence-event-2.15.rng
+@@ -18,6 +18,9 @@
+             <optional>
+                 <attribute name="extended-status"> <text /> </attribute>
+             </optional>
++            <optional>
++                <attribute name="exit-reason"> <text /> </attribute>
++            </optional>
+             <optional>
+                 <attribute name="delegate"> <text /> </attribute>
+             </optional>
+-- 
+2.27.0
+
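+For reference, a hypothetical fence_event element that the updated 2.15
+schema accepts (all attribute values invented for illustration):
+
+    <fence_event status="failed" exit-reason="Fencing timed out"
+                 action="reboot" target="node1"
+                 client="stonith_admin.1234" origin="node2"
+                 completed="2022-01-07 09:47:16Z"/>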
+
+From 350e71772f67f28af6b67f864cbabc481730035c Mon Sep 17 00:00:00 2001
+From: Ken Gaillot <kgaillot@redhat.com>
+Date: Fri, 7 Jan 2022 11:32:09 -0600
+Subject: [PATCH 14/15] Build: libstonithd: bump shared library version
+
+... for the stonith_history_t change since 2.1.2.
+
+The struct should only ever be returned by the library as a pointer, so the
+changes can be considered backward-compatible. Normally we wouldn't bump shared
+library versions mid-cycle, but this will simplify expected backports of this
+change.
+---
+ lib/fencing/Makefile.am | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/lib/fencing/Makefile.am b/lib/fencing/Makefile.am
+index 1ffa3e051b..a10ddb88ec 100644
+--- a/lib/fencing/Makefile.am
++++ b/lib/fencing/Makefile.am
+@@ -2,7 +2,7 @@
+ # Original Author: Sun Jiang Dong <sunjd@cn.ibm.com>
+ # Copyright 2004 International Business Machines
+ #
+-# with later changes copyright 2004-2021 the Pacemaker project contributors.
++# with later changes copyright 2004-2022 the Pacemaker project contributors.
+ # The version control history for this file may have further details.
+ #
+ # This source code is licensed under the GNU General Public License version 2
+@@ -14,7 +14,7 @@ noinst_HEADERS		= fencing_private.h
+ 
+ lib_LTLIBRARIES		= libstonithd.la
+ 
+-libstonithd_la_LDFLAGS	= -version-info 33:0:7
++libstonithd_la_LDFLAGS	= -version-info 34:0:8
+ 
+ libstonithd_la_CFLAGS	= $(CFLAGS_HARDENED_LIB)
+ libstonithd_la_LDFLAGS	+= $(LDFLAGS_HARDENED_LIB)
+-- 
+2.27.0
+
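+As context for the bump (standard libtool semantics, not part of the
+patch): -version-info is CURRENT:REVISION:AGE, and a backward-compatible
+interface addition increments CURRENT and AGE while resetting REVISION:
+
+    # 33:0:7 -> 34:0:8: a new interface added, nothing removed, so the
+    # oldest supported interface stays at CURRENT - AGE = 26.
+    libstonithd_la_LDFLAGS = -version-info 34:0:8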
+
+From 63ea88620a62ff0759560a02bb5e284ebdd03eb6 Mon Sep 17 00:00:00 2001
+From: Ken Gaillot <kgaillot@redhat.com>
+Date: Wed, 19 Jan 2022 16:53:45 -0600
+Subject: [PATCH 15/15] Low: fencer: reset op result before grabbing new one
+
+just in case a result was already set, so its memory is not leaked
+when it is overwritten
+---
+ daemons/fenced/fenced_remote.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/daemons/fenced/fenced_remote.c b/daemons/fenced/fenced_remote.c
+index 7fb7695fba..dc4649e0fc 100644
+--- a/daemons/fenced/fenced_remote.c
++++ b/daemons/fenced/fenced_remote.c
+@@ -2219,6 +2219,7 @@ fenced_process_fencing_reply(xmlNode *msg)
+         return;
+     }
+ 
++    pcmk__reset_result(&op->result);
+     op->result = result; // The operation takes ownership of the result
+ 
+     if (op->devices && device && !pcmk__str_eq(op->devices->data, device, pcmk__str_casei)) {
+-- 
+2.27.0
+
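+The added reset guards the by-value assignment that follows it: a struct
+holding owned heap memory must be released before being overwritten, or
+the old allocation leaks. A hypothetical miniature of the pattern:
+
+    #include <stdlib.h>
+
+    typedef struct {
+        char *exit_reason;  /* owned, heap-allocated */
+    } result_t;
+
+    static void reset_result(result_t *r)
+    {
+        free(r->exit_reason);
+        r->exit_reason = NULL;
+    }
+
+    static void store_result(result_t *dst, result_t src)
+    {
+        reset_result(dst);  /* free anything already stored */
+        *dst = src;         /* struct copy takes ownership of src's memory */
+    }
+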
diff --git a/SOURCES/022-memory-leak.patch b/SOURCES/022-memory-leak.patch
new file mode 100644
index 0000000..3970dd3
--- /dev/null
+++ b/SOURCES/022-memory-leak.patch
@@ -0,0 +1,82 @@
+From 8034a203bbff0aa3b53f2946dc58e409bd7246c9 Mon Sep 17 00:00:00 2001
+From: Ken Gaillot <kgaillot@redhat.com>
+Date: Thu, 20 Jan 2022 15:03:31 -0600
+Subject: [PATCH] Fix: scheduler: avoid memory leak when displaying clones
+
+Previously, pe__clone_default() unconditionally created a hash table for
+stopped instances, but didn't free it in every code path.
+
+Now, create the table only when there is something to put in it and a
+code path that will actually display it, and ensure it always gets freed.
+---
+ lib/pengine/clone.c | 18 +++++++++++++-----
+ 1 file changed, 13 insertions(+), 5 deletions(-)
+
+diff --git a/lib/pengine/clone.c b/lib/pengine/clone.c
+index 742e2920b0..920a04c32c 100644
+--- a/lib/pengine/clone.c
++++ b/lib/pengine/clone.c
+@@ -761,7 +761,7 @@ pe__clone_default(pcmk__output_t *out, va_list args)
+     GList *only_node = va_arg(args, GList *);
+     GList *only_rsc = va_arg(args, GList *);
+ 
+-    GHashTable *stopped = pcmk__strkey_table(free, free);
++    GHashTable *stopped = NULL;
+ 
+     char *list_text = NULL;
+     size_t list_text_len = 0;
+@@ -818,7 +818,11 @@ pe__clone_default(pcmk__output_t *out, va_list args)
+         } else if (partially_active == FALSE) {
+             // List stopped instances when requested (except orphans)
+             if (!pcmk_is_set(child_rsc->flags, pe_rsc_orphan)
++                && !pcmk_is_set(show_opts, pcmk_show_clone_detail)
+                 && pcmk_is_set(show_opts, pcmk_show_inactive_rscs)) {
++                if (stopped == NULL) {
++                    stopped = pcmk__strkey_table(free, free);
++                }
+                 g_hash_table_insert(stopped, strdup(child_rsc->id), strdup("Stopped"));
+             }
+ 
+@@ -873,7 +877,6 @@ pe__clone_default(pcmk__output_t *out, va_list args)
+     }
+ 
+     if (pcmk_is_set(show_opts, pcmk_show_clone_detail)) {
+-        g_hash_table_destroy(stopped);
+         PCMK__OUTPUT_LIST_FOOTER(out, rc);
+         return pcmk_rc_ok;
+     }
+@@ -948,8 +951,10 @@ pe__clone_default(pcmk__output_t *out, va_list args)
+             GList *list = g_hash_table_get_values(rsc->allowed_nodes);
+ 
+             /* Custom stopped table for non-unique clones */
+-            g_hash_table_destroy(stopped);
+-            stopped = pcmk__strkey_table(free, free);
++            if (stopped != NULL) {
++                g_hash_table_destroy(stopped);
++                stopped = NULL;
++            }
+ 
+             if (list == NULL) {
+                 /* Clusters with symmetrical=false haven't calculated allowed_nodes yet
+@@ -972,6 +977,9 @@ pe__clone_default(pcmk__output_t *out, va_list args)
+                         state = "Stopped (disabled)";
+                     }
+ 
++                    if (stopped == NULL) {
++                        stopped = pcmk__strkey_table(free, free);
++                    }
+                     if (probe_op != NULL) {
+                         int rc;
+ 
+@@ -987,7 +995,7 @@ pe__clone_default(pcmk__output_t *out, va_list args)
+             g_list_free(list);
+         }
+ 
+-        if (g_hash_table_size(stopped) > 0) {
++        if (stopped != NULL) {
+             GList *list = sorted_hash_table_values(stopped);
+ 
+             clone_header(out, &rc, rsc, clone_data);
+-- 
+2.27.0
+
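+The fix is a lazy-allocation pattern: create the table only on first
+insert, treat NULL as "nothing to report", and free it in exactly one
+place. A minimal standalone sketch with plain GLib (hypothetical names;
+not the Pacemaker code itself):
+
+    #include <glib.h>
+
+    static void record_stopped(GHashTable **stopped, const char *id)
+    {
+        if (*stopped == NULL) {  /* allocate only when needed */
+            *stopped = g_hash_table_new_full(g_str_hash, g_str_equal,
+                                             g_free, g_free);
+        }
+        g_hash_table_insert(*stopped, g_strdup(id), g_strdup("Stopped"));
+    }
+
+    int main(void)
+    {
+        GHashTable *stopped = NULL;
+
+        record_stopped(&stopped, "dummy:0");
+
+        if (stopped != NULL) {   /* non-NULL implies at least one entry */
+            g_hash_table_destroy(stopped);
+        }
+        return 0;
+    }
+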
diff --git a/SPECS/pacemaker.spec b/SPECS/pacemaker.spec
index 459a81a..18dbbe9 100644
--- a/SPECS/pacemaker.spec
+++ b/SPECS/pacemaker.spec
@@ -36,7 +36,7 @@
 ## can be incremented to build packages reliably considered "newer"
 ## than previously built packages with the same pcmkversion)
 %global pcmkversion 2.1.2
-%global specversion 2
+%global specversion 3
 
 ## Upstream commit (full commit ID, abbreviated commit ID, or tag) to build
 %global commit ada5c3b36e2adf1703d54d39f40a4b8628eca175
@@ -271,6 +271,20 @@ Patch5:        005-fencing-reasons.patch
 Patch6:        006-stateful-metadata.patch
 Patch7:        007-memory-leak.patch
 Patch8:        008-fencing-history.patch
+Patch9:        009-fencing-reasons.patch
+Patch10:       010-probe-failures.patch
+Patch11:       011-fencing-reasons.patch
+Patch12:       012-notify-crash.patch
+Patch13:       013-probe-failures.patch
+Patch14:       014-pcmk_delay_base.patch
+Patch15:       015-fencing-reasons.patch
+Patch16:       016-fencing-crash.patch
+Patch17:       017-fencing-reasons.patch
+Patch18:       018-failure-messages.patch
+Patch19:       019-corosync-tracking.patch
+Patch20:       020-systemd-unit.patch
+Patch21:       021-failure-messages.patch
+Patch22:       022-memory-leak.patch
 
 # downstream-only commits
 #Patch1xx:      1xx-xxxx.patch
@@ -968,6 +982,18 @@ exit 0
 %license %{nagios_name}-%{nagios_hash}/COPYING
 
 %changelog
+* Fri Jan 21 2022 Ken Gaillot <kgaillot@redhat.com> - 2.1.2-3
+- Improve display of failed actions
+- Handle certain probe failures as stopped instead of failed
+- Update pcmk_delay_base description in option meta-data
+- Avoid crash when using clone notifications
+- Retry Corosync shutdown tracking if first attempt fails
+- Resolves: rhbz1470834
+- Resolves: rhbz1506372
+- Resolves: rhbz2027370
+- Resolves: rhbz2039675
+- Resolves: rhbz2042550
+
 * Thu Dec 16 2021 Ken Gaillot <kgaillot@redhat.com> - 2.1.2-2
 - Correctly get metadata for systemd agent names that end in '@'
 - Use correct OCF 1.1 syntax in ocf:pacemaker:Stateful meta-data