From 9ee3d6c9b0aba6aae022cc152a3b3472fe388fa3 Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Thu, 6 Jan 2022 16:44:32 -0600
Subject: [PATCH 01/15] Refactor: fencer: add exit reason to fencing operation
 object

In order to pass a fencing action's exit reason with the action history,
we need the exit reason in remote_fencing_op_t. Nothing sets or uses it as of
this commit.
---
 daemons/fenced/fenced_remote.c    | 2 ++
 daemons/fenced/pacemaker-fenced.h | 4 +++-
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/daemons/fenced/fenced_remote.c b/daemons/fenced/fenced_remote.c
index 6eebb7381e..0fa9706140 100644
--- a/daemons/fenced/fenced_remote.c
+++ b/daemons/fenced/fenced_remote.c
@@ -260,6 +260,8 @@ free_remote_op(gpointer data)
     }
     g_list_free_full(op->automatic_list, free);
     g_list_free(op->duplicates);
+
+    pcmk__reset_result(&op->result);
     free(op);
 }
 
diff --git a/daemons/fenced/pacemaker-fenced.h b/daemons/fenced/pacemaker-fenced.h
index 502fcc9a29..1a5c933ea7 100644
--- a/daemons/fenced/pacemaker-fenced.h
+++ b/daemons/fenced/pacemaker-fenced.h
@@ -1,5 +1,5 @@
 /*
- * Copyright 2009-2021 the Pacemaker project contributors
+ * Copyright 2009-2022 the Pacemaker project contributors
  *
  * This source code is licensed under the GNU General Public License version 2
  * or later (GPLv2+) WITHOUT ANY WARRANTY.
@@ -151,6 +151,8 @@ typedef struct remote_fencing_op_s {
     /*! The point at which the remote operation completed(nsec) */
     long long completed_nsec;
 
+    /*! The (potentially intermediate) result of the operation */
+    pcmk__action_result_t result;
 } remote_fencing_op_t;
 
 void fenced_broadcast_op_result(remote_fencing_op_t *op,
-- 
2.27.0


From 97a2c318866adc5ef5e426c5c3b753df1fa3ab66 Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Thu, 6 Jan 2022 17:08:42 -0600
Subject: [PATCH 02/15] Refactor: fencer: track full result in
 remote_fencing_op_t

Now that remote_fencing_op_t has a place for the full result,
set it before calling finalize_op(), instead of passing a separate result
object to finalize_op().

As a bonus, this simplifies the memory management, reducing the chance of
mistakes.
---
 daemons/fenced/fenced_remote.c | 161 ++++++++++++++++-----------------
 1 file changed, 77 insertions(+), 84 deletions(-)

diff --git a/daemons/fenced/fenced_remote.c b/daemons/fenced/fenced_remote.c
index 0fa9706140..30edbff890 100644
--- a/daemons/fenced/fenced_remote.c
+++ b/daemons/fenced/fenced_remote.c
@@ -82,8 +82,7 @@ extern xmlNode *stonith_create_op(int call_id, const char *token, const char *op
 static void request_peer_fencing(remote_fencing_op_t *op,
                                 peer_device_info_t *peer,
                                 pcmk__action_result_t *result);
-static void finalize_op(remote_fencing_op_t *op, xmlNode *data,
-                        pcmk__action_result_t *result, bool dup);
+static void finalize_op(remote_fencing_op_t *op, xmlNode *data, bool dup);
 static void report_timeout_period(remote_fencing_op_t * op, int op_timeout);
 static int get_op_total_timeout(const remote_fencing_op_t *op,
                                 const peer_device_info_t *chosen_peer);
@@ -485,7 +484,9 @@ finalize_op_duplicates(remote_fencing_op_t *op, xmlNode *data,
                       other->client_name, other->originator,
                       pcmk_exec_status_str(result->execution_status),
                       other->id);
-            finalize_op(other, data, result, true);
+            pcmk__set_result(&other->result, result->exit_status,
+                             result->execution_status, result->exit_reason);
+            finalize_op(other, data, true);
 
         } else {
             // Possible if (for example) it timed out already
@@ -520,20 +521,20 @@ delegate_from_xml(xmlNode *xml)
  *
  * \param[in] op      Fencer operation that completed
  * \param[in] data    If not NULL, XML reply of last delegated fencing operation
- * \param[in] result  Full operation result
  * \param[in] dup     Whether this operation is a duplicate of another
  *                    (in which case, do not broadcast the result)
+ *
+ *  \note The operation result should be set before calling this function.
  */
 static void
-finalize_op(remote_fencing_op_t *op, xmlNode *data,
-            pcmk__action_result_t *result, bool dup)
+finalize_op(remote_fencing_op_t *op, xmlNode *data, bool dup)
 {
     int level = LOG_ERR;
     const char *subt = NULL;
     xmlNode *local_data = NULL;
     gboolean op_merged = FALSE;
 
-    CRM_CHECK((op != NULL) && (result != NULL), return);
+    CRM_CHECK((op != NULL), return);
 
     if (op->notify_sent) {
         // Most likely, this is a timed-out action that eventually completed
@@ -557,11 +558,11 @@ finalize_op(remote_fencing_op_t *op, xmlNode *data,
         local_data = data;
 
     } else if (op->delegate == NULL) {
-        switch (result->execution_status) {
+        switch (op->result.execution_status) {
             case PCMK_EXEC_NO_FENCE_DEVICE:
                 break;
             case PCMK_EXEC_INVALID:
-                if (result->exit_status == CRM_EX_EXPIRED) {
+                if (op->result.exit_status == CRM_EX_EXPIRED) {
                     break;
                 }
                 // else fall through
@@ -581,12 +582,12 @@ finalize_op(remote_fencing_op_t *op, xmlNode *data,
     subt = crm_element_value(data, F_SUBTYPE);
     if (!dup && !pcmk__str_eq(subt, "broadcast", pcmk__str_casei)) {
         /* Defer notification until the bcast message arrives */
-        fenced_broadcast_op_result(op, result, op_merged);
+        fenced_broadcast_op_result(op, &op->result, op_merged);
         free_xml(local_data);
         return;
     }
 
-    if (pcmk__result_ok(result) || dup
+    if (pcmk__result_ok(&op->result) || dup
         || !pcmk__str_eq(op->originator, stonith_our_uname, pcmk__str_casei)) {
         level = LOG_NOTICE;
     }
@@ -595,16 +596,17 @@ finalize_op(remote_fencing_op_t *op, xmlNode *data,
                (op->target? op->target : ""),
                (op->delegate? op->delegate : "unknown node"),
                op->client_name, op->originator,
-               (op_merged? " (merged)" : ""), crm_exit_str(result->exit_status),
-               pcmk_exec_status_str(result->execution_status),
-               ((result->exit_reason == NULL)? "" : ": "),
-               ((result->exit_reason == NULL)? "" : result->exit_reason),
+               (op_merged? " (merged)" : ""),
+               crm_exit_str(op->result.exit_status),
+               pcmk_exec_status_str(op->result.execution_status),
+               ((op->result.exit_reason == NULL)? "" : ": "),
+               ((op->result.exit_reason == NULL)? "" : op->result.exit_reason),
                op->id);
 
-    handle_local_reply_and_notify(op, data, result);
+    handle_local_reply_and_notify(op, data, &op->result);
 
     if (!dup) {
-        finalize_op_duplicates(op, data, result);
+        finalize_op_duplicates(op, data, &op->result);
     }
 
     /* Free non-essential parts of the record
@@ -634,7 +636,6 @@ static gboolean
 remote_op_watchdog_done(gpointer userdata)
 {
     remote_fencing_op_t *op = userdata;
-    pcmk__action_result_t result = PCMK__UNKNOWN_RESULT;
 
     op->op_timer_one = 0;
 
@@ -642,8 +643,8 @@ remote_op_watchdog_done(gpointer userdata)
                CRM_XS " id=%.8s",
                op->action, op->target, op->client_name, op->id);
     op->state = st_done;
-    pcmk__set_result(&result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
-    finalize_op(op, NULL, &result, false);
+    pcmk__set_result(&op->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
+    finalize_op(op, NULL, false);
     return G_SOURCE_REMOVE;
 }
 
@@ -676,8 +677,6 @@ remote_op_timeout_one(gpointer userdata)
 static void
 finalize_timed_out_op(remote_fencing_op_t *op, const char *reason)
 {
-    pcmk__action_result_t result = PCMK__UNKNOWN_RESULT;
-
     op->op_timer_total = 0;
 
     crm_debug("Action '%s' targeting %s for client %s timed out "
@@ -690,13 +689,12 @@ finalize_timed_out_op(remote_fencing_op_t *op, const char *reason)
          * devices, and return success.
          */
         op->state = st_done;
-        pcmk__set_result(&result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
+        pcmk__set_result(&op->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
     } else {
         op->state = st_failed;
-        pcmk__set_result(&result, CRM_EX_ERROR, PCMK_EXEC_TIMEOUT, reason);
+        pcmk__set_result(&op->result, CRM_EX_ERROR, PCMK_EXEC_TIMEOUT, reason);
     }
-    finalize_op(op, NULL, &result, false);
-    pcmk__reset_result(&result);
+    finalize_op(op, NULL, false);
 }
 
 /*!
@@ -1094,13 +1092,9 @@ fenced_handle_manual_confirmation(pcmk__client_t *client, xmlNode *msg)
     set_fencing_completed(op);
     op->delegate = strdup("a human");
 
-    {
-        // For the fencer's purposes, the fencing operation is done
-        pcmk__action_result_t result = PCMK__UNKNOWN_RESULT;
-
-        pcmk__set_result(&result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
-        finalize_op(op, msg, &result, false);
-    }
+    // For the fencer's purposes, the fencing operation is done
+    pcmk__set_result(&op->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
+    finalize_op(op, msg, false);
 
     /* For the requester's purposes, the operation is still pending. The
      * actual result will be sent asynchronously via the operation's done_cb().
@@ -1279,16 +1273,11 @@ initiate_remote_stonith_op(pcmk__client_t *client, xmlNode *request,
     switch (op->state) {
         case st_failed:
             // advance_topology_level() exhausted levels
-            {
-                pcmk__action_result_t result = PCMK__UNKNOWN_RESULT;
-
-                pcmk__set_result(&result, CRM_EX_ERROR, PCMK_EXEC_ERROR,
-                                 "All topology levels failed");
-                crm_warn("Could not request peer fencing (%s) targeting %s "
-                         CRM_XS " id=%.8s", op->action, op->target, op->id);
-                finalize_op(op, NULL, &result, false);
-                pcmk__reset_result(&result);
-            }
+            pcmk__set_result(&op->result, CRM_EX_ERROR, PCMK_EXEC_ERROR,
+                             "All topology levels failed");
+            crm_warn("Could not request peer fencing (%s) targeting %s "
+                     CRM_XS " id=%.8s", op->action, op->target, op->id);
+            finalize_op(op, NULL, false);
             return op;
 
         case st_duplicate:
@@ -1613,10 +1602,6 @@ static void
 advance_topology_device_in_level(remote_fencing_op_t *op, const char *device,
                                  xmlNode *msg)
 {
-    pcmk__action_result_t result = PCMK__UNKNOWN_RESULT;
-
-    pcmk__set_result(&result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
-
     /* Advance to the next device at this topology level, if any */
     if (op->devices) {
         op->devices = op->devices->next;
@@ -1644,6 +1629,10 @@ advance_topology_device_in_level(remote_fencing_op_t *op, const char *device,
     }
 
     if (op->devices) {
+        pcmk__action_result_t result = PCMK__UNKNOWN_RESULT;
+
+        pcmk__set_result(&result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
+
         /* Necessary devices remain, so execute the next one */
         crm_trace("Next targeting %s on behalf of %s@%s",
                   op->target, op->client_name, op->originator);
@@ -1659,7 +1648,8 @@ advance_topology_device_in_level(remote_fencing_op_t *op, const char *device,
         crm_trace("Marking complex fencing op targeting %s as complete",
                   op->target);
         op->state = st_done;
-        finalize_op(op, msg, &result, false);
+        pcmk__set_result(&op->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
+        finalize_op(op, msg, false);
     }
 }
 
@@ -1868,7 +1858,9 @@ request_peer_fencing(remote_fencing_op_t *op, peer_device_info_t *peer,
         }
 
         op->state = st_failed;
-        finalize_op(op, NULL, result, false);
+        pcmk__set_result(&op->result, result->exit_status,
+                         result->execution_status, result->exit_reason);
+        finalize_op(op, NULL, false);
 
     } else {
         crm_info("Waiting for additional peers capable of fencing (%s) %s%s%s "
@@ -2245,31 +2237,34 @@ fenced_process_fencing_reply(xmlNode *msg)
         /* Could be for an event that began before we started */
         /* TODO: Record the op for later querying */
         crm_info("Received peer result of unknown or expired operation %s", id);
-        goto done;
+        pcmk__reset_result(&result);
+        return;
     }
 
+    op->result = result; // The operation takes ownership of the result
+
     if (op->devices && device && !pcmk__str_eq(op->devices->data, device, pcmk__str_casei)) {
         crm_err("Received outdated reply for device %s (instead of %s) to "
                 "fence (%s) %s. Operation already timed out at peer level.",
                 device, (const char *) op->devices->data, op->action, op->target);
-        goto done;
+        return;
     }
 
     if (pcmk__str_eq(crm_element_value(msg, F_SUBTYPE), "broadcast", pcmk__str_casei)) {
         crm_debug("Finalizing action '%s' targeting %s on behalf of %s@%s: %s%s%s%s "
                   CRM_XS " id=%.8s",
                   op->action, op->target, op->client_name, op->originator,
-                  pcmk_exec_status_str(result.execution_status),
-                  (result.exit_reason == NULL)? "" : " (",
-                  (result.exit_reason == NULL)? "" : result.exit_reason,
-                  (result.exit_reason == NULL)? "" : ")", op->id);
-        if (pcmk__result_ok(&result)) {
+                  pcmk_exec_status_str(op->result.execution_status),
+                  (op->result.exit_reason == NULL)? "" : " (",
+                  (op->result.exit_reason == NULL)? "" : op->result.exit_reason,
+                  (op->result.exit_reason == NULL)? "" : ")", op->id);
+        if (pcmk__result_ok(&op->result)) {
             op->state = st_done;
         } else {
             op->state = st_failed;
         }
-        finalize_op(op, msg, &result, false);
-        goto done;
+        finalize_op(op, msg, false);
+        return;
 
     } else if (!pcmk__str_eq(op->originator, stonith_our_uname, pcmk__str_casei)) {
         /* If this isn't a remote level broadcast, and we are not the
@@ -2277,7 +2272,7 @@ fenced_process_fencing_reply(xmlNode *msg)
         crm_err("Received non-broadcast fencing result for operation %.8s "
                 "we do not own (device %s targeting %s)",
                 op->id, device, op->target);
-        goto done;
+        return;
     }
 
     if (pcmk_is_set(op->call_options, st_opt_topology)) {
@@ -2286,58 +2281,58 @@ fenced_process_fencing_reply(xmlNode *msg)
         crm_notice("Action '%s' targeting %s using %s on behalf of %s@%s: %s%s%s%s",
                    op->action, op->target, device, op->client_name,
                    op->originator,
-                   pcmk_exec_status_str(result.execution_status),
-                  (result.exit_reason == NULL)? "" : " (",
-                  (result.exit_reason == NULL)? "" : result.exit_reason,
-                  (result.exit_reason == NULL)? "" : ")");
+                   pcmk_exec_status_str(op->result.execution_status),
+                  (op->result.exit_reason == NULL)? "" : " (",
+                  (op->result.exit_reason == NULL)? "" : op->result.exit_reason,
+                  (op->result.exit_reason == NULL)? "" : ")");
 
         /* We own the op, and it is complete. broadcast the result to all nodes
          * and notify our local clients. */
         if (op->state == st_done) {
-            finalize_op(op, msg, &result, false);
-            goto done;
+            finalize_op(op, msg, false);
+            return;
         }
 
-        if ((op->phase == 2) && !pcmk__result_ok(&result)) {
+        if ((op->phase == 2) && !pcmk__result_ok(&op->result)) {
             /* A remapped "on" failed, but the node was already turned off
              * successfully, so ignore the error and continue.
              */
             crm_warn("Ignoring %s 'on' failure (%s%s%s) targeting %s "
                      "after successful 'off'",
-                     device, pcmk_exec_status_str(result.execution_status),
-                     (result.exit_reason == NULL)? "" : ": ",
-                     (result.exit_reason == NULL)? "" : result.exit_reason,
+                     device, pcmk_exec_status_str(op->result.execution_status),
+                     (op->result.exit_reason == NULL)? "" : ": ",
+                     (op->result.exit_reason == NULL)? "" : op->result.exit_reason,
                      op->target);
-            pcmk__set_result(&result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
+            pcmk__set_result(&op->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
         }
 
-        if (pcmk__result_ok(&result)) {
+        if (pcmk__result_ok(&op->result)) {
             /* An operation completed successfully. Try another device if
              * necessary, otherwise mark the operation as done. */
             advance_topology_device_in_level(op, device, msg);
-            goto done;
+            return;
         } else {
             /* This device failed, time to try another topology level. If no other
              * levels are available, mark this operation as failed and report results. */
             if (advance_topology_level(op, false) != pcmk_rc_ok) {
                 op->state = st_failed;
-                finalize_op(op, msg, &result, false);
-                goto done;
+                finalize_op(op, msg, false);
+                return;
             }
         }
 
-    } else if (pcmk__result_ok(&result) && (op->devices == NULL)) {
+    } else if (pcmk__result_ok(&op->result) && (op->devices == NULL)) {
         crm_trace("All done for %s", op->target);
         op->state = st_done;
-        finalize_op(op, msg, &result, false);
-        goto done;
+        finalize_op(op, msg, false);
+        return;
 
-    } else if ((result.execution_status == PCMK_EXEC_TIMEOUT)
+    } else if ((op->result.execution_status == PCMK_EXEC_TIMEOUT)
                && (op->devices == NULL)) {
         /* If the operation timed out don't bother retrying other peers. */
         op->state = st_failed;
-        finalize_op(op, msg, &result, false);
-        goto done;
+        finalize_op(op, msg, false);
+        return;
 
     } else {
         /* fall-through and attempt other fencing action using another peer */
@@ -2346,10 +2341,8 @@ fenced_process_fencing_reply(xmlNode *msg)
     /* Retry on failure */
     crm_trace("Next for %s on behalf of %s@%s (result was: %s)",
               op->target, op->originator, op->client_name,
-              pcmk_exec_status_str(result.execution_status));
-    request_peer_fencing(op, NULL, &result);
-done:
-    pcmk__reset_result(&result);
+              pcmk_exec_status_str(op->result.execution_status));
+    request_peer_fencing(op, NULL, &op->result);
 }
 
 gboolean
-- 
2.27.0


From c59d062154f7c9e15e90929a20ea244d7efd7247 Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Thu, 6 Jan 2022 17:11:12 -0600
Subject: [PATCH 03/15] Refactor: fencer: drop redundant argument from
 finalize_op_duplicates()

... now that the result is in the op
---
 daemons/fenced/fenced_remote.c | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/daemons/fenced/fenced_remote.c b/daemons/fenced/fenced_remote.c
index 30edbff890..8b496e1042 100644
--- a/daemons/fenced/fenced_remote.c
+++ b/daemons/fenced/fenced_remote.c
@@ -468,11 +468,9 @@ handle_local_reply_and_notify(remote_fencing_op_t *op, xmlNode *data,
  *
  * \param[in] op         Fencer operation that completed
  * \param[in] data       Top-level XML to add notification to
- * \param[in] result     Full operation result
  */
 static void
-finalize_op_duplicates(remote_fencing_op_t *op, xmlNode *data,
-                       pcmk__action_result_t *result)
+finalize_op_duplicates(remote_fencing_op_t *op, xmlNode *data)
 {
     for (GList *iter = op->duplicates; iter != NULL; iter = iter->next) {
         remote_fencing_op_t *other = iter->data;
@@ -482,10 +480,11 @@ finalize_op_duplicates(remote_fencing_op_t *op, xmlNode *data,
             crm_debug("Performing duplicate notification for %s@%s: %s "
                       CRM_XS " id=%.8s",
                       other->client_name, other->originator,
-                      pcmk_exec_status_str(result->execution_status),
+                      pcmk_exec_status_str(op->result.execution_status),
                       other->id);
-            pcmk__set_result(&other->result, result->exit_status,
-                             result->execution_status, result->exit_reason);
+            pcmk__set_result(&other->result, op->result.exit_status,
+                             op->result.execution_status,
+                             op->result.exit_reason);
             finalize_op(other, data, true);
 
         } else {
@@ -606,7 +605,7 @@ finalize_op(remote_fencing_op_t *op, xmlNode *data, bool dup)
     handle_local_reply_and_notify(op, data, &op->result);
 
     if (!dup) {
-        finalize_op_duplicates(op, data, &op->result);
+        finalize_op_duplicates(op, data);
     }
 
     /* Free non-essential parts of the record
-- 
2.27.0


From 6c49675855323a52a534afa112a0861ba2e3b1ad Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Thu, 6 Jan 2022 17:15:17 -0600
Subject: [PATCH 04/15] Refactor: fencer: drop redundant argument from
 fenced_broadcast_op_result()

... now that the op includes the result
---
 daemons/fenced/fenced_history.c   | 9 +++------
 daemons/fenced/fenced_remote.c    | 8 +++-----
 daemons/fenced/pacemaker-fenced.h | 3 +--
 3 files changed, 7 insertions(+), 13 deletions(-)

diff --git a/daemons/fenced/fenced_history.c b/daemons/fenced/fenced_history.c
index 0157deadb3..5cacf36ca8 100644
--- a/daemons/fenced/fenced_history.c
+++ b/daemons/fenced/fenced_history.c
@@ -1,5 +1,5 @@
 /*
- * Copyright 2009-2021 the Pacemaker project contributors
+ * Copyright 2009-2022 the Pacemaker project contributors
  *
  * The version control history for this file may have further details.
  *
@@ -359,8 +359,6 @@ stonith_local_history_diff_and_merge(GHashTable *remote_history,
     }
 
     if (remote_history) {
-        pcmk__action_result_t result = PCMK__UNKNOWN_RESULT;
-
         init_stonith_remote_op_hash_table(&stonith_remote_op_list);
 
         updated |= g_hash_table_size(remote_history);
@@ -378,10 +376,10 @@ stonith_local_history_diff_and_merge(GHashTable *remote_history,
                 /* CRM_EX_EXPIRED + PCMK_EXEC_INVALID prevents finalize_op()
                  * from setting a delegate
                  */
-                pcmk__set_result(&result, CRM_EX_EXPIRED, PCMK_EXEC_INVALID,
+                pcmk__set_result(&op->result, CRM_EX_EXPIRED, PCMK_EXEC_INVALID,
                                  "Initiated by earlier fencer "
                                  "process and presumed failed");
-                fenced_broadcast_op_result(op, &result, false);
+                fenced_broadcast_op_result(op, false);
             }
 
             g_hash_table_iter_steal(&iter);
@@ -396,7 +394,6 @@ stonith_local_history_diff_and_merge(GHashTable *remote_history,
              */
         }
 
-        pcmk__reset_result(&result);
         g_hash_table_destroy(remote_history); /* remove what is left */
     }
 
diff --git a/daemons/fenced/fenced_remote.c b/daemons/fenced/fenced_remote.c
index 8b496e1042..fb5a5e980e 100644
--- a/daemons/fenced/fenced_remote.c
+++ b/daemons/fenced/fenced_remote.c
@@ -390,16 +390,14 @@ fencing_result2xml(remote_fencing_op_t *op, pcmk__action_result_t *result)
  * \brief Broadcast a fence result notification to all CPG peers
  *
  * \param[in] op         Fencer operation that completed
- * \param[in] result     Full operation result
  * \param[in] op_merged  Whether this operation is a duplicate of another
  */
 void
-fenced_broadcast_op_result(remote_fencing_op_t *op,
-                           pcmk__action_result_t *result, bool op_merged)
+fenced_broadcast_op_result(remote_fencing_op_t *op, bool op_merged)
 {
     static int count = 0;
     xmlNode *bcast = create_xml_node(NULL, T_STONITH_REPLY);
-    xmlNode *notify_data = fencing_result2xml(op, result);
+    xmlNode *notify_data = fencing_result2xml(op, &op->result);
 
     count++;
     crm_trace("Broadcasting result to peers");
@@ -581,7 +579,7 @@ finalize_op(remote_fencing_op_t *op, xmlNode *data, bool dup)
     subt = crm_element_value(data, F_SUBTYPE);
     if (!dup && !pcmk__str_eq(subt, "broadcast", pcmk__str_casei)) {
         /* Defer notification until the bcast message arrives */
-        fenced_broadcast_op_result(op, &op->result, op_merged);
+        fenced_broadcast_op_result(op, op_merged);
         free_xml(local_data);
         return;
     }
diff --git a/daemons/fenced/pacemaker-fenced.h b/daemons/fenced/pacemaker-fenced.h
index 1a5c933ea7..6213407da3 100644
--- a/daemons/fenced/pacemaker-fenced.h
+++ b/daemons/fenced/pacemaker-fenced.h
@@ -155,8 +155,7 @@ typedef struct remote_fencing_op_s {
     pcmk__action_result_t result;
 } remote_fencing_op_t;
 
-void fenced_broadcast_op_result(remote_fencing_op_t *op,
-                                pcmk__action_result_t *result, bool op_merged);
+void fenced_broadcast_op_result(remote_fencing_op_t *op, bool op_merged);
 
 // Fencer-specific client flags
 enum st_client_flags {
-- 
2.27.0


From 73994fc740b8833457b130368db479502d49f285 Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Thu, 6 Jan 2022 17:17:33 -0600
Subject: [PATCH 05/15] Refactor: fencer: drop redundant argument from
 handle_local_reply_and_notify()

... now that the op includes the result
---
 daemons/fenced/fenced_remote.c | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

diff --git a/daemons/fenced/fenced_remote.c b/daemons/fenced/fenced_remote.c
index fb5a5e980e..2621cb2f19 100644
--- a/daemons/fenced/fenced_remote.c
+++ b/daemons/fenced/fenced_remote.c
@@ -424,11 +424,9 @@ fenced_broadcast_op_result(remote_fencing_op_t *op, bool op_merged)
  *
  * \param[in] op         Fencer operation that completed
  * \param[in] data       Top-level XML to add notification to
- * \param[in] result     Full operation result
  */
 static void
-handle_local_reply_and_notify(remote_fencing_op_t *op, xmlNode *data,
-                              pcmk__action_result_t *result)
+handle_local_reply_and_notify(remote_fencing_op_t *op, xmlNode *data)
 {
     xmlNode *notify_data = NULL;
     xmlNode *reply = NULL;
@@ -443,15 +441,15 @@ handle_local_reply_and_notify(remote_fencing_op_t *op, xmlNode *data,
     crm_xml_add(data, F_STONITH_TARGET, op->target);
     crm_xml_add(data, F_STONITH_OPERATION, op->action);
 
-    reply = fenced_construct_reply(op->request, data, result);
+    reply = fenced_construct_reply(op->request, data, &op->result);
     crm_xml_add(reply, F_STONITH_DELEGATE, op->delegate);
 
     /* Send fencing OP reply to local client that initiated fencing */
     do_local_reply(reply, op->client_id, op->call_options & st_opt_sync_call, FALSE);
 
     /* bcast to all local clients that the fencing operation happend */
-    notify_data = fencing_result2xml(op, result);
-    fenced_send_notification(T_STONITH_NOTIFY_FENCE, result, notify_data);
+    notify_data = fencing_result2xml(op, &op->result);
+    fenced_send_notification(T_STONITH_NOTIFY_FENCE, &op->result, notify_data);
     free_xml(notify_data);
     fenced_send_notification(T_STONITH_NOTIFY_HISTORY, NULL, NULL);
 
@@ -600,7 +598,7 @@ finalize_op(remote_fencing_op_t *op, xmlNode *data, bool dup)
                ((op->result.exit_reason == NULL)? "" : op->result.exit_reason),
                op->id);
 
-    handle_local_reply_and_notify(op, data, &op->result);
+    handle_local_reply_and_notify(op, data);
 
     if (!dup) {
         finalize_op_duplicates(op, data);
-- 
2.27.0


From 194056d18d3b550d3a53b94d558ceed03b5e5442 Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Thu, 6 Jan 2022 17:18:27 -0600
Subject: [PATCH 06/15] Refactor: fencer: drop redundant argument from
 fencing_result2xml()

... now that the op includes the result
---
 daemons/fenced/fenced_remote.c | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/daemons/fenced/fenced_remote.c b/daemons/fenced/fenced_remote.c
index 2621cb2f19..8d4f53eef6 100644
--- a/daemons/fenced/fenced_remote.c
+++ b/daemons/fenced/fenced_remote.c
@@ -362,13 +362,12 @@ undo_op_remap(remote_fencing_op_t *op)
  * \brief Create notification data XML for a fencing operation result
  *
  * \param[in] op      Fencer operation that completed
- * \param[in] result  Full operation result
  *
  * \return Newly created XML to add as notification data
  * \note The caller is responsible for freeing the result.
  */
 static xmlNode *
-fencing_result2xml(remote_fencing_op_t *op, pcmk__action_result_t *result)
+fencing_result2xml(remote_fencing_op_t *op)
 {
     xmlNode *notify_data = create_xml_node(NULL, T_STONITH_NOTIFY_FENCE);
 
@@ -381,7 +380,7 @@ fencing_result2xml(remote_fencing_op_t *op, pcmk__action_result_t *result)
     crm_xml_add(notify_data, F_STONITH_CLIENTID, op->client_id);
     crm_xml_add(notify_data, F_STONITH_CLIENTNAME, op->client_name);
 
-    stonith__xe_set_result(notify_data, result);
+    stonith__xe_set_result(notify_data, &op->result);
     return notify_data;
 }
 
@@ -397,7 +396,7 @@ fenced_broadcast_op_result(remote_fencing_op_t *op, bool op_merged)
 {
     static int count = 0;
     xmlNode *bcast = create_xml_node(NULL, T_STONITH_REPLY);
-    xmlNode *notify_data = fencing_result2xml(op, &op->result);
+    xmlNode *notify_data = fencing_result2xml(op);
 
     count++;
     crm_trace("Broadcasting result to peers");
@@ -448,7 +447,7 @@ handle_local_reply_and_notify(remote_fencing_op_t *op, xmlNode *data)
     do_local_reply(reply, op->client_id, op->call_options & st_opt_sync_call, FALSE);
 
     /* bcast to all local clients that the fencing operation happend */
-    notify_data = fencing_result2xml(op, &op->result);
+    notify_data = fencing_result2xml(op);
     fenced_send_notification(T_STONITH_NOTIFY_FENCE, &op->result, notify_data);
     free_xml(notify_data);
     fenced_send_notification(T_STONITH_NOTIFY_HISTORY, NULL, NULL);
-- 
2.27.0


From c5d38cb201a1219ca95127cba9c3a778e31966a2 Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Thu, 6 Jan 2022 17:35:43 -0600
Subject: [PATCH 07/15] Refactor: fencer: drop redundant argument from
 request_peer_fencing()

... now that the op includes the result
---
 daemons/fenced/fenced_remote.c | 66 +++++++++++++---------------------
 1 file changed, 25 insertions(+), 41 deletions(-)

diff --git a/daemons/fenced/fenced_remote.c b/daemons/fenced/fenced_remote.c
index 8d4f53eef6..7fb7695fba 100644
--- a/daemons/fenced/fenced_remote.c
+++ b/daemons/fenced/fenced_remote.c
@@ -80,8 +80,7 @@ extern xmlNode *stonith_create_op(int call_id, const char *token, const char *op
                                   int call_options);
 
 static void request_peer_fencing(remote_fencing_op_t *op,
-                                peer_device_info_t *peer,
-                                pcmk__action_result_t *result);
+                                 peer_device_info_t *peer);
 static void finalize_op(remote_fencing_op_t *op, xmlNode *data, bool dup);
 static void report_timeout_period(remote_fencing_op_t * op, int op_timeout);
 static int get_op_total_timeout(const remote_fencing_op_t *op,
@@ -646,18 +645,16 @@ static gboolean
 remote_op_timeout_one(gpointer userdata)
 {
     remote_fencing_op_t *op = userdata;
-    pcmk__action_result_t result = PCMK__UNKNOWN_RESULT;
 
     op->op_timer_one = 0;
 
     crm_notice("Peer's '%s' action targeting %s for client %s timed out " CRM_XS
                " id=%.8s", op->action, op->target, op->client_name, op->id);
-    pcmk__set_result(&result, CRM_EX_ERROR, PCMK_EXEC_TIMEOUT,
+    pcmk__set_result(&op->result, CRM_EX_ERROR, PCMK_EXEC_TIMEOUT,
                      "Peer did not return fence result within timeout");
 
-
     // Try another device, if appropriate
-    request_peer_fencing(op, NULL, &result);
+    request_peer_fencing(op, NULL);
     return FALSE;
 }
 
@@ -730,13 +727,10 @@ remote_op_query_timeout(gpointer data)
         crm_debug("Operation %.8s targeting %s already in progress",
                   op->id, op->target);
     } else if (op->query_results) {
-        // Result won't be used in this case, but we need to pass something
-        pcmk__action_result_t result = PCMK__UNKNOWN_RESULT;
-
         // Query succeeded, so attempt the actual fencing
         crm_debug("Query %.8s targeting %s complete (state=%s)",
                   op->id, op->target, stonith_op_state_str(op->state));
-        request_peer_fencing(op, NULL, &result);
+        request_peer_fencing(op, NULL);
     } else {
         crm_debug("Query %.8s targeting %s timed out (state=%s)",
                   op->id, op->target, stonith_op_state_str(op->state));
@@ -1622,11 +1616,10 @@ advance_topology_device_in_level(remote_fencing_op_t *op, const char *device,
         op_phase_on(op);
     }
 
-    if (op->devices) {
-        pcmk__action_result_t result = PCMK__UNKNOWN_RESULT;
-
-        pcmk__set_result(&result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
+    // This function is only called if the previous device succeeded
+    pcmk__set_result(&op->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
 
+    if (op->devices) {
         /* Necessary devices remain, so execute the next one */
         crm_trace("Next targeting %s on behalf of %s@%s",
                   op->target, op->client_name, op->originator);
@@ -1636,13 +1629,12 @@ advance_topology_device_in_level(remote_fencing_op_t *op, const char *device,
             op->delay = 0;
         }
 
-        request_peer_fencing(op, NULL, &result);
+        request_peer_fencing(op, NULL);
     } else {
         /* We're done with all devices and phases, so finalize operation */
         crm_trace("Marking complex fencing op targeting %s as complete",
                   op->target);
         op->state = st_done;
-        pcmk__set_result(&op->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
         finalize_op(op, msg, false);
     }
 }
@@ -1673,13 +1665,9 @@ check_watchdog_fencing_and_wait(remote_fencing_op_t * op)
  * \param[in] op      Fencing operation to be executed
  * \param[in] peer    If NULL or topology is in use, choose best peer to execute
  *                    the fencing, otherwise use this peer
- * \param[in] result  Full result of previous failed attempt, if any (used as
- *                    final result only if a previous attempt failed, topology
- *                    is not in use, and no devices remain to be attempted)
  */
 static void
-request_peer_fencing(remote_fencing_op_t *op, peer_device_info_t *peer,
-                    pcmk__action_result_t *result)
+request_peer_fencing(remote_fencing_op_t *op, peer_device_info_t *peer)
 {
     const char *device = NULL;
     int timeout;
@@ -1822,27 +1810,26 @@ request_peer_fencing(remote_fencing_op_t *op, peer_device_info_t *peer,
             }
         }
 
-        // This is the only case in which result will be used
-        CRM_CHECK(result != NULL, return);
-
         if (op->state == st_query) {
             crm_info("No peers (out of %d) have devices capable of fencing "
                      "(%s) %s for client %s " CRM_XS " state=%s",
                      op->replies, op->action, op->target, op->client_name,
                      stonith_op_state_str(op->state));
 
-            pcmk__reset_result(result);
-            pcmk__set_result(result, CRM_EX_ERROR, PCMK_EXEC_NO_FENCE_DEVICE,
-                             NULL);
+            pcmk__reset_result(&op->result);
+            pcmk__set_result(&op->result, CRM_EX_ERROR,
+                             PCMK_EXEC_NO_FENCE_DEVICE, NULL);
         } else {
             if (pcmk_is_set(op->call_options, st_opt_topology)) {
-                pcmk__reset_result(result);
-                pcmk__set_result(result, CRM_EX_ERROR,
+                pcmk__reset_result(&op->result);
+                pcmk__set_result(&op->result, CRM_EX_ERROR,
                                  PCMK_EXEC_NO_FENCE_DEVICE, NULL);
             }
-            /* ... else use result provided by caller -- overwriting it with
-               PCMK_EXEC_NO_FENCE_DEVICE would prevent finalize_op() from
-               setting the correct delegate if needed.
+            /* ... else use existing result from previous failed attempt
+             * (topology is not in use, and no devices remain to be attempted).
+             * Overwriting the result with PCMK_EXEC_NO_FENCE_DEVICE would
+             * prevent finalize_op() from setting the correct delegate if
+             * needed.
              */
 
             crm_info("No peers (out of %d) are capable of fencing (%s) %s "
@@ -1852,8 +1839,6 @@ request_peer_fencing(remote_fencing_op_t *op, peer_device_info_t *peer,
         }
 
         op->state = st_failed;
-        pcmk__set_result(&op->result, result->exit_status,
-                         result->execution_status, result->exit_reason);
         finalize_op(op, NULL, false);
 
     } else {
@@ -2104,7 +2089,6 @@ process_remote_stonith_query(xmlNode * msg)
     peer_device_info_t *peer = NULL;
     uint32_t replies_expected;
     xmlNode *dev = get_xpath_object("//@" F_STONITH_REMOTE_OP_ID, msg, LOG_ERR);
-    pcmk__action_result_t result = PCMK__UNKNOWN_RESULT;
 
     CRM_CHECK(dev != NULL, return -EPROTO);
 
@@ -2139,7 +2123,7 @@ process_remote_stonith_query(xmlNode * msg)
         peer = add_result(op, host, ndevices, dev);
     }
 
-    pcmk__set_result(&result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
+    pcmk__set_result(&op->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
 
     if (pcmk_is_set(op->call_options, st_opt_topology)) {
         /* If we start the fencing before all the topology results are in,
@@ -2148,12 +2132,12 @@ process_remote_stonith_query(xmlNode * msg)
         if (op->state == st_query && all_topology_devices_found(op)) {
             /* All the query results are in for the topology, start the fencing ops. */
             crm_trace("All topology devices found");
-            request_peer_fencing(op, peer, &result);
+            request_peer_fencing(op, peer);
 
         } else if (have_all_replies) {
             crm_info("All topology query replies have arrived, continuing (%d expected/%d received) ",
                      replies_expected, op->replies);
-            request_peer_fencing(op, NULL, &result);
+            request_peer_fencing(op, NULL);
         }
 
     } else if (op->state == st_query) {
@@ -2165,12 +2149,12 @@ process_remote_stonith_query(xmlNode * msg)
             /* we have a verified device living on a peer that is not the target */
             crm_trace("Found %d verified device%s",
                       nverified, pcmk__plural_s(nverified));
-            request_peer_fencing(op, peer, &result);
+            request_peer_fencing(op, peer);
 
         } else if (have_all_replies) {
             crm_info("All query replies have arrived, continuing (%d expected/%d received) ",
                      replies_expected, op->replies);
-            request_peer_fencing(op, NULL, &result);
+            request_peer_fencing(op, NULL);
 
         } else {
             crm_trace("Waiting for more peer results before launching fencing operation");
@@ -2336,7 +2320,7 @@ fenced_process_fencing_reply(xmlNode *msg)
     crm_trace("Next for %s on behalf of %s@%s (result was: %s)",
               op->target, op->originator, op->client_name,
               pcmk_exec_status_str(op->result.execution_status));
-    request_peer_fencing(op, NULL, &op->result);
+    request_peer_fencing(op, NULL);
 }
 
 gboolean
-- 
2.27.0


From be0a0b652c13161a82b05d3104449b7bfc06e8ac Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Thu, 6 Jan 2022 17:56:24 -0600
Subject: [PATCH 08/15] Feature: fencer: track full result in fencing history

Add fencing operation results when creating XML in
stonith_local_history_diff_and_merge(), and parse the results from the received
XML in stonith_xml_history_to_list().

With this, the fencer now always has full results in its op list, and returns
them in the reply for STONITH_OP_FENCE_HISTORY requests (though nothing uses
that as of this commit).
---
 daemons/fenced/fenced_history.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/daemons/fenced/fenced_history.c b/daemons/fenced/fenced_history.c
index 5cacf36ca8..3ebf016e67 100644
--- a/daemons/fenced/fenced_history.c
+++ b/daemons/fenced/fenced_history.c
@@ -257,6 +257,7 @@ stonith_xml_history_to_list(xmlNode *history)
         op->completed_nsec = completed_nsec;
         crm_element_value_int(xml_op, F_STONITH_STATE, &state);
         op->state = (enum op_state) state;
+        stonith__xe_get_result(xml_op, &op->result);
 
         g_hash_table_replace(rv, id, op);
         CRM_LOG_ASSERT(g_hash_table_lookup(rv, id) != NULL);
@@ -355,6 +356,7 @@ stonith_local_history_diff_and_merge(GHashTable *remote_history,
                 crm_xml_add_ll(entry, F_STONITH_DATE, op->completed);
                 crm_xml_add_ll(entry, F_STONITH_DATE_NSEC, op->completed_nsec);
                 crm_xml_add_int(entry, F_STONITH_STATE, op->state);
+                stonith__xe_set_result(entry, &op->result);
             }
     }
 
-- 
2.27.0


From afc5292036e212bcfc7475893e0b326b2a69ac58 Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Wed, 22 Dec 2021 17:17:21 -0600
Subject: [PATCH 09/15] API: libstonithd: add exit_reason member to
 stonith_history_t

not yet used, but will be
---
 include/crm/stonith-ng.h | 3 ++-
 lib/fencing/st_client.c  | 3 ++-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/include/crm/stonith-ng.h b/include/crm/stonith-ng.h
index 3fe9cf54f8..2c79bfa579 100644
--- a/include/crm/stonith-ng.h
+++ b/include/crm/stonith-ng.h
@@ -1,5 +1,5 @@
 /*
- * Copyright 2004-2021 the Pacemaker project contributors
+ * Copyright 2004-2022 the Pacemaker project contributors
  *
  * The version control history for this file may have further details.
  *
@@ -111,6 +111,7 @@ typedef struct stonith_history_s {
     time_t completed;
     struct stonith_history_s *next;
     long completed_nsec;
+    char *exit_reason;
 } stonith_history_t;
 
 typedef struct stonith_s stonith_t;
diff --git a/lib/fencing/st_client.c b/lib/fencing/st_client.c
index 718739b321..57a2e03361 100644
--- a/lib/fencing/st_client.c
+++ b/lib/fencing/st_client.c
@@ -1,5 +1,5 @@
 /*
- * Copyright 2004-2021 the Pacemaker project contributors
+ * Copyright 2004-2022 the Pacemaker project contributors
  *
  * The version control history for this file may have further details.
  *
@@ -735,6 +735,7 @@ void stonith_history_free(stonith_history_t *history)
         free(hp->origin);
         free(hp->delegate);
         free(hp->client);
+        free(hp->exit_reason);
     }
 }
 
-- 
2.27.0


From 1b9e2896322849002a5c0a3a34c9375ea32571d6 Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Thu, 6 Jan 2022 18:04:15 -0600
Subject: [PATCH 10/15] Feature: fencing: return exit reason with fencing
 history

libstonithd's stonith_t:cmds->history() method now parses exit reasons from the
fencer reply, and returns them in the stonith_history_t results.
---
 lib/fencing/st_client.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/lib/fencing/st_client.c b/lib/fencing/st_client.c
index 57a2e03361..d229b34805 100644
--- a/lib/fencing/st_client.c
+++ b/lib/fencing/st_client.c
@@ -698,6 +698,7 @@ stonith_api_history(stonith_t * stonith, int call_options, const char *node,
             stonith_history_t *kvp;
             long long completed;
             long long completed_nsec = 0L;
+            pcmk__action_result_t result = PCMK__UNKNOWN_RESULT;
 
             kvp = calloc(1, sizeof(stonith_history_t));
             kvp->target = crm_element_value_copy(op, F_STONITH_TARGET);
@@ -711,6 +712,11 @@ stonith_api_history(stonith_t * stonith, int call_options, const char *node,
             kvp->completed_nsec = completed_nsec;
             crm_element_value_int(op, F_STONITH_STATE, &kvp->state);
 
+            stonith__xe_get_result(op, &result);
+            kvp->exit_reason = result.exit_reason;
+            result.exit_reason = NULL;
+            pcmk__reset_result(&result);
+
             if (last) {
                 last->next = kvp;
             } else {
-- 
2.27.0


From ba4e77242e9be4ebeb2843b444ee4afad43c29f3 Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Fri, 7 Jan 2022 09:44:39 -0600
Subject: [PATCH 11/15] Feature: fencing: display exit reasons with failed
 fencing events

... when available
---
 lib/fencing/st_output.c | 20 ++++++++++++++++----
 tools/crm_mon_curses.c  |  9 +++++++--
 2 files changed, 23 insertions(+), 6 deletions(-)

diff --git a/lib/fencing/st_output.c b/lib/fencing/st_output.c
index e484278867..18924d795d 100644
--- a/lib/fencing/st_output.c
+++ b/lib/fencing/st_output.c
@@ -1,5 +1,5 @@
 /*
- * Copyright 2019-2021 the Pacemaker project contributors
+ * Copyright 2019-2022 the Pacemaker project contributors
  *
  * The version control history for this file may have further details.
  *
@@ -11,6 +11,7 @@
 #include <stdarg.h>
 
 #include <crm/stonith-ng.h>
+#include <crm/msg_xml.h>
 #include <crm/common/iso8601.h>
 #include <crm/common/util.h>
 #include <crm/common/xml.h>
@@ -263,8 +264,12 @@ stonith_event_html(pcmk__output_t *out, va_list args) {
             char *failed_s = time_t_string(event->completed);
 
             out->list_item(out, "failed-stonith-event",
-                           "%s of %s failed : delegate=%s, client=%s, origin=%s, %s='%s' %s",
+                           "%s of %s failed%s%s%s: "
+                           "delegate=%s, client=%s, origin=%s, %s='%s' %s",
                            stonith_action_str(event->action), event->target,
+                           (event->exit_reason == NULL)? "" : " (",
+                           (event->exit_reason == NULL)? "" : event->exit_reason,
+                           (event->exit_reason == NULL)? "" : ")",
                            event->delegate ? event->delegate : "",
                            event->client, event->origin,
                            full_history ? "completed" : "last-failed",
@@ -296,8 +301,13 @@ stonith_event_text(pcmk__output_t *out, va_list args) {
 
     switch (event->state) {
         case st_failed:
-            pcmk__indented_printf(out, "%s of %s failed: delegate=%s, client=%s, origin=%s, %s='%s' %s\n",
+            pcmk__indented_printf(out,
+                                  "%s of %s failed%s%s%s: "
+                                  "delegate=%s, client=%s, origin=%s, %s='%s' %s\n",
                                   stonith_action_str(event->action), event->target,
+                                  (event->exit_reason == NULL)? "" : " (",
+                                  (event->exit_reason == NULL)? "" : event->exit_reason,
+                                  (event->exit_reason == NULL)? "" : ")",
                                   event->delegate ? event->delegate : "",
                                   event->client, event->origin,
                                   full_history ? "completed" : "last-failed", buf,
@@ -341,7 +351,9 @@ stonith_event_xml(pcmk__output_t *out, va_list args) {
 
     switch (event->state) {
         case st_failed:
-            crm_xml_add(node, "status", "failed");
+            pcmk__xe_set_props(node, "status", "failed",
+                               XML_LRM_ATTR_EXIT_REASON, event->exit_reason,
+                               NULL);
             break;
 
         case st_done:
diff --git a/tools/crm_mon_curses.c b/tools/crm_mon_curses.c
index bae3710c44..73c8516a8c 100644
--- a/tools/crm_mon_curses.c
+++ b/tools/crm_mon_curses.c
@@ -1,5 +1,5 @@
 /*
- * Copyright 2019-2021 the Pacemaker project contributors
+ * Copyright 2019-2022 the Pacemaker project contributors
  *
  * The version control history for this file may have further details.
  *
@@ -463,8 +463,13 @@ stonith_event_console(pcmk__output_t *out, va_list args) {
 
     switch (event->state) {
         case st_failed:
-            curses_indented_printf(out, "%s of %s failed: delegate=%s, client=%s, origin=%s, %s='%s'%s\n",
+            curses_indented_printf(out,
+                                   "%s of %s failed%s%s%s: "
+                                   "delegate=%s, client=%s, origin=%s, %s='%s' %s\n",
                                    stonith_action_str(event->action), event->target,
+                                   (event->exit_reason == NULL)? "" : " (",
+                                   (event->exit_reason == NULL)? "" : event->exit_reason,
+                                   (event->exit_reason == NULL)? "" : ")",
                                    event->delegate ? event->delegate : "",
                                    event->client, event->origin,
                                    full_history ? "completed" : "last-failed", buf,
-- 
2.27.0


From 8105fb4a3a786780fdf85b3d0308eaf6df1ea434 Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Fri, 7 Jan 2022 09:45:22 -0600
Subject: [PATCH 12/15] Low: schemas: copy fence-event API schema in
 preparation for changes

---
 include/crm/common/output_internal.h |  2 +-
 xml/api/fence-event-2.15.rng         | 33 ++++++++++++++++++++++++++++
 2 files changed, 34 insertions(+), 1 deletion(-)
 create mode 100644 xml/api/fence-event-2.15.rng

diff --git a/include/crm/common/output_internal.h b/include/crm/common/output_internal.h
index 479f0e4b43..8c5dcee17c 100644
--- a/include/crm/common/output_internal.h
+++ b/include/crm/common/output_internal.h
@@ -27,7 +27,7 @@ extern "C" {
 #  include <glib.h>
 #  include <crm/common/results.h>
 
-#  define PCMK__API_VERSION "2.14"
+#  define PCMK__API_VERSION "2.15"
 
 #if defined(PCMK__WITH_ATTRIBUTE_OUTPUT_ARGS)
 #  define PCMK__OUTPUT_ARGS(ARGS...) __attribute__((output_args(ARGS)))
diff --git a/xml/api/fence-event-2.15.rng b/xml/api/fence-event-2.15.rng
new file mode 100644
index 0000000000..e54687cd25
--- /dev/null
+++ b/xml/api/fence-event-2.15.rng
@@ -0,0 +1,33 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<grammar xmlns="http://relaxng.org/ns/structure/1.0"
+         datatypeLibrary="http://www.w3.org/2001/XMLSchema-datatypes">
+
+    <start>
+        <ref name="fencing-history-event"/>
+    </start>
+
+    <define name="fencing-history-event">
+        <element name="fence_event">
+            <attribute name="status">
+                <choice>
+                    <value>failed</value>
+                    <value>success</value>
+                    <value>pending</value>
+                </choice>
+            </attribute>
+            <optional>
+                <attribute name="extended-status"> <text /> </attribute>
+            </optional>
+            <optional>
+                <attribute name="delegate"> <text /> </attribute>
+            </optional>
+            <attribute name="action"> <text /> </attribute>
+            <attribute name="target"> <text /> </attribute>
+            <attribute name="client"> <text /> </attribute>
+            <attribute name="origin"> <text /> </attribute>
+            <optional>
+                <attribute name="completed"> <text /> </attribute>
+            </optional>
+        </element>
+    </define>
+</grammar>
-- 
2.27.0


From 46dd9b74d2ee8f7ab70a0c7fe3a998954d4029e8 Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Fri, 7 Jan 2022 09:47:16 -0600
Subject: [PATCH 13/15] Low: schemas: update fence-event API schema for recent
 change

---
 xml/api/fence-event-2.15.rng | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/xml/api/fence-event-2.15.rng b/xml/api/fence-event-2.15.rng
index e54687cd25..8e000cafa5 100644
--- a/xml/api/fence-event-2.15.rng
+++ b/xml/api/fence-event-2.15.rng
@@ -18,6 +18,9 @@
             <optional>
                 <attribute name="extended-status"> <text /> </attribute>
             </optional>
+            <optional>
+                <attribute name="exit-reason"> <text /> </attribute>
+            </optional>
             <optional>
                 <attribute name="delegate"> <text /> </attribute>
             </optional>
-- 
2.27.0


From 350e71772f67f28af6b67f864cbabc481730035c Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Fri, 7 Jan 2022 11:32:09 -0600
Subject: [PATCH 14/15] Build: libstonithd: bump shared library version

... for stonith_history_t change since 2.1.2.

The struct should only ever be returned by the library as a pointer, so the
changes can be considered backward-compatible. Normally we wouldn't bump shared
library versions mid-cycle, but this will simplify expected backports of this
change.
---
 lib/fencing/Makefile.am | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/lib/fencing/Makefile.am b/lib/fencing/Makefile.am
index 1ffa3e051b..a10ddb88ec 100644
--- a/lib/fencing/Makefile.am
+++ b/lib/fencing/Makefile.am
@@ -2,7 +2,7 @@
 # Original Author: Sun Jiang Dong <sunjd@cn.ibm.com>
 # Copyright 2004 International Business Machines
 #
-# with later changes copyright 2004-2021 the Pacemaker project contributors.
+# with later changes copyright 2004-2022 the Pacemaker project contributors.
 # The version control history for this file may have further details.
 #
 # This source code is licensed under the GNU General Public License version 2
@@ -14,7 +14,7 @@ noinst_HEADERS		= fencing_private.h
 
 lib_LTLIBRARIES		= libstonithd.la
 
-libstonithd_la_LDFLAGS	= -version-info 33:0:7
+libstonithd_la_LDFLAGS	= -version-info 34:0:8
 
 libstonithd_la_CFLAGS	= $(CFLAGS_HARDENED_LIB)
 libstonithd_la_LDFLAGS	+= $(LDFLAGS_HARDENED_LIB)
-- 
2.27.0


From 63ea88620a62ff0759560a02bb5e284ebdd03eb6 Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Wed, 19 Jan 2022 16:53:45 -0600
Subject: [PATCH 15/15] Low: fencer: reset op result before grabbing new one

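Just in case the op already holds a result (for example, an exit reason set
when an earlier attempt timed out), free it before overwriting it with the
newly received result, so that nothing is leaked.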
---
 daemons/fenced/fenced_remote.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/daemons/fenced/fenced_remote.c b/daemons/fenced/fenced_remote.c
index 7fb7695fba..dc4649e0fc 100644
--- a/daemons/fenced/fenced_remote.c
+++ b/daemons/fenced/fenced_remote.c
@@ -2219,6 +2219,7 @@ fenced_process_fencing_reply(xmlNode *msg)
         return;
     }
 
+    pcmk__reset_result(&op->result);
     op->result = result; // The operation takes ownership of the result
 
     if (op->devices && device && !pcmk__str_eq(op->devices->data, device, pcmk__str_casei)) {
-- 
2.27.0