Blob Blame History Raw
From 3d10dad9a555aae040d8473edfe31a4e4279c066 Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Thu, 11 Nov 2021 12:34:03 -0600
Subject: [PATCH 01/19] Refactor: libcrmcommon: add internal API for checking
 for fencing action

The naming is a little awkward -- "fencing action" has multiple meanings
depending on the context. It can refer to fencer API requests, fence device
actions, fence agent actions, or just those actions that fence a node (off and
reboot).

This new function pcmk__is_fencing_action() uses the last meaning, so it does
*not* return true for unfencing ("on" actions).
---
 include/crm/common/internal.h |  1 +
 lib/common/operations.c       | 14 ++++++++++++++
 2 files changed, 15 insertions(+)

diff --git a/include/crm/common/internal.h b/include/crm/common/internal.h
index a35c5769a..694fc6cd4 100644
--- a/include/crm/common/internal.h
+++ b/include/crm/common/internal.h
@@ -218,6 +218,7 @@ char *pcmk__notify_key(const char *rsc_id, const char *notify_type,
 char *pcmk__transition_key(int transition_id, int action_id, int target_rc,
                            const char *node);
 void pcmk__filter_op_for_digest(xmlNode *param_set);
+bool pcmk__is_fencing_action(const char *action);
 
 
 // bitwise arithmetic utilities
diff --git a/lib/common/operations.c b/lib/common/operations.c
index aa7106ce6..366c18970 100644
--- a/lib/common/operations.c
+++ b/lib/common/operations.c
@@ -523,3 +523,17 @@ crm_op_needs_metadata(const char *rsc_class, const char *op)
                             CRMD_ACTION_MIGRATE, CRMD_ACTION_MIGRATED,
                             CRMD_ACTION_NOTIFY, NULL);
 }
+
+/*!
+ * \internal
+ * \brief Check whether an action name is for a fencing action
+ *
+ * \param[in] action  Action name to check
+ *
+ * \return true if \p action is "off", "reboot", or "poweroff", otherwise false
+ */
+bool
+pcmk__is_fencing_action(const char *action)
+{
+    return pcmk__str_any_of(action, "off", "reboot", "poweroff", NULL);
+}
-- 
2.27.0


From 86ac00fb3e99d79ca2c442ae1670fe850146f734 Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Thu, 11 Nov 2021 12:38:58 -0600
Subject: [PATCH 02/19] Low: fencer,scheduler: compare fence action names
 case-sensitively

Use the new convenience function pcmk__is_fencing_action() to check whether
an action name is a fencing action ("off", "reboot", or "poweroff"). This
changes the behavior from case-insensitive to case-sensitive, which is more
appropriate (the case-insensitivity was inherited from lazy use of the old
safe_str_eq() function which was always case-insensitive).
---
 daemons/fenced/fenced_commands.c    | 6 +++---
 daemons/fenced/fenced_remote.c      | 2 +-
 lib/pacemaker/pcmk_graph_producer.c | 2 +-
 lib/pengine/common.c                | 8 +-------
 4 files changed, 6 insertions(+), 12 deletions(-)

diff --git a/daemons/fenced/fenced_commands.c b/daemons/fenced/fenced_commands.c
index 63bfad3a9..46c840f2a 100644
--- a/daemons/fenced/fenced_commands.c
+++ b/daemons/fenced/fenced_commands.c
@@ -128,7 +128,7 @@ get_action_delay_max(stonith_device_t * device, const char * action)
     const char *value = NULL;
     int delay_max = 0;
 
-    if (!pcmk__strcase_any_of(action, "off", "reboot", NULL)) {
+    if (!pcmk__is_fencing_action(action)) {
         return 0;
     }
 
@@ -146,7 +146,7 @@ get_action_delay_base(stonith_device_t *device, const char *action, const char *
     char *hash_value = NULL;
     int delay_base = 0;
 
-    if (!pcmk__strcase_any_of(action, "off", "reboot", NULL)) {
+    if (!pcmk__is_fencing_action(action)) {
         return 0;
     }
 
@@ -448,7 +448,7 @@ stonith_device_execute(stonith_device_t * device)
 
     if (pcmk__str_any_of(device->agent, STONITH_WATCHDOG_AGENT,
                          STONITH_WATCHDOG_AGENT_INTERNAL, NULL)) {
-        if (pcmk__strcase_any_of(cmd->action, "reboot", "off", NULL)) {
+        if (pcmk__is_fencing_action(cmd->action)) {
             if (node_does_watchdog_fencing(stonith_our_uname)) {
                 pcmk__panic(__func__);
                 goto done;
diff --git a/daemons/fenced/fenced_remote.c b/daemons/fenced/fenced_remote.c
index 963433bf3..358ea3aa7 100644
--- a/daemons/fenced/fenced_remote.c
+++ b/daemons/fenced/fenced_remote.c
@@ -1758,7 +1758,7 @@ all_topology_devices_found(remote_fencing_op_t * op)
     if (!tp) {
         return FALSE;
     }
-    if (pcmk__strcase_any_of(op->action, "off", "reboot", NULL)) {
+    if (pcmk__is_fencing_action(op->action)) {
         /* Don't count the devices on the target node if we are killing
          * the target node. */
         skip_target = TRUE;
diff --git a/lib/pacemaker/pcmk_graph_producer.c b/lib/pacemaker/pcmk_graph_producer.c
index ffcbd1274..5bec9d8ce 100644
--- a/lib/pacemaker/pcmk_graph_producer.c
+++ b/lib/pacemaker/pcmk_graph_producer.c
@@ -721,7 +721,7 @@ add_downed_nodes(xmlNode *xml, const pe_action_t *action,
         /* Fencing makes the action's node and any hosted guest nodes down */
         const char *fence = g_hash_table_lookup(action->meta, "stonith_action");
 
-        if (pcmk__strcase_any_of(fence, "off", "reboot", NULL)) {
+        if (pcmk__is_fencing_action(fence)) {
             xmlNode *downed = create_xml_node(xml, XML_GRAPH_TAG_DOWNED);
             add_node_to_xml_by_id(action->node->details->id, downed);
             pe_foreach_guest_node(data_set, action->node, add_node_to_xml, downed);
diff --git a/lib/pengine/common.c b/lib/pengine/common.c
index 236fc26b1..fe4223816 100644
--- a/lib/pengine/common.c
+++ b/lib/pengine/common.c
@@ -27,12 +27,6 @@ check_health(const char *value)
                            "migrate-on-red", NULL);
 }
 
-static bool
-check_stonith_action(const char *value)
-{
-    return pcmk__strcase_any_of(value, "reboot", "poweroff", "off", NULL);
-}
-
 static bool
 check_placement_strategy(const char *value)
 {
@@ -114,7 +108,7 @@ static pcmk__cluster_option_t pe_opts[] = {
     },
     {
         "stonith-action", NULL, "select", "reboot, off, poweroff",
-        "reboot", check_stonith_action,
+        "reboot", pcmk__is_fencing_action,
         "Action to send to fence device when a node needs to be fenced "
             "(\"poweroff\" is a deprecated alias for \"off\")",
         NULL
-- 
2.27.0


From c8f6e8a04c4fa4271db817af0a23aa941c9d7689 Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Fri, 12 Nov 2021 17:42:21 -0600
Subject: [PATCH 03/19] Refactor: fencing: rename type for peer query replies

st_query_result_t contains the device information parsed from a peer's query
reply, but the name could easily be confused with the actual success/failure
result of the query action itself. Rename it to peer_device_info_t.
---
 daemons/fenced/fenced_remote.c | 103 +++++++++++++++++----------------
 1 file changed, 52 insertions(+), 51 deletions(-)

diff --git a/daemons/fenced/fenced_remote.c b/daemons/fenced/fenced_remote.c
index 358ea3aa7..9e2f62804 100644
--- a/daemons/fenced/fenced_remote.c
+++ b/daemons/fenced/fenced_remote.c
@@ -41,7 +41,7 @@
 
 /* When one fencer queries its peers for devices able to handle a fencing
  * request, each peer will reply with a list of such devices available to it.
- * Each reply will be parsed into a st_query_result_t, with each device's
+ * Each reply will be parsed into a peer_device_info_t, with each device's
  * information kept in a device_properties_t.
  */
 
@@ -72,18 +72,19 @@ typedef struct st_query_result_s {
     int ndevices;
     /* Devices available to this host that are capable of fencing the target */
     GHashTable *devices;
-} st_query_result_t;
+} peer_device_info_t;
 
 GHashTable *stonith_remote_op_list = NULL;
 
-void call_remote_stonith(remote_fencing_op_t * op, st_query_result_t * peer, int rc);
+void call_remote_stonith(remote_fencing_op_t *op, peer_device_info_t *peer,
+                         int rc);
 static void remote_op_done(remote_fencing_op_t * op, xmlNode * data, int rc, int dup);
 extern xmlNode *stonith_create_op(int call_id, const char *token, const char *op, xmlNode * data,
                                   int call_options);
 
 static void report_timeout_period(remote_fencing_op_t * op, int op_timeout);
 static int get_op_total_timeout(const remote_fencing_op_t *op,
-                                const st_query_result_t *chosen_peer);
+                                const peer_device_info_t *chosen_peer);
 
 static gint
 sort_strings(gconstpointer a, gconstpointer b)
@@ -95,7 +96,7 @@ static void
 free_remote_query(gpointer data)
 {
     if (data) {
-        st_query_result_t *query = data;
+        peer_device_info_t *query = data;
 
         crm_trace("Free'ing query result from %s", query->host);
         g_hash_table_destroy(query->devices);
@@ -150,8 +151,8 @@ count_peer_device(gpointer key, gpointer value, gpointer user_data)
  * \return Number of devices available to peer that were not already executed
  */
 static int
-count_peer_devices(const remote_fencing_op_t *op, const st_query_result_t *peer,
-                   gboolean verified_only)
+count_peer_devices(const remote_fencing_op_t *op,
+                   const peer_device_info_t *peer, gboolean verified_only)
 {
     struct peer_count_data data;
 
@@ -175,7 +176,7 @@ count_peer_devices(const remote_fencing_op_t *op, const st_query_result_t *peer,
  * \return Device properties if found, NULL otherwise
  */
 static device_properties_t *
-find_peer_device(const remote_fencing_op_t *op, const st_query_result_t *peer,
+find_peer_device(const remote_fencing_op_t *op, const peer_device_info_t *peer,
                  const char *device)
 {
     device_properties_t *props = g_hash_table_lookup(peer->devices, device);
@@ -196,7 +197,7 @@ find_peer_device(const remote_fencing_op_t *op, const st_query_result_t *peer,
  * \return TRUE if device was found and marked, FALSE otherwise
  */
 static gboolean
-grab_peer_device(const remote_fencing_op_t *op, st_query_result_t *peer,
+grab_peer_device(const remote_fencing_op_t *op, peer_device_info_t *peer,
                  const char *device, gboolean verified_devices_only)
 {
     device_properties_t *props = find_peer_device(op, peer, device);
@@ -1216,7 +1217,7 @@ enum find_best_peer_options {
     FIND_PEER_VERIFIED_ONLY = 0x0004,
 };
 
-static st_query_result_t *
+static peer_device_info_t *
 find_best_peer(const char *device, remote_fencing_op_t * op, enum find_best_peer_options options)
 {
     GList *iter = NULL;
@@ -1227,7 +1228,7 @@ find_best_peer(const char *device, remote_fencing_op_t * op, enum find_best_peer
     }
 
     for (iter = op->query_results; iter != NULL; iter = iter->next) {
-        st_query_result_t *peer = iter->data;
+        peer_device_info_t *peer = iter->data;
 
         crm_trace("Testing result from %s targeting %s with %d device%s: %d %x",
                   peer->host, op->target, peer->ndevices,
@@ -1257,11 +1258,11 @@ find_best_peer(const char *device, remote_fencing_op_t * op, enum find_best_peer
     return NULL;
 }
 
-static st_query_result_t *
+static peer_device_info_t *
 stonith_choose_peer(remote_fencing_op_t * op)
 {
     const char *device = NULL;
-    st_query_result_t *peer = NULL;
+    peer_device_info_t *peer = NULL;
     uint32_t active = fencing_active_peers();
 
     do {
@@ -1317,8 +1318,8 @@ stonith_choose_peer(remote_fencing_op_t * op)
 }
 
 static int
-get_device_timeout(const remote_fencing_op_t *op, const st_query_result_t *peer,
-                   const char *device)
+get_device_timeout(const remote_fencing_op_t *op,
+                   const peer_device_info_t *peer, const char *device)
 {
     device_properties_t *props;
 
@@ -1338,7 +1339,7 @@ get_device_timeout(const remote_fencing_op_t *op, const st_query_result_t *peer,
 
 struct timeout_data {
     const remote_fencing_op_t *op;
-    const st_query_result_t *peer;
+    const peer_device_info_t *peer;
     int total_timeout;
 };
 
@@ -1365,7 +1366,7 @@ add_device_timeout(gpointer key, gpointer value, gpointer user_data)
 }
 
 static int
-get_peer_timeout(const remote_fencing_op_t *op, const st_query_result_t *peer)
+get_peer_timeout(const remote_fencing_op_t *op, const peer_device_info_t *peer)
 {
     struct timeout_data timeout;
 
@@ -1380,7 +1381,7 @@ get_peer_timeout(const remote_fencing_op_t *op, const st_query_result_t *peer)
 
 static int
 get_op_total_timeout(const remote_fencing_op_t *op,
-                     const st_query_result_t *chosen_peer)
+                     const peer_device_info_t *chosen_peer)
 {
     int total_timeout = 0;
     stonith_topology_t *tp = find_topology_for_host(op->target);
@@ -1403,7 +1404,7 @@ get_op_total_timeout(const remote_fencing_op_t *op,
             }
             for (device_list = tp->levels[i]; device_list; device_list = device_list->next) {
                 for (iter = op->query_results; iter != NULL; iter = iter->next) {
-                    const st_query_result_t *peer = iter->data;
+                    const peer_device_info_t *peer = iter->data;
 
                     if (find_peer_device(op, peer, device_list->data)) {
                         total_timeout += get_device_timeout(op, peer,
@@ -1555,7 +1556,7 @@ check_watchdog_fencing_and_wait(remote_fencing_op_t * op)
 }
 
 void
-call_remote_stonith(remote_fencing_op_t * op, st_query_result_t * peer, int rc)
+call_remote_stonith(remote_fencing_op_t *op, peer_device_info_t *peer, int rc)
 {
     const char *device = NULL;
     int timeout = op->base_timeout;
@@ -1734,8 +1735,8 @@ call_remote_stonith(remote_fencing_op_t * op, st_query_result_t * peer, int rc)
 static gint
 sort_peers(gconstpointer a, gconstpointer b)
 {
-    const st_query_result_t *peer_a = a;
-    const st_query_result_t *peer_b = b;
+    const peer_device_info_t *peer_a = a;
+    const peer_device_info_t *peer_b = b;
 
     return (peer_b->ndevices - peer_a->ndevices);
 }
@@ -1768,7 +1769,7 @@ all_topology_devices_found(remote_fencing_op_t * op)
         for (device = tp->levels[i]; device; device = device->next) {
             match = NULL;
             for (iter = op->query_results; iter && !match; iter = iter->next) {
-                st_query_result_t *peer = iter->data;
+                peer_device_info_t *peer = iter->data;
 
                 if (skip_target && pcmk__str_eq(peer->host, op->target, pcmk__str_casei)) {
                     continue;
@@ -1850,31 +1851,31 @@ parse_action_specific(xmlNode *xml, const char *peer, const char *device,
  *
  * \param[in]     xml       XML node containing device properties
  * \param[in,out] op        Operation that query and reply relate to
- * \param[in,out] result    Peer's results
+ * \param[in,out] peer      Peer's device information
  * \param[in]     device    ID of device being parsed
  */
 static void
 add_device_properties(xmlNode *xml, remote_fencing_op_t *op,
-                      st_query_result_t *result, const char *device)
+                      peer_device_info_t *peer, const char *device)
 {
     xmlNode *child;
     int verified = 0;
     device_properties_t *props = calloc(1, sizeof(device_properties_t));
 
-    /* Add a new entry to this result's devices list */
+    /* Add a new entry to this peer's devices list */
     CRM_ASSERT(props != NULL);
-    g_hash_table_insert(result->devices, strdup(device), props);
+    g_hash_table_insert(peer->devices, strdup(device), props);
 
     /* Peers with verified (monitored) access will be preferred */
     crm_element_value_int(xml, F_STONITH_DEVICE_VERIFIED, &verified);
     if (verified) {
         crm_trace("Peer %s has confirmed a verified device %s",
-                  result->host, device);
+                  peer->host, device);
         props->verified = TRUE;
     }
 
     /* Parse action-specific device properties */
-    parse_action_specific(xml, result->host, device, op_requested_action(op),
+    parse_action_specific(xml, peer->host, device, op_requested_action(op),
                           op, st_phase_requested, props);
     for (child = pcmk__xml_first_child(xml); child != NULL;
          child = pcmk__xml_next(child)) {
@@ -1883,10 +1884,10 @@ add_device_properties(xmlNode *xml, remote_fencing_op_t *op,
          * winds up getting remapped.
          */
         if (pcmk__str_eq(ID(child), "off", pcmk__str_casei)) {
-            parse_action_specific(child, result->host, device, "off",
+            parse_action_specific(child, peer->host, device, "off",
                                   op, st_phase_off, props);
         } else if (pcmk__str_eq(ID(child), "on", pcmk__str_casei)) {
-            parse_action_specific(child, result->host, device, "on",
+            parse_action_specific(child, peer->host, device, "on",
                                   op, st_phase_on, props);
         }
     }
@@ -1903,17 +1904,17 @@ add_device_properties(xmlNode *xml, remote_fencing_op_t *op,
  *
  * \return Newly allocated result structure with parsed reply
  */
-static st_query_result_t *
+static peer_device_info_t *
 add_result(remote_fencing_op_t *op, const char *host, int ndevices, xmlNode *xml)
 {
-    st_query_result_t *result = calloc(1, sizeof(st_query_result_t));
+    peer_device_info_t *peer = calloc(1, sizeof(peer_device_info_t));
     xmlNode *child;
 
     // cppcheck seems not to understand the abort logic in CRM_CHECK
     // cppcheck-suppress memleak
-    CRM_CHECK(result != NULL, return NULL);
-    result->host = strdup(host);
-    result->devices = pcmk__strkey_table(free, free);
+    CRM_CHECK(peer != NULL, return NULL);
+    peer->host = strdup(host);
+    peer->devices = pcmk__strkey_table(free, free);
 
     /* Each child element describes one capable device available to the peer */
     for (child = pcmk__xml_first_child(xml); child != NULL;
@@ -1921,17 +1922,17 @@ add_result(remote_fencing_op_t *op, const char *host, int ndevices, xmlNode *xml
         const char *device = ID(child);
 
         if (device) {
-            add_device_properties(child, op, result, device);
+            add_device_properties(child, op, peer, device);
         }
     }
 
-    result->ndevices = g_hash_table_size(result->devices);
-    CRM_CHECK(ndevices == result->ndevices,
+    peer->ndevices = g_hash_table_size(peer->devices);
+    CRM_CHECK(ndevices == peer->ndevices,
               crm_err("Query claimed to have %d device%s but %d found",
-                      ndevices, pcmk__plural_s(ndevices), result->ndevices));
+                      ndevices, pcmk__plural_s(ndevices), peer->ndevices));
 
-    op->query_results = g_list_insert_sorted(op->query_results, result, sort_peers);
-    return result;
+    op->query_results = g_list_insert_sorted(op->query_results, peer, sort_peers);
+    return peer;
 }
 
 /*!
@@ -1957,7 +1958,7 @@ process_remote_stonith_query(xmlNode * msg)
     const char *id = NULL;
     const char *host = NULL;
     remote_fencing_op_t *op = NULL;
-    st_query_result_t *result = NULL;
+    peer_device_info_t *peer = NULL;
     uint32_t replies_expected;
     xmlNode *dev = get_xpath_object("//@" F_STONITH_REMOTE_OP_ID, msg, LOG_ERR);
 
@@ -1991,7 +1992,7 @@ process_remote_stonith_query(xmlNode * msg)
              op->replies, replies_expected, host,
              op->target, op->action, ndevices, pcmk__plural_s(ndevices), id);
     if (ndevices > 0) {
-        result = add_result(op, host, ndevices, dev);
+        peer = add_result(op, host, ndevices, dev);
     }
 
     if (pcmk_is_set(op->call_options, st_opt_topology)) {
@@ -2001,7 +2002,7 @@ process_remote_stonith_query(xmlNode * msg)
         if (op->state == st_query && all_topology_devices_found(op)) {
             /* All the query results are in for the topology, start the fencing ops. */
             crm_trace("All topology devices found");
-            call_remote_stonith(op, result, pcmk_ok);
+            call_remote_stonith(op, peer, pcmk_ok);
 
         } else if (have_all_replies) {
             crm_info("All topology query replies have arrived, continuing (%d expected/%d received) ",
@@ -2010,15 +2011,15 @@ process_remote_stonith_query(xmlNode * msg)
         }
 
     } else if (op->state == st_query) {
-        int nverified = count_peer_devices(op, result, TRUE);
+        int nverified = count_peer_devices(op, peer, TRUE);
 
         /* We have a result for a non-topology fencing op that looks promising,
          * go ahead and start fencing before query timeout */
-        if (result && (host_is_target == FALSE) && nverified) {
+        if ((peer != NULL) && !host_is_target && nverified) {
             /* we have a verified device living on a peer that is not the target */
             crm_trace("Found %d verified device%s",
                       nverified, pcmk__plural_s(nverified));
-            call_remote_stonith(op, result, pcmk_ok);
+            call_remote_stonith(op, peer, pcmk_ok);
 
         } else if (have_all_replies) {
             crm_info("All query replies have arrived, continuing (%d expected/%d received) ",
@@ -2029,10 +2030,10 @@ process_remote_stonith_query(xmlNode * msg)
             crm_trace("Waiting for more peer results before launching fencing operation");
         }
 
-    } else if (result && (op->state == st_done)) {
+    } else if ((peer != NULL) && (op->state == st_done)) {
         crm_info("Discarding query result from %s (%d device%s): "
-                 "Operation is %s", result->host,
-                 result->ndevices, pcmk__plural_s(result->ndevices),
+                 "Operation is %s", peer->host,
+                 peer->ndevices, pcmk__plural_s(peer->ndevices),
                  stonith_op_state_str(op->state));
     }
 
-- 
2.27.0


From 913e0620310089d2250e9ecde383df757f8e8063 Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Thu, 11 Nov 2021 12:46:37 -0600
Subject: [PATCH 04/19] Low: fencer: improve broadcasting replies for fenced
 originators

If the target of a fencing action was also the originator, the executioner
broadcasts the result on their behalf.

Previously, it would check if the action was not in a list of actions that are
never broadcasted. However we really only want to broadcast off/reboot results
so just check for that instead.

This also rearranges reply creation slightly so we don't trace-log the reply
until it is fully created.
---
 daemons/fenced/fenced_commands.c | 19 +++++++++----------
 1 file changed, 9 insertions(+), 10 deletions(-)

diff --git a/daemons/fenced/fenced_commands.c b/daemons/fenced/fenced_commands.c
index 46c840f2a..e4185f6e1 100644
--- a/daemons/fenced/fenced_commands.c
+++ b/daemons/fenced/fenced_commands.c
@@ -2385,32 +2385,31 @@ send_async_reply(async_command_t *cmd, const pcmk__action_result_t *result,
                  int pid, bool merged)
 {
     xmlNode *reply = NULL;
-    gboolean bcast = FALSE;
+    bool bcast = false;
 
     CRM_CHECK((cmd != NULL) && (result != NULL), return);
 
     reply = construct_async_reply(cmd, result);
 
-    // Only replies for certain actions are broadcast
-    if (pcmk__str_any_of(cmd->action, "metadata", "monitor", "list", "status",
-                         NULL)) {
-        crm_trace("Never broadcast '%s' replies", cmd->action);
+    // If target was also the originator, broadcast fencing results for it
+    if (!stand_alone && pcmk__is_fencing_action(cmd->action)
+        && pcmk__str_eq(cmd->origin, cmd->victim, pcmk__str_casei)) {
 
-    } else if (!stand_alone && pcmk__str_eq(cmd->origin, cmd->victim, pcmk__str_casei) && !pcmk__str_eq(cmd->action, "on", pcmk__str_casei)) {
-        crm_trace("Broadcast '%s' reply for %s", cmd->action, cmd->victim);
+        crm_trace("Broadcast '%s' result for %s (target was also originator)",
+                  cmd->action, cmd->victim);
         crm_xml_add(reply, F_SUBTYPE, "broadcast");
-        bcast = TRUE;
+        crm_xml_add(reply, F_STONITH_OPERATION, T_STONITH_NOTIFY);
+        bcast = true;
     }
 
     log_async_result(cmd, result, pid, NULL, merged);
-    crm_log_xml_trace(reply, "Reply");
 
     if (merged) {
         crm_xml_add(reply, F_STONITH_MERGED, "true");
     }
+    crm_log_xml_trace(reply, "Reply");
 
     if (bcast) {
-        crm_xml_add(reply, F_STONITH_OPERATION, T_STONITH_NOTIFY);
         send_cluster_message(NULL, crm_msg_stonith_ng, reply, FALSE);
 
     } else if (cmd->origin) {
-- 
2.27.0


From 8b8f94fd9ca5e61922cb81e32c8a3d0f1d75fb0b Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Thu, 11 Nov 2021 14:40:49 -0600
Subject: [PATCH 05/19] Refactor: fencer: avoid code duplication when sending
 async reply

... and clean up reply function
---
 daemons/fenced/fenced_commands.c | 33 ++++++++++++++++++--------------
 1 file changed, 19 insertions(+), 14 deletions(-)

diff --git a/daemons/fenced/fenced_commands.c b/daemons/fenced/fenced_commands.c
index e4185f6e1..4ea0a337a 100644
--- a/daemons/fenced/fenced_commands.c
+++ b/daemons/fenced/fenced_commands.c
@@ -2411,15 +2411,8 @@ send_async_reply(async_command_t *cmd, const pcmk__action_result_t *result,
 
     if (bcast) {
         send_cluster_message(NULL, crm_msg_stonith_ng, reply, FALSE);
-
-    } else if (cmd->origin) {
-        crm_trace("Directed reply to %s", cmd->origin);
-        send_cluster_message(crm_get_peer(0, cmd->origin), crm_msg_stonith_ng, reply, FALSE);
-
     } else {
-        crm_trace("Directed local %ssync reply to %s",
-                  (cmd->options & st_opt_sync_call) ? "" : "a-", cmd->client_name);
-        do_local_reply(reply, cmd->client, cmd->options & st_opt_sync_call, FALSE);
+        stonith_send_reply(reply, cmd->options, cmd->origin, cmd->client);
     }
 
     if (stand_alone) {
@@ -2814,16 +2807,28 @@ check_alternate_host(const char *target)
     return alternate_host;
 }
 
+/*!
+ * \internal
+ * \brief Send a reply to a CPG peer or IPC client
+ *
+ * \param[in] reply         XML reply to send
+ * \param[in] call_options  Send synchronously if st_opt_sync_call is set here
+ * \param[in] remote_peer   If not NULL, name of peer node to send CPG reply
+ * \param[in] client_id     If not NULL, name of client to send IPC reply
+ */
 static void
-stonith_send_reply(xmlNode * reply, int call_options, const char *remote_peer,
+stonith_send_reply(xmlNode *reply, int call_options, const char *remote_peer,
                    const char *client_id)
 {
-    if (remote_peer) {
-        send_cluster_message(crm_get_peer(0, remote_peer), crm_msg_stonith_ng, reply, FALSE);
-    } else {
+    CRM_CHECK((reply != NULL) && ((remote_peer != NULL) || (client_id != NULL)),
+              return);
+
+    if (remote_peer == NULL) {
         do_local_reply(reply, client_id,
-                       pcmk_is_set(call_options, st_opt_sync_call),
-                       (remote_peer != NULL));
+                       pcmk_is_set(call_options, st_opt_sync_call), FALSE);
+    } else {
+        send_cluster_message(crm_get_peer(0, remote_peer), crm_msg_stonith_ng,
+                             reply, FALSE);
     }
 }
 
-- 
2.27.0


From 2cdbda58f0e9f38a0e302506107fd933cb415144 Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Tue, 23 Nov 2021 17:24:09 -0600
Subject: [PATCH 06/19] Refactor: fencer: ensure all requests get clean-up

handle_request() has if-else blocks for each type of request. Previously, if a
request didn't need a reply, the function would do any clean-up needed and
return immediately. Now, we track whether a reply is needed, and all request
types flow to the end of the function for consistent clean-up.

This doesn't change any behavior at this point, but allows us to do more at the
end of request handling.
---
 daemons/fenced/fenced_commands.c | 46 ++++++++++++++++++--------------
 1 file changed, 26 insertions(+), 20 deletions(-)

diff --git a/daemons/fenced/fenced_commands.c b/daemons/fenced/fenced_commands.c
index 4ea0a337a..19477b49b 100644
--- a/daemons/fenced/fenced_commands.c
+++ b/daemons/fenced/fenced_commands.c
@@ -2892,6 +2892,7 @@ handle_request(pcmk__client_t *client, uint32_t id, uint32_t flags,
 
     xmlNode *data = NULL;
     xmlNode *reply = NULL;
+    bool need_reply = true;
 
     char *output = NULL;
     const char *op = crm_element_value(request, F_STONITH_OPERATION);
@@ -2921,10 +2922,12 @@ handle_request(pcmk__client_t *client, uint32_t id, uint32_t flags,
         pcmk__ipc_send_xml(client, id, reply, flags);
         client->request_id = 0;
         free_xml(reply);
-        return 0;
+        rc = pcmk_ok;
+        need_reply = false;
 
     } else if (pcmk__str_eq(op, STONITH_OP_EXEC, pcmk__str_none)) {
         rc = stonith_device_action(request, &output);
+        need_reply = (rc != -EINPROGRESS);
 
     } else if (pcmk__str_eq(op, STONITH_OP_TIMEOUT_UPDATE, pcmk__str_none)) {
         const char *call_id = crm_element_value(request, F_STONITH_CALLID);
@@ -2933,7 +2936,8 @@ handle_request(pcmk__client_t *client, uint32_t id, uint32_t flags,
 
         crm_element_value_int(request, F_STONITH_TIMEOUT, &op_timeout);
         do_stonith_async_timeout_update(client_id, call_id, op_timeout);
-        return 0;
+        rc = pcmk_ok;
+        need_reply = false;
 
     } else if (pcmk__str_eq(op, STONITH_OP_QUERY, pcmk__str_none)) {
         if (remote_peer) {
@@ -2944,7 +2948,8 @@ handle_request(pcmk__client_t *client, uint32_t id, uint32_t flags,
         remove_relay_op(request);
 
         stonith_query(request, remote_peer, client_id, call_options);
-        return 0;
+        rc = pcmk_ok;
+        need_reply = false;
 
     } else if (pcmk__str_eq(op, T_STONITH_NOTIFY, pcmk__str_none)) {
         const char *flag_name = NULL;
@@ -2965,7 +2970,8 @@ handle_request(pcmk__client_t *client, uint32_t id, uint32_t flags,
         }
 
         pcmk__ipc_send_ack(client, id, flags, "ack", CRM_EX_OK);
-        return 0;
+        rc = pcmk_ok;
+        need_reply = false;
 
     } else if (pcmk__str_eq(op, STONITH_OP_RELAY, pcmk__str_none)) {
         xmlNode *dev = get_xpath_object("//@" F_STONITH_TARGET, request, LOG_TRACE);
@@ -2977,8 +2983,11 @@ handle_request(pcmk__client_t *client, uint32_t id, uint32_t flags,
                    crm_element_value(dev, F_STONITH_ACTION),
                    crm_element_value(dev, F_STONITH_TARGET));
 
-        if (initiate_remote_stonith_op(NULL, request, FALSE) != NULL) {
+        if (initiate_remote_stonith_op(NULL, request, FALSE) == NULL) {
+            rc = -EPROTO;
+        } else {
             rc = -EINPROGRESS;
+            need_reply = false;
         }
 
     } else if (pcmk__str_eq(op, STONITH_OP_FENCE, pcmk__str_none)) {
@@ -3012,7 +3021,7 @@ handle_request(pcmk__client_t *client, uint32_t id, uint32_t flags,
                 crm_element_value_int(dev, F_STONITH_TOLERANCE, &tolerance);
 
                 if (stonith_check_fence_tolerance(tolerance, target, action)) {
-                    rc = 0;
+                    rc = pcmk_ok;
                     goto done;
                 }
 
@@ -3047,10 +3056,13 @@ handle_request(pcmk__client_t *client, uint32_t id, uint32_t flags,
                                      FALSE);
                 rc = -EINPROGRESS;
 
-            } else if (initiate_remote_stonith_op(client, request, FALSE) != NULL) {
+            } else if (initiate_remote_stonith_op(client, request, FALSE) == NULL) {
+                rc = -EPROTO;
+            } else {
                 rc = -EINPROGRESS;
             }
         }
+        need_reply = (rc != -EINPROGRESS);
 
     } else if (pcmk__str_eq(op, STONITH_OP_FENCE_HISTORY, pcmk__str_none)) {
         rc = stonith_fence_history(request, &data, remote_peer, call_options);
@@ -3058,8 +3070,8 @@ handle_request(pcmk__client_t *client, uint32_t id, uint32_t flags,
             /* we don't expect answers to the broadcast
              * we might have sent out
              */
-            free_xml(data);
-            return pcmk_ok;
+            rc = pcmk_ok;
+            need_reply = false;
         }
 
     } else if (pcmk__str_eq(op, STONITH_OP_DEVICE_ADD, pcmk__str_none)) {
@@ -3111,8 +3123,8 @@ handle_request(pcmk__client_t *client, uint32_t id, uint32_t flags,
         crm_element_value_int(request, XML_ATTR_ID, &node_id);
         name = crm_element_value(request, XML_ATTR_UNAME);
         reap_crm_member(node_id, name);
-
-        return pcmk_ok;
+        rc = pcmk_ok;
+        need_reply = false;
 
     } else {
         crm_err("Unknown IPC request %s from %s %s", op,
@@ -3120,20 +3132,14 @@ handle_request(pcmk__client_t *client, uint32_t id, uint32_t flags,
                 ((client == NULL)? remote_peer : pcmk__client_name(client)));
     }
 
-  done:
-
+done:
     if (rc == -EACCES) {
         crm_warn("Rejecting IPC request '%s' from unprivileged client %s",
                  crm_str(op), pcmk__client_name(client));
     }
 
-    /* Always reply unless the request is in process still.
-     * If in progress, a reply will happen async after the request
-     * processing is finished */
-    if (rc != -EINPROGRESS) {
-        crm_trace("Reply handling: %p %u %u %d %d %s", client, client?client->request_id:0,
-                  id, pcmk_is_set(call_options, st_opt_sync_call), call_options,
-                  crm_element_value(request, F_STONITH_CALLOPTS));
+    // Reply if result is known
+    if (need_reply) {
 
         if (pcmk_is_set(call_options, st_opt_sync_call)) {
             CRM_ASSERT(client == NULL || client->request_id == id);
-- 
2.27.0


From 067d655ebd3fbb0ed27f4e7426db4c3b661ba777 Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Tue, 23 Nov 2021 17:26:32 -0600
Subject: [PATCH 07/19] Log: fencer: improve debug logs when processing CPG/IPC
 messages

By moving the result log messages from stonith_command() to handle_reply() and
handle_request(), we can simplify stonith_command() and give slightly better
messages.
---
 daemons/fenced/fenced_commands.c | 80 +++++++++++++++-----------------
 1 file changed, 38 insertions(+), 42 deletions(-)

diff --git a/daemons/fenced/fenced_commands.c b/daemons/fenced/fenced_commands.c
index 19477b49b..98af0e04f 100644
--- a/daemons/fenced/fenced_commands.c
+++ b/daemons/fenced/fenced_commands.c
@@ -2883,7 +2883,7 @@ remove_relay_op(xmlNode * request)
     }
 }
 
-static int
+static void
 handle_request(pcmk__client_t *client, uint32_t id, uint32_t flags,
                xmlNode *request, const char *remote_peer)
 {
@@ -3152,73 +3152,69 @@ done:
     free_xml(data);
     free_xml(reply);
 
-    return rc;
+    crm_debug("Processed %s request from %s %s: %s (rc=%d)",
+              op, ((client == NULL)? "peer" : "client"),
+              ((client == NULL)? remote_peer : pcmk__client_name(client)),
+              ((rc > 0)? "" : pcmk_strerror(rc)), rc);
 }
 
 static void
 handle_reply(pcmk__client_t *client, xmlNode *request, const char *remote_peer)
 {
-    const char *op = crm_element_value(request, F_STONITH_OPERATION);
+    // Copy, because request might be freed before we want to log this
+    char *op = crm_element_value_copy(request, F_STONITH_OPERATION);
 
     if (pcmk__str_eq(op, STONITH_OP_QUERY, pcmk__str_none)) {
         process_remote_stonith_query(request);
-    } else if (pcmk__str_eq(op, T_STONITH_NOTIFY, pcmk__str_none)) {
-        process_remote_stonith_exec(request);
-    } else if (pcmk__str_eq(op, STONITH_OP_FENCE, pcmk__str_none)) {
-        /* Reply to a complex fencing op */
+    } else if (pcmk__str_any_of(op, T_STONITH_NOTIFY, STONITH_OP_FENCE, NULL)) {
         process_remote_stonith_exec(request);
     } else {
-        crm_err("Unknown %s reply from %s %s", op,
-                ((client == NULL)? "peer" : "client"),
+        crm_err("Ignoring unknown %s reply from %s %s",
+                crm_str(op), ((client == NULL)? "peer" : "client"),
                 ((client == NULL)? remote_peer : pcmk__client_name(client)));
         crm_log_xml_warn(request, "UnknownOp");
+        free(op);
+        return;
     }
+    crm_debug("Processed %s reply from %s %s",
+              op, ((client == NULL)? "peer" : "client"),
+              ((client == NULL)? remote_peer : pcmk__client_name(client)));
+    free(op);
 }
 
+/*!
+ * \internal
+ * \brief Handle a message from an IPC client or CPG peer
+ *
+ * \param[in] client      If not NULL, IPC client that sent message
+ * \param[in] id          If from IPC client, IPC message ID
+ * \param[in] flags       Message flags
+ * \param[in] message     Message XML
+ * \param[in] remote_peer If not NULL, CPG peer that sent message
+ */
 void
 stonith_command(pcmk__client_t *client, uint32_t id, uint32_t flags,
-                xmlNode *request, const char *remote_peer)
+                xmlNode *message, const char *remote_peer)
 {
-    int call_options = 0;
-    int rc = 0;
-    gboolean is_reply = FALSE;
-
-    /* Copy op for reporting. The original might get freed by handle_reply()
-     * before we use it in crm_debug():
-     *     handle_reply()
-     *     |- process_remote_stonith_exec()
-     *     |-- remote_op_done()
-     *     |--- handle_local_reply_and_notify()
-     *     |---- crm_xml_add(...F_STONITH_OPERATION...)
-     *     |--- free_xml(op->request)
-     */
-    char *op = crm_element_value_copy(request, F_STONITH_OPERATION);
-
-    if (get_xpath_object("//" T_STONITH_REPLY, request, LOG_NEVER)) {
-        is_reply = TRUE;
-    }
+    int call_options = st_opt_none;
+    bool is_reply = get_xpath_object("//" T_STONITH_REPLY, message,
+                                     LOG_NEVER) != NULL;
 
-    crm_element_value_int(request, F_STONITH_CALLOPTS, &call_options);
-    crm_debug("Processing %s%s %u from %s %s with call options 0x%08x",
-              op, (is_reply? " reply" : ""), id,
+    crm_element_value_int(message, F_STONITH_CALLOPTS, &call_options);
+    crm_debug("Processing %ssynchronous %s %s %u from %s %s",
+              pcmk_is_set(call_options, st_opt_sync_call)? "" : "a",
+              crm_element_value(message, F_STONITH_OPERATION),
+              (is_reply? "reply" : "request"), id,
               ((client == NULL)? "peer" : "client"),
-              ((client == NULL)? remote_peer : pcmk__client_name(client)),
-              call_options);
+              ((client == NULL)? remote_peer : pcmk__client_name(client)));
 
     if (pcmk_is_set(call_options, st_opt_sync_call)) {
         CRM_ASSERT(client == NULL || client->request_id == id);
     }
 
     if (is_reply) {
-        handle_reply(client, request, remote_peer);
+        handle_reply(client, message, remote_peer);
     } else {
-        rc = handle_request(client, id, flags, request, remote_peer);
+        handle_request(client, id, flags, message, remote_peer);
     }
-
-    crm_debug("Processed %s%s from %s %s: %s (rc=%d)",
-              op, (is_reply? " reply" : ""),
-              ((client == NULL)? "peer" : "client"),
-              ((client == NULL)? remote_peer : pcmk__client_name(client)),
-              ((rc > 0)? "" : pcmk_strerror(rc)), rc);
-    free(op);
 }
-- 
2.27.0


From 44cb340c11b4652f452a47eb2b0050b4a459382b Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Mon, 15 Nov 2021 16:29:09 -0600
Subject: [PATCH 08/19] Refactor: fencer: drop unused argument from
 notification functions

---
 daemons/fenced/fenced_commands.c  | 12 ++++++------
 daemons/fenced/fenced_history.c   |  6 +++---
 daemons/fenced/fenced_remote.c    |  6 +++---
 daemons/fenced/pacemaker-fenced.c | 18 +++++++++---------
 daemons/fenced/pacemaker-fenced.h |  6 +++---
 5 files changed, 24 insertions(+), 24 deletions(-)

diff --git a/daemons/fenced/fenced_commands.c b/daemons/fenced/fenced_commands.c
index 98af0e04f..946ce4042 100644
--- a/daemons/fenced/fenced_commands.c
+++ b/daemons/fenced/fenced_commands.c
@@ -2428,8 +2428,8 @@ send_async_reply(async_command_t *cmd, const pcmk__action_result_t *result,
         crm_xml_add(notify_data, F_STONITH_REMOTE_OP_ID, cmd->remote_op_id);
         crm_xml_add(notify_data, F_STONITH_ORIGIN, cmd->client);
 
-        do_stonith_notify(0, T_STONITH_NOTIFY_FENCE, rc, notify_data);
-        do_stonith_notify(0, T_STONITH_NOTIFY_HISTORY, 0, NULL);
+        do_stonith_notify(T_STONITH_NOTIFY_FENCE, rc, notify_data);
+        do_stonith_notify(T_STONITH_NOTIFY_HISTORY, pcmk_ok, NULL);
     }
 
     free_xml(reply);
@@ -3082,7 +3082,7 @@ handle_request(pcmk__client_t *client, uint32_t id, uint32_t flags,
         } else {
             rc = -EACCES;
         }
-        do_stonith_notify_device(call_options, op, rc, device_id);
+        do_stonith_notify_device(op, rc, device_id);
 
     } else if (pcmk__str_eq(op, STONITH_OP_DEVICE_DEL, pcmk__str_none)) {
         xmlNode *dev = get_xpath_object("//" F_STONITH_DEVICE, request, LOG_ERR);
@@ -3093,7 +3093,7 @@ handle_request(pcmk__client_t *client, uint32_t id, uint32_t flags,
         } else {
             rc = -EACCES;
         }
-        do_stonith_notify_device(call_options, op, rc, device_id);
+        do_stonith_notify_device(op, rc, device_id);
 
     } else if (pcmk__str_eq(op, STONITH_OP_LEVEL_ADD, pcmk__str_none)) {
         char *device_id = NULL;
@@ -3103,7 +3103,7 @@ handle_request(pcmk__client_t *client, uint32_t id, uint32_t flags,
         } else {
             rc = -EACCES;
         }
-        do_stonith_notify_level(call_options, op, rc, device_id);
+        do_stonith_notify_level(op, rc, device_id);
         free(device_id);
 
     } else if (pcmk__str_eq(op, STONITH_OP_LEVEL_DEL, pcmk__str_none)) {
@@ -3114,7 +3114,7 @@ handle_request(pcmk__client_t *client, uint32_t id, uint32_t flags,
         } else {
             rc = -EACCES;
         }
-        do_stonith_notify_level(call_options, op, rc, device_id);
+        do_stonith_notify_level(op, rc, device_id);
 
     } else if(pcmk__str_eq(op, CRM_OP_RM_NODE_CACHE, pcmk__str_casei)) {
         int node_id = 0;
diff --git a/daemons/fenced/fenced_history.c b/daemons/fenced/fenced_history.c
index 1ba034ba9..7127593b6 100644
--- a/daemons/fenced/fenced_history.c
+++ b/daemons/fenced/fenced_history.c
@@ -100,7 +100,7 @@ stonith_fence_history_cleanup(const char *target,
         g_hash_table_foreach_remove(stonith_remote_op_list,
                              stonith_remove_history_entry,
                              (gpointer) target);
-        do_stonith_notify(0, T_STONITH_NOTIFY_HISTORY, 0, NULL);
+        do_stonith_notify(T_STONITH_NOTIFY_HISTORY, pcmk_ok, NULL);
     }
 }
 
@@ -396,7 +396,7 @@ stonith_local_history_diff_and_merge(GHashTable *remote_history,
 
     if (updated) {
         stonith_fence_history_trim();
-        do_stonith_notify(0, T_STONITH_NOTIFY_HISTORY, 0, NULL);
+        do_stonith_notify(T_STONITH_NOTIFY_HISTORY, pcmk_ok, NULL);
     }
 
     if (cnt == 0) {
@@ -470,7 +470,7 @@ stonith_fence_history(xmlNode *msg, xmlNode **output,
            is done so send a notification for anything
            that smells like history-sync
          */
-        do_stonith_notify(0, T_STONITH_NOTIFY_HISTORY_SYNCED, 0, NULL);
+        do_stonith_notify(T_STONITH_NOTIFY_HISTORY_SYNCED, pcmk_ok, NULL);
         if (crm_element_value(msg, F_STONITH_CALLID)) {
             /* this is coming from the stonith-API
             *
diff --git a/daemons/fenced/fenced_remote.c b/daemons/fenced/fenced_remote.c
index 9e2f62804..c907cd120 100644
--- a/daemons/fenced/fenced_remote.c
+++ b/daemons/fenced/fenced_remote.c
@@ -423,8 +423,8 @@ handle_local_reply_and_notify(remote_fencing_op_t * op, xmlNode * data, int rc)
     do_local_reply(reply, op->client_id, op->call_options & st_opt_sync_call, FALSE);
 
     /* bcast to all local clients that the fencing operation happend */
-    do_stonith_notify(0, T_STONITH_NOTIFY_FENCE, rc, notify_data);
-    do_stonith_notify(0, T_STONITH_NOTIFY_HISTORY, 0, NULL);
+    do_stonith_notify(T_STONITH_NOTIFY_FENCE, rc, notify_data);
+    do_stonith_notify(T_STONITH_NOTIFY_HISTORY, pcmk_ok, NULL);
 
     /* mark this op as having notify's already sent */
     op->notify_sent = TRUE;
@@ -1119,7 +1119,7 @@ create_remote_stonith_op(const char *client, xmlNode * request, gboolean peer)
 
     if (op->state != st_duplicate) {
         /* kick history readers */
-        do_stonith_notify(0, T_STONITH_NOTIFY_HISTORY, 0, NULL);
+        do_stonith_notify(T_STONITH_NOTIFY_HISTORY, pcmk_ok, NULL);
     }
 
     /* safe to trim as long as that doesn't touch pending ops */
diff --git a/daemons/fenced/pacemaker-fenced.c b/daemons/fenced/pacemaker-fenced.c
index a64004ce1..a290e1670 100644
--- a/daemons/fenced/pacemaker-fenced.c
+++ b/daemons/fenced/pacemaker-fenced.c
@@ -357,7 +357,7 @@ do_stonith_async_timeout_update(const char *client_id, const char *call_id, int
 }
 
 void
-do_stonith_notify(int options, const char *type, int result, xmlNode * data)
+do_stonith_notify(const char *type, int result, xmlNode *data)
 {
     /* TODO: Standardize the contents of data */
     xmlNode *update_msg = create_xml_node(NULL, "notify");
@@ -380,7 +380,7 @@ do_stonith_notify(int options, const char *type, int result, xmlNode * data)
 }
 
 static void
-do_stonith_notify_config(int options, const char *op, int rc,
+do_stonith_notify_config(const char *op, int rc,
                          const char *desc, int active)
 {
     xmlNode *notify_data = create_xml_node(NULL, op);
@@ -390,20 +390,20 @@ do_stonith_notify_config(int options, const char *op, int rc,
     crm_xml_add(notify_data, F_STONITH_DEVICE, desc);
     crm_xml_add_int(notify_data, F_STONITH_ACTIVE, active);
 
-    do_stonith_notify(options, op, rc, notify_data);
+    do_stonith_notify(op, rc, notify_data);
     free_xml(notify_data);
 }
 
 void
-do_stonith_notify_device(int options, const char *op, int rc, const char *desc)
+do_stonith_notify_device(const char *op, int rc, const char *desc)
 {
-    do_stonith_notify_config(options, op, rc, desc, g_hash_table_size(device_list));
+    do_stonith_notify_config(op, rc, desc, g_hash_table_size(device_list));
 }
 
 void
-do_stonith_notify_level(int options, const char *op, int rc, const char *desc)
+do_stonith_notify_level(const char *op, int rc, const char *desc)
 {
-    do_stonith_notify_config(options, op, rc, desc, g_hash_table_size(topology));
+    do_stonith_notify_config(op, rc, desc, g_hash_table_size(topology));
 }
 
 static void
@@ -418,7 +418,7 @@ topology_remove_helper(const char *node, int level)
     crm_xml_add(data, XML_ATTR_STONITH_TARGET, node);
 
     rc = stonith_level_remove(data, &desc);
-    do_stonith_notify_level(0, STONITH_OP_LEVEL_DEL, rc, desc);
+    do_stonith_notify_level(STONITH_OP_LEVEL_DEL, rc, desc);
 
     free_xml(data);
     free(desc);
@@ -468,7 +468,7 @@ handle_topology_change(xmlNode *match, bool remove)
     }
 
     rc = stonith_level_register(match, &desc);
-    do_stonith_notify_level(0, STONITH_OP_LEVEL_ADD, rc, desc);
+    do_stonith_notify_level(STONITH_OP_LEVEL_ADD, rc, desc);
 
     free(desc);
 }
diff --git a/daemons/fenced/pacemaker-fenced.h b/daemons/fenced/pacemaker-fenced.h
index a64b57693..3e41d867e 100644
--- a/daemons/fenced/pacemaker-fenced.h
+++ b/daemons/fenced/pacemaker-fenced.h
@@ -233,9 +233,9 @@ xmlNode *stonith_construct_reply(xmlNode * request, const char *output, xmlNode
 void
  do_stonith_async_timeout_update(const char *client, const char *call_id, int timeout);
 
-void do_stonith_notify(int options, const char *type, int result, xmlNode * data);
-void do_stonith_notify_device(int options, const char *op, int rc, const char *desc);
-void do_stonith_notify_level(int options, const char *op, int rc, const char *desc);
+void do_stonith_notify(const char *type, int result, xmlNode *data);
+void do_stonith_notify_device(const char *op, int rc, const char *desc);
+void do_stonith_notify_level(const char *op, int rc, const char *desc);
 
 remote_fencing_op_t *initiate_remote_stonith_op(pcmk__client_t *client,
                                                 xmlNode *request,
-- 
2.27.0


From a49df4901b663b3366634c1d58f04625ecba4005 Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Tue, 16 Nov 2021 11:57:14 -0600
Subject: [PATCH 09/19] Refactor: fencer: functionize checking for privileged
 client

... for readability and to make planned changes easier
---
 daemons/fenced/fenced_commands.c | 49 +++++++++++++++++++-------------
 1 file changed, 30 insertions(+), 19 deletions(-)

diff --git a/daemons/fenced/fenced_commands.c b/daemons/fenced/fenced_commands.c
index 946ce4042..34c956f5c 100644
--- a/daemons/fenced/fenced_commands.c
+++ b/daemons/fenced/fenced_commands.c
@@ -2883,6 +2883,32 @@ remove_relay_op(xmlNode * request)
     }
 }
 
+/*!
+ * \internal
+ * \brief Check whether an API request was sent by a privileged user
+ *
+ * API commands related to fencing configuration may be done only by privileged
+ * IPC users (i.e. root or hacluster), because all other users should go through
+ * the CIB to have ACLs applied. If no client was given, this is a peer request,
+ * which is always allowed.
+ *
+ * \param[in] c   IPC client that sent request (or NULL if sent by CPG peer)
+ * \param[in] op  Requested API operation (for logging only)
+ *
+ * \return true if sender is peer or privileged client, otherwise false
+ */
+static inline bool
+is_privileged(pcmk__client_t *c, const char *op)
+{
+    if ((c == NULL) || pcmk_is_set(c->flags, pcmk__client_privileged)) {
+        return true;
+    } else {
+        crm_warn("Rejecting IPC request '%s' from unprivileged client %s",
+                 crm_str(op), pcmk__client_name(c));
+        return false;
+    }
+}
+
 static void
 handle_request(pcmk__client_t *client, uint32_t id, uint32_t flags,
                xmlNode *request, const char *remote_peer)
@@ -2898,15 +2924,6 @@ handle_request(pcmk__client_t *client, uint32_t id, uint32_t flags,
     const char *op = crm_element_value(request, F_STONITH_OPERATION);
     const char *client_id = crm_element_value(request, F_STONITH_CLIENTID);
 
-    /* IPC commands related to fencing configuration may be done only by
-     * privileged users (i.e. root or hacluster), because all other users should
-     * go through the CIB to have ACLs applied.
-     *
-     * If no client was given, this is a peer request, which is always allowed.
-     */
-    bool allowed = (client == NULL)
-                   || pcmk_is_set(client->flags, pcmk__client_privileged);
-
     crm_element_value_int(request, F_STONITH_CALLOPTS, &call_options);
 
     if (pcmk_is_set(call_options, st_opt_sync_call)) {
@@ -3077,7 +3094,7 @@ handle_request(pcmk__client_t *client, uint32_t id, uint32_t flags,
     } else if (pcmk__str_eq(op, STONITH_OP_DEVICE_ADD, pcmk__str_none)) {
         const char *device_id = NULL;
 
-        if (allowed) {
+        if (is_privileged(client, op)) {
             rc = stonith_device_register(request, &device_id, FALSE);
         } else {
             rc = -EACCES;
@@ -3088,7 +3105,7 @@ handle_request(pcmk__client_t *client, uint32_t id, uint32_t flags,
         xmlNode *dev = get_xpath_object("//" F_STONITH_DEVICE, request, LOG_ERR);
         const char *device_id = crm_element_value(dev, XML_ATTR_ID);
 
-        if (allowed) {
+        if (is_privileged(client, op)) {
             rc = stonith_device_remove(device_id, FALSE);
         } else {
             rc = -EACCES;
@@ -3098,7 +3115,7 @@ handle_request(pcmk__client_t *client, uint32_t id, uint32_t flags,
     } else if (pcmk__str_eq(op, STONITH_OP_LEVEL_ADD, pcmk__str_none)) {
         char *device_id = NULL;
 
-        if (allowed) {
+        if (is_privileged(client, op)) {
             rc = stonith_level_register(request, &device_id);
         } else {
             rc = -EACCES;
@@ -3109,7 +3126,7 @@ handle_request(pcmk__client_t *client, uint32_t id, uint32_t flags,
     } else if (pcmk__str_eq(op, STONITH_OP_LEVEL_DEL, pcmk__str_none)) {
         char *device_id = NULL;
 
-        if (allowed) {
+        if (is_privileged(client, op)) {
             rc = stonith_level_remove(request, &device_id);
         } else {
             rc = -EACCES;
@@ -3133,14 +3150,8 @@ handle_request(pcmk__client_t *client, uint32_t id, uint32_t flags,
     }
 
 done:
-    if (rc == -EACCES) {
-        crm_warn("Rejecting IPC request '%s' from unprivileged client %s",
-                 crm_str(op), pcmk__client_name(client));
-    }
-
     // Reply if result is known
     if (need_reply) {
-
         if (pcmk_is_set(call_options, st_opt_sync_call)) {
             CRM_ASSERT(client == NULL || client->request_id == id);
         }
-- 
2.27.0


From 10ca8a5ef5266159bc3f993802aeae6537ceeb11 Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Tue, 16 Nov 2021 16:59:03 -0600
Subject: [PATCH 10/19] Low: fencer: return -ETIME for peer fencing timeouts

94c55684 set the result as pcmk_ok, but it appears that the intent was just to
keep the delegate from being set, and -ETIME should still do that, while being
more appropriate.
---
 daemons/fenced/fenced_remote.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/daemons/fenced/fenced_remote.c b/daemons/fenced/fenced_remote.c
index c907cd120..dc7b802da 100644
--- a/daemons/fenced/fenced_remote.c
+++ b/daemons/fenced/fenced_remote.c
@@ -608,7 +608,7 @@ remote_op_timeout_one(gpointer userdata)
 
     crm_notice("Peer's '%s' action targeting %s for client %s timed out " CRM_XS
                " id=%.8s", op->action, op->target, op->client_name, op->id);
-    call_remote_stonith(op, NULL, pcmk_ok);
+    call_remote_stonith(op, NULL, -ETIME);
     return FALSE;
 }
 
-- 
2.27.0


From fb2eefeb695cc92e1a2aed6f1f1d2b900d4fb83e Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Tue, 16 Nov 2021 17:54:56 -0600
Subject: [PATCH 11/19] Refactor: fencer: functionize common part of timeout
 handling

Previously, remote_op_timeout() was called from multiple places, but only one
of those places needed the full processing. The common part is now in a new
function finalize_timed_out_op() called from all the places, and
remote_op_timeout() now has just the additional processing needed by the one
place plus a call to the new function.

This will allow a future change to set a different exit reason depending on
which step timed out.
---
 daemons/fenced/fenced_remote.c | 49 +++++++++++++++++++++++-----------
 1 file changed, 34 insertions(+), 15 deletions(-)

diff --git a/daemons/fenced/fenced_remote.c b/daemons/fenced/fenced_remote.c
index dc7b802da..22c4b0772 100644
--- a/daemons/fenced/fenced_remote.c
+++ b/daemons/fenced/fenced_remote.c
@@ -612,20 +612,18 @@ remote_op_timeout_one(gpointer userdata)
     return FALSE;
 }
 
-static gboolean
-remote_op_timeout(gpointer userdata)
+/*!
+ * \internal
+ * \brief Finalize a remote fencer operation that timed out
+ *
+ * \param[in] op      Fencer operation that timed out
+ */
+static void
+finalize_timed_out_op(remote_fencing_op_t *op)
 {
-    remote_fencing_op_t *op = userdata;
 
     op->op_timer_total = 0;
 
-    if (op->state == st_done) {
-        crm_debug("Action '%s' targeting %s for client %s already completed "
-                  CRM_XS " id=%.8s",
-                  op->action, op->target, op->client_name, op->id);
-        return FALSE;
-    }
-
     crm_debug("Action '%s' targeting %s for client %s timed out "
               CRM_XS " id=%.8s",
               op->action, op->target, op->client_name, op->id);
@@ -637,14 +635,35 @@ remote_op_timeout(gpointer userdata)
          */
         op->state = st_done;
         remote_op_done(op, NULL, pcmk_ok, FALSE);
-        return FALSE;
+        return;
     }
 
     op->state = st_failed;
 
     remote_op_done(op, NULL, -ETIME, FALSE);
+}
 
-    return FALSE;
+/*!
+ * \internal
+ * \brief Finalize a remote fencer operation that timed out
+ *
+ * \param[in] userdata  Fencer operation that timed out
+ *
+ * \return G_SOURCE_REMOVE (which tells glib not to restart timer)
+ */
+static gboolean
+remote_op_timeout(gpointer userdata)
+{
+    remote_fencing_op_t *op = userdata;
+
+    if (op->state == st_done) {
+        crm_debug("Action '%s' targeting %s for client %s already completed "
+                  CRM_XS " id=%.8s",
+                  op->action, op->target, op->client_name, op->id);
+    } else {
+        finalize_timed_out_op(userdata);
+    }
+    return G_SOURCE_REMOVE;
 }
 
 static gboolean
@@ -670,7 +689,7 @@ remote_op_query_timeout(gpointer data)
             g_source_remove(op->op_timer_total);
             op->op_timer_total = 0;
         }
-        remote_op_timeout(op);
+        finalize_timed_out_op(op);
     }
 
     return FALSE;
@@ -1675,8 +1694,8 @@ call_remote_stonith(remote_fencing_op_t *op, peer_device_info_t *peer, int rc)
         crm_info("No remaining peers capable of fencing (%s) %s for client %s "
                  CRM_XS " state=%s", op->action, op->target, op->client_name,
                  stonith_op_state_str(op->state));
-        CRM_LOG_ASSERT(op->state < st_done);
-        remote_op_timeout(op);
+        CRM_CHECK(op->state < st_done, return);
+        finalize_timed_out_op(op);
 
     } else if(op->replies >= op->replies_expected || op->replies >= fencing_active_peers()) {
 //        int rc = -EHOSTUNREACH;
-- 
2.27.0


From c047005a112ac7da5ba62084e39c79db739f0923 Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Thu, 18 Nov 2021 10:05:18 -0600
Subject: [PATCH 12/19] Low: fencer: handle malformed manual confirmation
 requests better

Rename stonith_manual_ack() to fenced_handle_manual_confirmation(), and move
more of the manual confirmation handling in handle_request() into it, for
better code isolation. This will also make planned changes easier.

The one behavioral difference is that a failure of initiate_remote_stonith_op()
will now be ignored rather than segmentation fault trying to dereference NULL.
---
 daemons/fenced/fenced_commands.c  | 20 ++++++++++++--------
 daemons/fenced/fenced_remote.c    | 29 ++++++++++++++++++++++++-----
 daemons/fenced/pacemaker-fenced.h |  2 +-
 3 files changed, 37 insertions(+), 14 deletions(-)

diff --git a/daemons/fenced/fenced_commands.c b/daemons/fenced/fenced_commands.c
index 34c956f5c..6f325b9e8 100644
--- a/daemons/fenced/fenced_commands.c
+++ b/daemons/fenced/fenced_commands.c
@@ -3012,14 +3012,18 @@ handle_request(pcmk__client_t *client, uint32_t id, uint32_t flags,
         if (remote_peer || stand_alone) {
             rc = stonith_fence(request);
 
-        } else if (call_options & st_opt_manual_ack) {
-            remote_fencing_op_t *rop = NULL;
-            xmlNode *dev = get_xpath_object("//@" F_STONITH_TARGET, request, LOG_TRACE);
-            const char *target = crm_element_value(dev, F_STONITH_TARGET);
-
-            crm_notice("Received manual confirmation that %s is fenced", target);
-            rop = initiate_remote_stonith_op(client, request, TRUE);
-            rc = stonith_manual_ack(request, rop);
+        } else if (pcmk_is_set(call_options, st_opt_manual_ack)) {
+            switch (fenced_handle_manual_confirmation(client, request)) {
+                case pcmk_rc_ok:
+                    rc = pcmk_ok;
+                    break;
+                case EINPROGRESS:
+                    rc = -EINPROGRESS;
+                    break;
+                default:
+                    rc = -EPROTO;
+                    break;
+            }
 
         } else {
             const char *alternate_host = NULL;
diff --git a/daemons/fenced/fenced_remote.c b/daemons/fenced/fenced_remote.c
index 22c4b0772..60ee5e32e 100644
--- a/daemons/fenced/fenced_remote.c
+++ b/daemons/fenced/fenced_remote.c
@@ -1003,22 +1003,41 @@ static uint32_t fencing_active_peers(void)
     return count;
 }
 
+/*!
+ * \internal
+ * \brief Process a manual confirmation of a pending fence action
+ *
+ * \param[in]  client  IPC client that sent confirmation
+ * \param[in]  msg     Request XML with manual confirmation
+ *
+ * \return Standard Pacemaker return code
+ */
 int
-stonith_manual_ack(xmlNode * msg, remote_fencing_op_t * op)
+fenced_handle_manual_confirmation(pcmk__client_t *client, xmlNode *msg)
 {
+    remote_fencing_op_t *op = NULL;
     xmlNode *dev = get_xpath_object("//@" F_STONITH_TARGET, msg, LOG_ERR);
 
+    CRM_CHECK(dev != NULL, return EPROTO);
+
+    crm_notice("Received manual confirmation that %s has been fenced",
+               crm_str(crm_element_value(dev, F_STONITH_TARGET)));
+    op = initiate_remote_stonith_op(client, msg, TRUE);
+    if (op == NULL) {
+        return EPROTO;
+    }
     op->state = st_done;
     set_fencing_completed(op);
     op->delegate = strdup("a human");
 
-    crm_notice("Injecting manual confirmation that %s is safely off/down",
-               crm_element_value(dev, F_STONITH_TARGET));
+    // For the fencer's purposes, the fencing operation is done
 
     remote_op_done(op, msg, pcmk_ok, FALSE);
 
-    // Replies are sent via done_cb -> send_async_reply() -> do_local_reply()
-    return -EINPROGRESS;
+    /* For the requester's purposes, the operation is still pending. The
+     * actual result will be sent asynchronously via the operation's done_cb().
+     */
+    return EINPROGRESS;
 }
 
 /*!
diff --git a/daemons/fenced/pacemaker-fenced.h b/daemons/fenced/pacemaker-fenced.h
index 3e41d867e..cf88644f1 100644
--- a/daemons/fenced/pacemaker-fenced.h
+++ b/daemons/fenced/pacemaker-fenced.h
@@ -256,7 +256,7 @@ bool fencing_peer_active(crm_node_t *peer);
 
 void set_fencing_completed(remote_fencing_op_t * op);
 
-int stonith_manual_ack(xmlNode * msg, remote_fencing_op_t * op);
+int fenced_handle_manual_confirmation(pcmk__client_t *client, xmlNode *msg);
 
 gboolean node_has_attr(const char *node, const char *name, const char *value);
 
-- 
2.27.0


From ec60f014b5a8f774aa57a26e40a2b1b94a7e3d3a Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Thu, 18 Nov 2021 10:35:31 -0600
Subject: [PATCH 13/19] Low: fencer: handle malformed topology level removal
 requests better

Log the malformed request, and return -EPROTO instead of -EINVAL. If a request
is missing a level number, treat it as malformed instead of as a request to
remove all.
---
 daemons/fenced/fenced_commands.c | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/daemons/fenced/fenced_commands.c b/daemons/fenced/fenced_commands.c
index 6f325b9e8..358844203 100644
--- a/daemons/fenced/fenced_commands.c
+++ b/daemons/fenced/fenced_commands.c
@@ -1678,27 +1678,27 @@ stonith_level_register(xmlNode *msg, char **desc)
 int
 stonith_level_remove(xmlNode *msg, char **desc)
 {
-    int id = 0;
+    int id = -1;
     stonith_topology_t *tp;
     char *target;
 
     /* Unlike additions, removal requests should always have one level tag */
     xmlNode *level = get_xpath_object("//" XML_TAG_FENCING_LEVEL, msg, LOG_ERR);
 
-    CRM_CHECK(level != NULL, return -EINVAL);
+    CRM_CHECK(level != NULL, return -EPROTO);
 
     target = stonith_level_key(level, -1);
     crm_element_value_int(level, XML_ATTR_STONITH_INDEX, &id);
+
+    CRM_CHECK((id >= 0) && (id < ST_LEVEL_MAX),
+              crm_log_xml_warn(msg, "invalid level");
+              free(target);
+              return -EPROTO);
+
     if (desc) {
         *desc = crm_strdup_printf("%s[%d]", target, id);
     }
 
-    /* Sanity-check arguments */
-    if (id >= ST_LEVEL_MAX) {
-        free(target);
-        return -EINVAL;
-    }
-
     tp = g_hash_table_lookup(topology, target);
     if (tp == NULL) {
         guint nentries = g_hash_table_size(topology);
@@ -1714,7 +1714,7 @@ stonith_level_remove(xmlNode *msg, char **desc)
                  "(%d active %s remaining)", target, nentries,
                  pcmk__plural_alt(nentries, "entry", "entries"));
 
-    } else if (id > 0 && tp->levels[id] != NULL) {
+    } else if (tp->levels[id] != NULL) {
         guint nlevels;
 
         g_list_free_full(tp->levels[id], free);
-- 
2.27.0


From ee0cfb6b284c2d6d21f8e77bf6ff286b1364235d Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Thu, 18 Nov 2021 12:33:05 -0600
Subject: [PATCH 14/19] Refactor: fencer: avoid obscuring a variable

handle_request() declared a xmlNode *reply variable, and then one of its "if"
blocks defined another one, obscuring the first. Drop the first declaration,
and instead move it to the one other place that needed it.

Also remove a redundant assertion.
---
 daemons/fenced/fenced_commands.c | 13 +++++--------
 1 file changed, 5 insertions(+), 8 deletions(-)

diff --git a/daemons/fenced/fenced_commands.c b/daemons/fenced/fenced_commands.c
index 358844203..af0a92450 100644
--- a/daemons/fenced/fenced_commands.c
+++ b/daemons/fenced/fenced_commands.c
@@ -2917,7 +2917,6 @@ handle_request(pcmk__client_t *client, uint32_t id, uint32_t flags,
     int rc = -EOPNOTSUPP;
 
     xmlNode *data = NULL;
-    xmlNode *reply = NULL;
     bool need_reply = true;
 
     char *output = NULL;
@@ -2926,8 +2925,8 @@ handle_request(pcmk__client_t *client, uint32_t id, uint32_t flags,
 
     crm_element_value_int(request, F_STONITH_CALLOPTS, &call_options);
 
-    if (pcmk_is_set(call_options, st_opt_sync_call)) {
-        CRM_ASSERT(client == NULL || client->request_id == id);
+    if (pcmk_is_set(call_options, st_opt_sync_call) && (client != NULL)) {
+        CRM_ASSERT(client->request_id == id);
     }
 
     if (pcmk__str_eq(op, CRM_OP_REGISTER, pcmk__str_none)) {
@@ -3156,16 +3155,14 @@ handle_request(pcmk__client_t *client, uint32_t id, uint32_t flags,
 done:
     // Reply if result is known
     if (need_reply) {
-        if (pcmk_is_set(call_options, st_opt_sync_call)) {
-            CRM_ASSERT(client == NULL || client->request_id == id);
-        }
-        reply = stonith_construct_reply(request, output, data, rc);
+        xmlNode *reply = stonith_construct_reply(request, output, data, rc);
+
         stonith_send_reply(reply, call_options, remote_peer, client_id);
+        free_xml(reply);
     }
 
     free(output);
     free_xml(data);
-    free_xml(reply);
 
     crm_debug("Processed %s request from %s %s: %s (rc=%d)",
               op, ((client == NULL)? "peer" : "client"),
-- 
2.27.0


From a5fef7b95b7541860e29c1ff33be38db327208fb Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Thu, 18 Nov 2021 12:37:10 -0600
Subject: [PATCH 15/19] Refactor: fencer: add convenience function for setting
 protocol error result

The fencer will soon track and return the full result (rather than just a
legacy return code) for fencing actions, for callbacks and notifications.
To simplify that process as well as move away from the legacy codes in general,
all fencer API operations will be modified to return a full result.

This convenience function will come in handy for that.
---
 daemons/fenced/pacemaker-fenced.h | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/daemons/fenced/pacemaker-fenced.h b/daemons/fenced/pacemaker-fenced.h
index cf88644f1..3bc5dc3d1 100644
--- a/daemons/fenced/pacemaker-fenced.h
+++ b/daemons/fenced/pacemaker-fenced.h
@@ -262,6 +262,13 @@ gboolean node_has_attr(const char *node, const char *name, const char *value);
 
 gboolean node_does_watchdog_fencing(const char *node);
 
+static inline void
+fenced_set_protocol_error(pcmk__action_result_t *result)
+{
+    pcmk__set_result(result, CRM_EX_PROTOCOL, PCMK_EXEC_INVALID,
+                     "Fencer API request missing required information (bug?)");
+}
+
 extern char *stonith_our_uname;
 extern gboolean stand_alone;
 extern GHashTable *device_list;
-- 
2.27.0


From ed770d36fb34dc7b3344cd326830a6c06cc789ce Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Fri, 19 Nov 2021 09:59:51 -0600
Subject: [PATCH 16/19] Refactor: fencer: make a few functions return void

... to make planned changes easier. The return values were previously ignored.
---
 daemons/fenced/fenced_commands.c  | 17 ++++++++-------
 daemons/fenced/fenced_history.c   |  6 +-----
 daemons/fenced/fenced_remote.c    | 35 ++++++++++++++-----------------
 daemons/fenced/pacemaker-fenced.c |  6 +++---
 daemons/fenced/pacemaker-fenced.h |  8 +++----
 5 files changed, 33 insertions(+), 39 deletions(-)

diff --git a/daemons/fenced/fenced_commands.c b/daemons/fenced/fenced_commands.c
index af0a92450..ea7d281ce 100644
--- a/daemons/fenced/fenced_commands.c
+++ b/daemons/fenced/fenced_commands.c
@@ -1411,8 +1411,8 @@ stonith_device_register(xmlNode * msg, const char **desc, gboolean from_cib)
     return pcmk_ok;
 }
 
-int
-stonith_device_remove(const char *id, gboolean from_cib)
+void
+stonith_device_remove(const char *id, bool from_cib)
 {
     stonith_device_t *device = g_hash_table_lookup(device_list, id);
     guint ndevices = 0;
@@ -1421,7 +1421,7 @@ stonith_device_remove(const char *id, gboolean from_cib)
         ndevices = g_hash_table_size(device_list);
         crm_info("Device '%s' not found (%d active device%s)",
                  id, ndevices, pcmk__plural_s(ndevices));
-        return pcmk_ok;
+        return;
     }
 
     if (from_cib) {
@@ -1443,7 +1443,6 @@ stonith_device_remove(const char *id, gboolean from_cib)
                   (device->cib_registered? " cib" : ""),
                   (device->api_registered? " api" : ""));
     }
-    return pcmk_ok;
 }
 
 /*!
@@ -3085,8 +3084,9 @@ handle_request(pcmk__client_t *client, uint32_t id, uint32_t flags,
         need_reply = (rc != -EINPROGRESS);
 
     } else if (pcmk__str_eq(op, STONITH_OP_FENCE_HISTORY, pcmk__str_none)) {
-        rc = stonith_fence_history(request, &data, remote_peer, call_options);
-        if (call_options & st_opt_discard_reply) {
+        stonith_fence_history(request, &data, remote_peer, call_options);
+        rc = pcmk_ok;
+        if (pcmk_is_set(call_options, st_opt_discard_reply)) {
             /* we don't expect answers to the broadcast
              * we might have sent out
              */
@@ -3109,7 +3109,8 @@ handle_request(pcmk__client_t *client, uint32_t id, uint32_t flags,
         const char *device_id = crm_element_value(dev, XML_ATTR_ID);
 
         if (is_privileged(client, op)) {
-            rc = stonith_device_remove(device_id, FALSE);
+            stonith_device_remove(device_id, false);
+            rc = pcmk_ok;
         } else {
             rc = -EACCES;
         }
@@ -3179,7 +3180,7 @@ handle_reply(pcmk__client_t *client, xmlNode *request, const char *remote_peer)
     if (pcmk__str_eq(op, STONITH_OP_QUERY, pcmk__str_none)) {
         process_remote_stonith_query(request);
     } else if (pcmk__str_any_of(op, T_STONITH_NOTIFY, STONITH_OP_FENCE, NULL)) {
-        process_remote_stonith_exec(request);
+        fenced_process_fencing_reply(request);
     } else {
         crm_err("Ignoring unknown %s reply from %s %s",
                 crm_str(op), ((client == NULL)? "peer" : "client"),
diff --git a/daemons/fenced/fenced_history.c b/daemons/fenced/fenced_history.c
index 7127593b6..bc159383c 100644
--- a/daemons/fenced/fenced_history.c
+++ b/daemons/fenced/fenced_history.c
@@ -433,14 +433,11 @@ stonith_local_history(gboolean add_id, const char *target)
  *                      a reply from
  * \param[in] remote_peer
  * \param[in] options   call-options from the request
- *
- * \return always success as there is actully nothing that can go really wrong
  */
-int
+void
 stonith_fence_history(xmlNode *msg, xmlNode **output,
                       const char *remote_peer, int options)
 {
-    int rc = 0;
     const char *target = NULL;
     xmlNode *dev = get_xpath_object("//@" F_STONITH_TARGET, msg, LOG_NEVER);
     xmlNode *out_history = NULL;
@@ -525,5 +522,4 @@ stonith_fence_history(xmlNode *msg, xmlNode **output,
         *output = stonith_local_history(FALSE, target);
     }
     free_xml(out_history);
-    return rc;
 }
diff --git a/daemons/fenced/fenced_remote.c b/daemons/fenced/fenced_remote.c
index 60ee5e32e..6338aebde 100644
--- a/daemons/fenced/fenced_remote.c
+++ b/daemons/fenced/fenced_remote.c
@@ -2086,11 +2086,9 @@ process_remote_stonith_query(xmlNode * msg)
  * or attempt another device as appropriate.
  *
  * \param[in] msg  XML reply received
- *
- * \return pcmk_ok on success, -errno on error
  */
-int
-process_remote_stonith_exec(xmlNode * msg)
+void
+fenced_process_fencing_reply(xmlNode *msg)
 {
     int rc = 0;
     const char *id = NULL;
@@ -2098,13 +2096,13 @@ process_remote_stonith_exec(xmlNode * msg)
     remote_fencing_op_t *op = NULL;
     xmlNode *dev = get_xpath_object("//@" F_STONITH_REMOTE_OP_ID, msg, LOG_ERR);
 
-    CRM_CHECK(dev != NULL, return -EPROTO);
+    CRM_CHECK(dev != NULL, return);
 
     id = crm_element_value(dev, F_STONITH_REMOTE_OP_ID);
-    CRM_CHECK(id != NULL, return -EPROTO);
+    CRM_CHECK(id != NULL, return);
 
     dev = get_xpath_object("//@" F_STONITH_RC, msg, LOG_ERR);
-    CRM_CHECK(dev != NULL, return -EPROTO);
+    CRM_CHECK(dev != NULL, return);
 
     crm_element_value_int(dev, F_STONITH_RC, &rc);
 
@@ -2125,35 +2123,35 @@ process_remote_stonith_exec(xmlNode * msg)
         /* Could be for an event that began before we started */
         /* TODO: Record the op for later querying */
         crm_info("Received peer result of unknown or expired operation %s", id);
-        return -EOPNOTSUPP;
+        return;
     }
 
     if (op->devices && device && !pcmk__str_eq(op->devices->data, device, pcmk__str_casei)) {
         crm_err("Received outdated reply for device %s (instead of %s) to "
                 "fence (%s) %s. Operation already timed out at peer level.",
                 device, (const char *) op->devices->data, op->action, op->target);
-        return rc;
+        return;
     }
 
     if (pcmk__str_eq(crm_element_value(msg, F_SUBTYPE), "broadcast", pcmk__str_casei)) {
         crm_debug("Finalizing action '%s' targeting %s on behalf of %s@%s: %s "
-                  CRM_XS " rc=%d id=%.8s",
+                  CRM_XS " id=%.8s",
                   op->action, op->target, op->client_name, op->originator,
-                  pcmk_strerror(rc), rc, op->id);
+                  pcmk_strerror(rc), op->id);
         if (rc == pcmk_ok) {
             op->state = st_done;
         } else {
             op->state = st_failed;
         }
         remote_op_done(op, msg, rc, FALSE);
-        return pcmk_ok;
+        return;
     } else if (!pcmk__str_eq(op->originator, stonith_our_uname, pcmk__str_casei)) {
         /* If this isn't a remote level broadcast, and we are not the
          * originator of the operation, we should not be receiving this msg. */
         crm_err("Received non-broadcast fencing result for operation %.8s "
                 "we do not own (device %s targeting %s)",
                 op->id, device, op->target);
-        return rc;
+        return;
     }
 
     if (pcmk_is_set(op->call_options, st_opt_topology)) {
@@ -2168,7 +2166,7 @@ process_remote_stonith_exec(xmlNode * msg)
          * and notify our local clients. */
         if (op->state == st_done) {
             remote_op_done(op, msg, rc, FALSE);
-            return rc;
+            return;
         }
 
         if ((op->phase == 2) && (rc != pcmk_ok)) {
@@ -2184,14 +2182,14 @@ process_remote_stonith_exec(xmlNode * msg)
             /* An operation completed successfully. Try another device if
              * necessary, otherwise mark the operation as done. */
             advance_topology_device_in_level(op, device, msg, rc);
-            return rc;
+            return;
         } else {
             /* This device failed, time to try another topology level. If no other
              * levels are available, mark this operation as failed and report results. */
             if (advance_topology_level(op, false) != pcmk_rc_ok) {
                 op->state = st_failed;
                 remote_op_done(op, msg, rc, FALSE);
-                return rc;
+                return;
             }
         }
     } else if (rc == pcmk_ok && op->devices == NULL) {
@@ -2199,12 +2197,12 @@ process_remote_stonith_exec(xmlNode * msg)
 
         op->state = st_done;
         remote_op_done(op, msg, rc, FALSE);
-        return rc;
+        return;
     } else if (rc == -ETIME && op->devices == NULL) {
         /* If the operation timed out don't bother retrying other peers. */
         op->state = st_failed;
         remote_op_done(op, msg, rc, FALSE);
-        return rc;
+        return;
     } else {
         /* fall-through and attempt other fencing action using another peer */
     }
@@ -2213,7 +2211,6 @@ process_remote_stonith_exec(xmlNode * msg)
     crm_trace("Next for %s on behalf of %s@%s (rc was %d)", op->target, op->originator,
               op->client_name, rc);
     call_remote_stonith(op, NULL, rc);
-    return rc;
 }
 
 gboolean
diff --git a/daemons/fenced/pacemaker-fenced.c b/daemons/fenced/pacemaker-fenced.c
index a290e1670..0a8b3bf6f 100644
--- a/daemons/fenced/pacemaker-fenced.c
+++ b/daemons/fenced/pacemaker-fenced.c
@@ -445,7 +445,7 @@ remove_cib_device(xmlXPathObjectPtr xpathObj)
 
         rsc_id = crm_element_value(match, XML_ATTR_ID);
 
-        stonith_device_remove(rsc_id, TRUE);
+        stonith_device_remove(rsc_id, true);
     }
 }
 
@@ -610,7 +610,7 @@ watchdog_device_update(void)
     } else {
         /* be silent if no device - todo parameter to stonith_device_remove */
         if (g_hash_table_lookup(device_list, STONITH_WATCHDOG_ID)) {
-            stonith_device_remove(STONITH_WATCHDOG_ID, TRUE);
+            stonith_device_remove(STONITH_WATCHDOG_ID, true);
         }
     }
 }
@@ -847,7 +847,7 @@ update_cib_stonith_devices_v2(const char *event, xmlNode * msg)
             }
             if (search != NULL) {
                 *search = 0;
-                stonith_device_remove(rsc_id, TRUE);
+                stonith_device_remove(rsc_id, true);
                 /* watchdog_device_update called afterwards
                    to fall back to implicit definition if needed */
             } else {
diff --git a/daemons/fenced/pacemaker-fenced.h b/daemons/fenced/pacemaker-fenced.h
index 3bc5dc3d1..5162ada75 100644
--- a/daemons/fenced/pacemaker-fenced.h
+++ b/daemons/fenced/pacemaker-fenced.h
@@ -214,7 +214,7 @@ void stonith_command(pcmk__client_t *client, uint32_t id, uint32_t flags,
 
 int stonith_device_register(xmlNode * msg, const char **desc, gboolean from_cib);
 
-int stonith_device_remove(const char *id, gboolean from_cib);
+void stonith_device_remove(const char *id, bool from_cib);
 
 char *stonith_level_key(xmlNode * msg, int mode);
 int stonith_level_kind(xmlNode * msg);
@@ -241,14 +241,14 @@ remote_fencing_op_t *initiate_remote_stonith_op(pcmk__client_t *client,
                                                 xmlNode *request,
                                                 gboolean manual_ack);
 
-int process_remote_stonith_exec(xmlNode * msg);
+void fenced_process_fencing_reply(xmlNode *msg);
 
 int process_remote_stonith_query(xmlNode * msg);
 
 void *create_remote_stonith_op(const char *client, xmlNode * request, gboolean peer);
 
-int stonith_fence_history(xmlNode *msg, xmlNode **output,
-                          const char *remote_peer, int options);
+void stonith_fence_history(xmlNode *msg, xmlNode **output,
+                           const char *remote_peer, int options);
 
 void stonith_fence_history_trim(void);
 
-- 
2.27.0


From 27df49460930738e77f5ca42536aff1d3bdfcae7 Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Fri, 19 Nov 2021 10:06:43 -0600
Subject: [PATCH 17/19] Refactor: fencer: drop unnecessary argument when
 advancing topology device

If we're advancing to the next device in a topology level, by necessity that
means any previous device succeeded.
---
 daemons/fenced/fenced_remote.c | 19 +++++++++----------
 1 file changed, 9 insertions(+), 10 deletions(-)

diff --git a/daemons/fenced/fenced_remote.c b/daemons/fenced/fenced_remote.c
index 6338aebde..d54e6a4ef 100644
--- a/daemons/fenced/fenced_remote.c
+++ b/daemons/fenced/fenced_remote.c
@@ -1519,14 +1519,13 @@ report_timeout_period(remote_fencing_op_t * op, int op_timeout)
  * \internal
  * \brief Advance an operation to the next device in its topology
  *
- * \param[in,out] op      Operation to advance
- * \param[in]     device  ID of device just completed
- * \param[in]     msg     XML reply that contained device result (if available)
- * \param[in]     rc      Return code of device's execution
+ * \param[in] op      Fencer operation to advance
+ * \param[in] device  ID of device that just completed
+ * \param[in] msg     If not NULL, XML reply of last delegated fencing operation
  */
 static void
 advance_topology_device_in_level(remote_fencing_op_t *op, const char *device,
-                                 xmlNode *msg, int rc)
+                                 xmlNode *msg)
 {
     /* Advance to the next device at this topology level, if any */
     if (op->devices) {
@@ -1556,8 +1555,8 @@ advance_topology_device_in_level(remote_fencing_op_t *op, const char *device,
 
     if (op->devices) {
         /* Necessary devices remain, so execute the next one */
-        crm_trace("Next targeting %s on behalf of %s@%s (rc was %d)",
-                  op->target, op->client_name, op->originator, rc);
+        crm_trace("Next targeting %s on behalf of %s@%s",
+                  op->target, op->client_name, op->originator);
 
         // The requested delay has been applied for the first device
         if (op->delay > 0) {
@@ -1570,7 +1569,7 @@ advance_topology_device_in_level(remote_fencing_op_t *op, const char *device,
         crm_trace("Marking complex fencing op targeting %s as complete",
                   op->target);
         op->state = st_done;
-        remote_op_done(op, msg, rc, FALSE);
+        remote_op_done(op, msg, pcmk_ok, FALSE);
     }
 }
 
@@ -1701,7 +1700,7 @@ call_remote_stonith(remote_fencing_op_t *op, peer_device_info_t *peer, int rc)
          */
         crm_warn("Ignoring %s 'on' failure (no capable peers) targeting %s "
                  "after successful 'off'", device, op->target);
-        advance_topology_device_in_level(op, device, NULL, pcmk_ok);
+        advance_topology_device_in_level(op, device, NULL);
         return;
 
     } else if (op->owner == FALSE) {
@@ -2181,7 +2180,7 @@ fenced_process_fencing_reply(xmlNode *msg)
         if (rc == pcmk_ok) {
             /* An operation completed successfully. Try another device if
              * necessary, otherwise mark the operation as done. */
-            advance_topology_device_in_level(op, device, msg, rc);
+            advance_topology_device_in_level(op, device, msg);
             return;
         } else {
             /* This device failed, time to try another topology level. If no other
-- 
2.27.0


From 05437e1339bc1f9071b43e97d5846a939687951d Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Mon, 29 Nov 2021 11:59:17 -0600
Subject: [PATCH 18/19] Refactor: fencer: minor renames for consistency

... per review
---
 daemons/fenced/fenced_remote.c | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/daemons/fenced/fenced_remote.c b/daemons/fenced/fenced_remote.c
index d54e6a4ef..8feb40147 100644
--- a/daemons/fenced/fenced_remote.c
+++ b/daemons/fenced/fenced_remote.c
@@ -63,7 +63,7 @@ typedef struct device_properties_s {
     int delay_base[st_phase_max];
 } device_properties_t;
 
-typedef struct st_query_result_s {
+typedef struct {
     /* Name of peer that sent this result */
     char *host;
     /* Only try peers for non-topology based operations once */
@@ -95,13 +95,12 @@ sort_strings(gconstpointer a, gconstpointer b)
 static void
 free_remote_query(gpointer data)
 {
-    if (data) {
-        peer_device_info_t *query = data;
+    if (data != NULL) {
+        peer_device_info_t *peer = data;
 
-        crm_trace("Free'ing query result from %s", query->host);
-        g_hash_table_destroy(query->devices);
-        free(query->host);
-        free(query);
+        g_hash_table_destroy(peer->devices);
+        free(peer->host);
+        free(peer);
     }
 }
 
-- 
2.27.0


From 86974d7cef05bafbed540d02e59514292581ae65 Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Tue, 30 Nov 2021 08:33:41 -0600
Subject: [PATCH 19/19] Refactor: fencer: simplify send_async_reply()

... as suggested in review
---
 daemons/fenced/fenced_commands.c | 28 ++++++++++++----------------
 1 file changed, 12 insertions(+), 16 deletions(-)

diff --git a/daemons/fenced/fenced_commands.c b/daemons/fenced/fenced_commands.c
index ea7d281ce..f34cb4f13 100644
--- a/daemons/fenced/fenced_commands.c
+++ b/daemons/fenced/fenced_commands.c
@@ -2384,36 +2384,34 @@ send_async_reply(async_command_t *cmd, const pcmk__action_result_t *result,
                  int pid, bool merged)
 {
     xmlNode *reply = NULL;
-    bool bcast = false;
 
     CRM_CHECK((cmd != NULL) && (result != NULL), return);
 
+    log_async_result(cmd, result, pid, NULL, merged);
+
     reply = construct_async_reply(cmd, result);
+    if (merged) {
+        crm_xml_add(reply, F_STONITH_MERGED, "true");
+    }
 
-    // If target was also the originator, broadcast fencing results for it
     if (!stand_alone && pcmk__is_fencing_action(cmd->action)
         && pcmk__str_eq(cmd->origin, cmd->victim, pcmk__str_casei)) {
-
+        /* The target was also the originator, so broadcast the result on its
+         * behalf (since it will be unable to).
+         */
         crm_trace("Broadcast '%s' result for %s (target was also originator)",
                   cmd->action, cmd->victim);
         crm_xml_add(reply, F_SUBTYPE, "broadcast");
         crm_xml_add(reply, F_STONITH_OPERATION, T_STONITH_NOTIFY);
-        bcast = true;
-    }
-
-    log_async_result(cmd, result, pid, NULL, merged);
-
-    if (merged) {
-        crm_xml_add(reply, F_STONITH_MERGED, "true");
-    }
-    crm_log_xml_trace(reply, "Reply");
-
-    if (bcast) {
         send_cluster_message(NULL, crm_msg_stonith_ng, reply, FALSE);
     } else {
+        // Reply only to the originator
         stonith_send_reply(reply, cmd->options, cmd->origin, cmd->client);
     }
 
+    crm_log_xml_trace(reply, "Reply");
+    free_xml(reply);
+
     if (stand_alone) {
         /* Do notification with a clean data object */
         xmlNode *notify_data = create_xml_node(NULL, T_STONITH_NOTIFY_FENCE);
@@ -2430,8 +2428,6 @@ send_async_reply(async_command_t *cmd, const pcmk__action_result_t *result,
         do_stonith_notify(T_STONITH_NOTIFY_FENCE, rc, notify_data);
         do_stonith_notify(T_STONITH_NOTIFY_HISTORY, pcmk_ok, NULL);
     }
-
-    free_xml(reply);
 }
 
 static void
-- 
2.27.0