Blame SOURCES/005-fencing-reasons.patch

97a979
From 3d10dad9a555aae040d8473edfe31a4e4279c066 Mon Sep 17 00:00:00 2001
97a979
From: Ken Gaillot <kgaillot@redhat.com>
97a979
Date: Thu, 11 Nov 2021 12:34:03 -0600
97a979
Subject: [PATCH 01/19] Refactor: libcrmcommon: add internal API for checking
97a979
 for fencing action
97a979
97a979
The naming is a little awkward -- "fencing action" has multiple meanings
97a979
depending on the context. It can refer to fencer API requests, fence device
97a979
actions, fence agent actions, or just those actions that fence a node (off and
97a979
reboot).
97a979
97a979
This new function pcmk__is_fencing_action() uses the last meaning, so it does
97a979
*not* return true for unfencing ("on" actions).
97a979
---
97a979
 include/crm/common/internal.h |  1 +
97a979
 lib/common/operations.c       | 14 ++++++++++++++
97a979
 2 files changed, 15 insertions(+)
97a979
97a979
diff --git a/include/crm/common/internal.h b/include/crm/common/internal.h
97a979
index a35c5769a..694fc6cd4 100644
97a979
--- a/include/crm/common/internal.h
97a979
+++ b/include/crm/common/internal.h
97a979
@@ -218,6 +218,7 @@ char *pcmk__notify_key(const char *rsc_id, const char *notify_type,
97a979
 char *pcmk__transition_key(int transition_id, int action_id, int target_rc,
97a979
                            const char *node);
97a979
 void pcmk__filter_op_for_digest(xmlNode *param_set);
97a979
+bool pcmk__is_fencing_action(const char *action);
97a979
 
97a979
 
97a979
 // bitwise arithmetic utilities
97a979
diff --git a/lib/common/operations.c b/lib/common/operations.c
97a979
index aa7106ce6..366c18970 100644
97a979
--- a/lib/common/operations.c
97a979
+++ b/lib/common/operations.c
97a979
@@ -523,3 +523,17 @@ crm_op_needs_metadata(const char *rsc_class, const char *op)
97a979
                             CRMD_ACTION_MIGRATE, CRMD_ACTION_MIGRATED,
97a979
                             CRMD_ACTION_NOTIFY, NULL);
97a979
 }
97a979
+
97a979
+/*!
97a979
+ * \internal
97a979
+ * \brief Check whether an action name is for a fencing action
97a979
+ *
97a979
+ * \param[in] action  Action name to check
97a979
+ *
97a979
+ * \return true if \p action is "off", "reboot", or "poweroff", otherwise false
97a979
+ */
97a979
+bool
97a979
+pcmk__is_fencing_action(const char *action)
97a979
+{
97a979
+    return pcmk__str_any_of(action, "off", "reboot", "poweroff", NULL);
97a979
+}
97a979
-- 
97a979
2.27.0
97a979
97a979
97a979
From 86ac00fb3e99d79ca2c442ae1670fe850146f734 Mon Sep 17 00:00:00 2001
97a979
From: Ken Gaillot <kgaillot@redhat.com>
97a979
Date: Thu, 11 Nov 2021 12:38:58 -0600
97a979
Subject: [PATCH 02/19] Low: fencer,scheduler: compare fence action names
97a979
 case-sensitively
97a979
97a979
Use the new convenience function pcmk__is_fencing_action() to check whether
97a979
an action name is a fencing action ("off", "reboot", or "poweroff"). This
97a979
changes the behavior from case-insensitive to case-sensitive, which is more
97a979
appropriate (the case-insensitivity was inherited from lazy use of the old
97a979
safe_str_eq() function which was always case-insensitive).
97a979
---
97a979
 daemons/fenced/fenced_commands.c    | 6 +++---
97a979
 daemons/fenced/fenced_remote.c      | 2 +-
97a979
 lib/pacemaker/pcmk_graph_producer.c | 2 +-
97a979
 lib/pengine/common.c                | 8 +-------
97a979
 4 files changed, 6 insertions(+), 12 deletions(-)
97a979
97a979
diff --git a/daemons/fenced/fenced_commands.c b/daemons/fenced/fenced_commands.c
97a979
index 63bfad3a9..46c840f2a 100644
97a979
--- a/daemons/fenced/fenced_commands.c
97a979
+++ b/daemons/fenced/fenced_commands.c
97a979
@@ -128,7 +128,7 @@ get_action_delay_max(stonith_device_t * device, const char * action)
97a979
     const char *value = NULL;
97a979
     int delay_max = 0;
97a979
 
97a979
-    if (!pcmk__strcase_any_of(action, "off", "reboot", NULL)) {
97a979
+    if (!pcmk__is_fencing_action(action)) {
97a979
         return 0;
97a979
     }
97a979
 
97a979
@@ -146,7 +146,7 @@ get_action_delay_base(stonith_device_t *device, const char *action, const char *
97a979
     char *hash_value = NULL;
97a979
     int delay_base = 0;
97a979
 
97a979
-    if (!pcmk__strcase_any_of(action, "off", "reboot", NULL)) {
97a979
+    if (!pcmk__is_fencing_action(action)) {
97a979
         return 0;
97a979
     }
97a979
 
97a979
@@ -448,7 +448,7 @@ stonith_device_execute(stonith_device_t * device)
97a979
 
97a979
     if (pcmk__str_any_of(device->agent, STONITH_WATCHDOG_AGENT,
97a979
                          STONITH_WATCHDOG_AGENT_INTERNAL, NULL)) {
97a979
-        if (pcmk__strcase_any_of(cmd->action, "reboot", "off", NULL)) {
97a979
+        if (pcmk__is_fencing_action(cmd->action)) {
97a979
             if (node_does_watchdog_fencing(stonith_our_uname)) {
97a979
                 pcmk__panic(__func__);
97a979
                 goto done;
97a979
diff --git a/daemons/fenced/fenced_remote.c b/daemons/fenced/fenced_remote.c
97a979
index 963433bf3..358ea3aa7 100644
97a979
--- a/daemons/fenced/fenced_remote.c
97a979
+++ b/daemons/fenced/fenced_remote.c
97a979
@@ -1758,7 +1758,7 @@ all_topology_devices_found(remote_fencing_op_t * op)
97a979
     if (!tp) {
97a979
         return FALSE;
97a979
     }
97a979
-    if (pcmk__strcase_any_of(op->action, "off", "reboot", NULL)) {
97a979
+    if (pcmk__is_fencing_action(op->action)) {
97a979
         /* Don't count the devices on the target node if we are killing
97a979
          * the target node. */
97a979
         skip_target = TRUE;
97a979
diff --git a/lib/pacemaker/pcmk_graph_producer.c b/lib/pacemaker/pcmk_graph_producer.c
97a979
index ffcbd1274..5bec9d8ce 100644
97a979
--- a/lib/pacemaker/pcmk_graph_producer.c
97a979
+++ b/lib/pacemaker/pcmk_graph_producer.c
97a979
@@ -721,7 +721,7 @@ add_downed_nodes(xmlNode *xml, const pe_action_t *action,
97a979
         /* Fencing makes the action's node and any hosted guest nodes down */
97a979
         const char *fence = g_hash_table_lookup(action->meta, "stonith_action");
97a979
 
97a979
-        if (pcmk__strcase_any_of(fence, "off", "reboot", NULL)) {
97a979
+        if (pcmk__is_fencing_action(fence)) {
97a979
             xmlNode *downed = create_xml_node(xml, XML_GRAPH_TAG_DOWNED);
97a979
             add_node_to_xml_by_id(action->node->details->id, downed);
97a979
             pe_foreach_guest_node(data_set, action->node, add_node_to_xml, downed);
97a979
diff --git a/lib/pengine/common.c b/lib/pengine/common.c
97a979
index 236fc26b1..fe4223816 100644
97a979
--- a/lib/pengine/common.c
97a979
+++ b/lib/pengine/common.c
97a979
@@ -27,12 +27,6 @@ check_health(const char *value)
97a979
                            "migrate-on-red", NULL);
97a979
 }
97a979
 
97a979
-static bool
97a979
-check_stonith_action(const char *value)
97a979
-{
97a979
-    return pcmk__strcase_any_of(value, "reboot", "poweroff", "off", NULL);
97a979
-}
97a979
-
97a979
 static bool
97a979
 check_placement_strategy(const char *value)
97a979
 {
97a979
@@ -114,7 +108,7 @@ static pcmk__cluster_option_t pe_opts[] = {
97a979
     },
97a979
     {
97a979
         "stonith-action", NULL, "select", "reboot, off, poweroff",
97a979
-        "reboot", check_stonith_action,
97a979
+        "reboot", pcmk__is_fencing_action,
97a979
         "Action to send to fence device when a node needs to be fenced "
97a979
             "(\"poweroff\" is a deprecated alias for \"off\")",
97a979
         NULL
97a979
-- 
97a979
2.27.0
97a979
97a979
97a979
From c8f6e8a04c4fa4271db817af0a23aa941c9d7689 Mon Sep 17 00:00:00 2001
97a979
From: Ken Gaillot <kgaillot@redhat.com>
97a979
Date: Fri, 12 Nov 2021 17:42:21 -0600
97a979
Subject: [PATCH 03/19] Refactor: fencing: rename type for peer query replies
97a979
97a979
st_query_result_t contains the device information parsed from a peer's query
97a979
reply, but the name could easily be confused with the actual success/failure
97a979
result of the query action itself. Rename it to peer_device_info_t.
97a979
---
97a979
 daemons/fenced/fenced_remote.c | 103 +++++++++++++++++----------------
97a979
 1 file changed, 52 insertions(+), 51 deletions(-)
97a979
97a979
diff --git a/daemons/fenced/fenced_remote.c b/daemons/fenced/fenced_remote.c
97a979
index 358ea3aa7..9e2f62804 100644
97a979
--- a/daemons/fenced/fenced_remote.c
97a979
+++ b/daemons/fenced/fenced_remote.c
97a979
@@ -41,7 +41,7 @@
97a979
 
97a979
 /* When one fencer queries its peers for devices able to handle a fencing
97a979
  * request, each peer will reply with a list of such devices available to it.
97a979
- * Each reply will be parsed into a st_query_result_t, with each device's
97a979
+ * Each reply will be parsed into a peer_device_info_t, with each device's
97a979
  * information kept in a device_properties_t.
97a979
  */
97a979
 
97a979
@@ -72,18 +72,19 @@ typedef struct st_query_result_s {
97a979
     int ndevices;
97a979
     /* Devices available to this host that are capable of fencing the target */
97a979
     GHashTable *devices;
97a979
-} st_query_result_t;
97a979
+} peer_device_info_t;
97a979
 
97a979
 GHashTable *stonith_remote_op_list = NULL;
97a979
 
97a979
-void call_remote_stonith(remote_fencing_op_t * op, st_query_result_t * peer, int rc);
97a979
+void call_remote_stonith(remote_fencing_op_t *op, peer_device_info_t *peer,
97a979
+                         int rc);
97a979
 static void remote_op_done(remote_fencing_op_t * op, xmlNode * data, int rc, int dup);
97a979
 extern xmlNode *stonith_create_op(int call_id, const char *token, const char *op, xmlNode * data,
97a979
                                   int call_options);
97a979
 
97a979
 static void report_timeout_period(remote_fencing_op_t * op, int op_timeout);
97a979
 static int get_op_total_timeout(const remote_fencing_op_t *op,
97a979
-                                const st_query_result_t *chosen_peer);
97a979
+                                const peer_device_info_t *chosen_peer);
97a979
 
97a979
 static gint
97a979
 sort_strings(gconstpointer a, gconstpointer b)
97a979
@@ -95,7 +96,7 @@ static void
97a979
 free_remote_query(gpointer data)
97a979
 {
97a979
     if (data) {
97a979
-        st_query_result_t *query = data;
97a979
+        peer_device_info_t *query = data;
97a979
 
97a979
         crm_trace("Free'ing query result from %s", query->host);
97a979
         g_hash_table_destroy(query->devices);
97a979
@@ -150,8 +151,8 @@ count_peer_device(gpointer key, gpointer value, gpointer user_data)
97a979
  * \return Number of devices available to peer that were not already executed
97a979
  */
97a979
 static int
97a979
-count_peer_devices(const remote_fencing_op_t *op, const st_query_result_t *peer,
97a979
-                   gboolean verified_only)
97a979
+count_peer_devices(const remote_fencing_op_t *op,
97a979
+                   const peer_device_info_t *peer, gboolean verified_only)
97a979
 {
97a979
     struct peer_count_data data;
97a979
 
97a979
@@ -175,7 +176,7 @@ count_peer_devices(const remote_fencing_op_t *op, const st_query_result_t *peer,
97a979
  * \return Device properties if found, NULL otherwise
97a979
  */
97a979
 static device_properties_t *
97a979
-find_peer_device(const remote_fencing_op_t *op, const st_query_result_t *peer,
97a979
+find_peer_device(const remote_fencing_op_t *op, const peer_device_info_t *peer,
97a979
                  const char *device)
97a979
 {
97a979
     device_properties_t *props = g_hash_table_lookup(peer->devices, device);
97a979
@@ -196,7 +197,7 @@ find_peer_device(const remote_fencing_op_t *op, const st_query_result_t *peer,
97a979
  * \return TRUE if device was found and marked, FALSE otherwise
97a979
  */
97a979
 static gboolean
97a979
-grab_peer_device(const remote_fencing_op_t *op, st_query_result_t *peer,
97a979
+grab_peer_device(const remote_fencing_op_t *op, peer_device_info_t *peer,
97a979
                  const char *device, gboolean verified_devices_only)
97a979
 {
97a979
     device_properties_t *props = find_peer_device(op, peer, device);
97a979
@@ -1216,7 +1217,7 @@ enum find_best_peer_options {
97a979
     FIND_PEER_VERIFIED_ONLY = 0x0004,
97a979
 };
97a979
 
97a979
-static st_query_result_t *
97a979
+static peer_device_info_t *
97a979
 find_best_peer(const char *device, remote_fencing_op_t * op, enum find_best_peer_options options)
97a979
 {
97a979
     GList *iter = NULL;
97a979
@@ -1227,7 +1228,7 @@ find_best_peer(const char *device, remote_fencing_op_t * op, enum find_best_peer
97a979
     }
97a979
 
97a979
     for (iter = op->query_results; iter != NULL; iter = iter->next) {
97a979
-        st_query_result_t *peer = iter->data;
97a979
+        peer_device_info_t *peer = iter->data;
97a979
 
97a979
         crm_trace("Testing result from %s targeting %s with %d device%s: %d %x",
97a979
                   peer->host, op->target, peer->ndevices,
97a979
@@ -1257,11 +1258,11 @@ find_best_peer(const char *device, remote_fencing_op_t * op, enum find_best_peer
97a979
     return NULL;
97a979
 }
97a979
 
97a979
-static st_query_result_t *
97a979
+static peer_device_info_t *
97a979
 stonith_choose_peer(remote_fencing_op_t * op)
97a979
 {
97a979
     const char *device = NULL;
97a979
-    st_query_result_t *peer = NULL;
97a979
+    peer_device_info_t *peer = NULL;
97a979
     uint32_t active = fencing_active_peers();
97a979
 
97a979
     do {
97a979
@@ -1317,8 +1318,8 @@ stonith_choose_peer(remote_fencing_op_t * op)
97a979
 }
97a979
 
97a979
 static int
97a979
-get_device_timeout(const remote_fencing_op_t *op, const st_query_result_t *peer,
97a979
-                   const char *device)
97a979
+get_device_timeout(const remote_fencing_op_t *op,
97a979
+                   const peer_device_info_t *peer, const char *device)
97a979
 {
97a979
     device_properties_t *props;
97a979
 
97a979
@@ -1338,7 +1339,7 @@ get_device_timeout(const remote_fencing_op_t *op, const st_query_result_t *peer,
97a979
 
97a979
 struct timeout_data {
97a979
     const remote_fencing_op_t *op;
97a979
-    const st_query_result_t *peer;
97a979
+    const peer_device_info_t *peer;
97a979
     int total_timeout;
97a979
 };
97a979
 
97a979
@@ -1365,7 +1366,7 @@ add_device_timeout(gpointer key, gpointer value, gpointer user_data)
97a979
 }
97a979
 
97a979
 static int
97a979
-get_peer_timeout(const remote_fencing_op_t *op, const st_query_result_t *peer)
97a979
+get_peer_timeout(const remote_fencing_op_t *op, const peer_device_info_t *peer)
97a979
 {
97a979
     struct timeout_data timeout;
97a979
 
97a979
@@ -1380,7 +1381,7 @@ get_peer_timeout(const remote_fencing_op_t *op, const st_query_result_t *peer)
97a979
 
97a979
 static int
97a979
 get_op_total_timeout(const remote_fencing_op_t *op,
97a979
-                     const st_query_result_t *chosen_peer)
97a979
+                     const peer_device_info_t *chosen_peer)
97a979
 {
97a979
     int total_timeout = 0;
97a979
     stonith_topology_t *tp = find_topology_for_host(op->target);
97a979
@@ -1403,7 +1404,7 @@ get_op_total_timeout(const remote_fencing_op_t *op,
97a979
             }
97a979
             for (device_list = tp->levels[i]; device_list; device_list = device_list->next) {
97a979
                 for (iter = op->query_results; iter != NULL; iter = iter->next) {
97a979
-                    const st_query_result_t *peer = iter->data;
97a979
+                    const peer_device_info_t *peer = iter->data;
97a979
 
97a979
                     if (find_peer_device(op, peer, device_list->data)) {
97a979
                         total_timeout += get_device_timeout(op, peer,
97a979
@@ -1555,7 +1556,7 @@ check_watchdog_fencing_and_wait(remote_fencing_op_t * op)
97a979
 }
97a979
 
97a979
 void
97a979
-call_remote_stonith(remote_fencing_op_t * op, st_query_result_t * peer, int rc)
97a979
+call_remote_stonith(remote_fencing_op_t *op, peer_device_info_t *peer, int rc)
97a979
 {
97a979
     const char *device = NULL;
97a979
     int timeout = op->base_timeout;
97a979
@@ -1734,8 +1735,8 @@ call_remote_stonith(remote_fencing_op_t * op, st_query_result_t * peer, int rc)
97a979
 static gint
97a979
 sort_peers(gconstpointer a, gconstpointer b)
97a979
 {
97a979
-    const st_query_result_t *peer_a = a;
97a979
-    const st_query_result_t *peer_b = b;
97a979
+    const peer_device_info_t *peer_a = a;
97a979
+    const peer_device_info_t *peer_b = b;
97a979
 
97a979
     return (peer_b->ndevices - peer_a->ndevices);
97a979
 }
97a979
@@ -1768,7 +1769,7 @@ all_topology_devices_found(remote_fencing_op_t * op)
97a979
         for (device = tp->levels[i]; device; device = device->next) {
97a979
             match = NULL;
97a979
             for (iter = op->query_results; iter && !match; iter = iter->next) {
97a979
-                st_query_result_t *peer = iter->data;
97a979
+                peer_device_info_t *peer = iter->data;
97a979
 
97a979
                 if (skip_target && pcmk__str_eq(peer->host, op->target, pcmk__str_casei)) {
97a979
                     continue;
97a979
@@ -1850,31 +1851,31 @@ parse_action_specific(xmlNode *xml, const char *peer, const char *device,
97a979
  *
97a979
  * \param[in]     xml       XML node containing device properties
97a979
  * \param[in,out] op        Operation that query and reply relate to
97a979
- * \param[in,out] result    Peer's results
97a979
+ * \param[in,out] peer      Peer's device information
97a979
  * \param[in]     device    ID of device being parsed
97a979
  */
97a979
 static void
97a979
 add_device_properties(xmlNode *xml, remote_fencing_op_t *op,
97a979
-                      st_query_result_t *result, const char *device)
97a979
+                      peer_device_info_t *peer, const char *device)
97a979
 {
97a979
     xmlNode *child;
97a979
     int verified = 0;
97a979
     device_properties_t *props = calloc(1, sizeof(device_properties_t));
97a979
 
97a979
-    /* Add a new entry to this result's devices list */
97a979
+    /* Add a new entry to this peer's devices list */
97a979
     CRM_ASSERT(props != NULL);
97a979
-    g_hash_table_insert(result->devices, strdup(device), props);
97a979
+    g_hash_table_insert(peer->devices, strdup(device), props);
97a979
 
97a979
     /* Peers with verified (monitored) access will be preferred */
97a979
     crm_element_value_int(xml, F_STONITH_DEVICE_VERIFIED, &verified);
97a979
     if (verified) {
97a979
         crm_trace("Peer %s has confirmed a verified device %s",
97a979
-                  result->host, device);
97a979
+                  peer->host, device);
97a979
         props->verified = TRUE;
97a979
     }
97a979
 
97a979
     /* Parse action-specific device properties */
97a979
-    parse_action_specific(xml, result->host, device, op_requested_action(op),
97a979
+    parse_action_specific(xml, peer->host, device, op_requested_action(op),
97a979
                           op, st_phase_requested, props);
97a979
     for (child = pcmk__xml_first_child(xml); child != NULL;
97a979
          child = pcmk__xml_next(child)) {
97a979
@@ -1883,10 +1884,10 @@ add_device_properties(xmlNode *xml, remote_fencing_op_t *op,
97a979
          * winds up getting remapped.
97a979
          */
97a979
         if (pcmk__str_eq(ID(child), "off", pcmk__str_casei)) {
97a979
-            parse_action_specific(child, result->host, device, "off",
97a979
+            parse_action_specific(child, peer->host, device, "off",
97a979
                                   op, st_phase_off, props);
97a979
         } else if (pcmk__str_eq(ID(child), "on", pcmk__str_casei)) {
97a979
-            parse_action_specific(child, result->host, device, "on",
97a979
+            parse_action_specific(child, peer->host, device, "on",
97a979
                                   op, st_phase_on, props);
97a979
         }
97a979
     }
97a979
@@ -1903,17 +1904,17 @@ add_device_properties(xmlNode *xml, remote_fencing_op_t *op,
97a979
  *
97a979
  * \return Newly allocated result structure with parsed reply
97a979
  */
97a979
-static st_query_result_t *
97a979
+static peer_device_info_t *
97a979
 add_result(remote_fencing_op_t *op, const char *host, int ndevices, xmlNode *xml)
97a979
 {
97a979
-    st_query_result_t *result = calloc(1, sizeof(st_query_result_t));
97a979
+    peer_device_info_t *peer = calloc(1, sizeof(peer_device_info_t));
97a979
     xmlNode *child;
97a979
 
97a979
     // cppcheck seems not to understand the abort logic in CRM_CHECK
97a979
     // cppcheck-suppress memleak
97a979
-    CRM_CHECK(result != NULL, return NULL);
97a979
-    result->host = strdup(host);
97a979
-    result->devices = pcmk__strkey_table(free, free);
97a979
+    CRM_CHECK(peer != NULL, return NULL);
97a979
+    peer->host = strdup(host);
97a979
+    peer->devices = pcmk__strkey_table(free, free);
97a979
 
97a979
     /* Each child element describes one capable device available to the peer */
97a979
     for (child = pcmk__xml_first_child(xml); child != NULL;
97a979
@@ -1921,17 +1922,17 @@ add_result(remote_fencing_op_t *op, const char *host, int ndevices, xmlNode *xml
97a979
         const char *device = ID(child);
97a979
 
97a979
         if (device) {
97a979
-            add_device_properties(child, op, result, device);
97a979
+            add_device_properties(child, op, peer, device);
97a979
         }
97a979
     }
97a979
 
97a979
-    result->ndevices = g_hash_table_size(result->devices);
97a979
-    CRM_CHECK(ndevices == result->ndevices,
97a979
+    peer->ndevices = g_hash_table_size(peer->devices);
97a979
+    CRM_CHECK(ndevices == peer->ndevices,
97a979
               crm_err("Query claimed to have %d device%s but %d found",
97a979
-                      ndevices, pcmk__plural_s(ndevices), result->ndevices));
97a979
+                      ndevices, pcmk__plural_s(ndevices), peer->ndevices));
97a979
 
97a979
-    op->query_results = g_list_insert_sorted(op->query_results, result, sort_peers);
97a979
-    return result;
97a979
+    op->query_results = g_list_insert_sorted(op->query_results, peer, sort_peers);
97a979
+    return peer;
97a979
 }
97a979
 
97a979
 /*!
97a979
@@ -1957,7 +1958,7 @@ process_remote_stonith_query(xmlNode * msg)
97a979
     const char *id = NULL;
97a979
     const char *host = NULL;
97a979
     remote_fencing_op_t *op = NULL;
97a979
-    st_query_result_t *result = NULL;
97a979
+    peer_device_info_t *peer = NULL;
97a979
     uint32_t replies_expected;
97a979
     xmlNode *dev = get_xpath_object("//@" F_STONITH_REMOTE_OP_ID, msg, LOG_ERR);
97a979
 
97a979
@@ -1991,7 +1992,7 @@ process_remote_stonith_query(xmlNode * msg)
97a979
              op->replies, replies_expected, host,
97a979
              op->target, op->action, ndevices, pcmk__plural_s(ndevices), id);
97a979
     if (ndevices > 0) {
97a979
-        result = add_result(op, host, ndevices, dev);
97a979
+        peer = add_result(op, host, ndevices, dev);
97a979
     }
97a979
 
97a979
     if (pcmk_is_set(op->call_options, st_opt_topology)) {
97a979
@@ -2001,7 +2002,7 @@ process_remote_stonith_query(xmlNode * msg)
97a979
         if (op->state == st_query && all_topology_devices_found(op)) {
97a979
             /* All the query results are in for the topology, start the fencing ops. */
97a979
             crm_trace("All topology devices found");
97a979
-            call_remote_stonith(op, result, pcmk_ok);
97a979
+            call_remote_stonith(op, peer, pcmk_ok);
97a979
 
97a979
         } else if (have_all_replies) {
97a979
             crm_info("All topology query replies have arrived, continuing (%d expected/%d received) ",
97a979
@@ -2010,15 +2011,15 @@ process_remote_stonith_query(xmlNode * msg)
97a979
         }
97a979
 
97a979
     } else if (op->state == st_query) {
97a979
-        int nverified = count_peer_devices(op, result, TRUE);
97a979
+        int nverified = count_peer_devices(op, peer, TRUE);
97a979
 
97a979
         /* We have a result for a non-topology fencing op that looks promising,
97a979
          * go ahead and start fencing before query timeout */
97a979
-        if (result && (host_is_target == FALSE) && nverified) {
97a979
+        if ((peer != NULL) && !host_is_target && nverified) {
97a979
             /* we have a verified device living on a peer that is not the target */
97a979
             crm_trace("Found %d verified device%s",
97a979
                       nverified, pcmk__plural_s(nverified));
97a979
-            call_remote_stonith(op, result, pcmk_ok);
97a979
+            call_remote_stonith(op, peer, pcmk_ok);
97a979
 
97a979
         } else if (have_all_replies) {
97a979
             crm_info("All query replies have arrived, continuing (%d expected/%d received) ",
97a979
@@ -2029,10 +2030,10 @@ process_remote_stonith_query(xmlNode * msg)
97a979
             crm_trace("Waiting for more peer results before launching fencing operation");
97a979
         }
97a979
 
97a979
-    } else if (result && (op->state == st_done)) {
97a979
+    } else if ((peer != NULL) && (op->state == st_done)) {
97a979
         crm_info("Discarding query result from %s (%d device%s): "
97a979
-                 "Operation is %s", result->host,
97a979
-                 result->ndevices, pcmk__plural_s(result->ndevices),
97a979
+                 "Operation is %s", peer->host,
97a979
+                 peer->ndevices, pcmk__plural_s(peer->ndevices),
97a979
                  stonith_op_state_str(op->state));
97a979
     }
97a979
 
97a979
-- 
97a979
2.27.0
97a979
97a979
97a979
From 913e0620310089d2250e9ecde383df757f8e8063 Mon Sep 17 00:00:00 2001
97a979
From: Ken Gaillot <kgaillot@redhat.com>
97a979
Date: Thu, 11 Nov 2021 12:46:37 -0600
97a979
Subject: [PATCH 04/19] Low: fencer: improve broadcasting replies for fenced
97a979
 originators
97a979
97a979
If the target of a fencing action was also the originator, the executioner
97a979
broadcasts the result on their behalf.
97a979
97a979
Previously, it would check if the action was not in a list of actions that are
97a979
never broadcasted. However we really only want to broadcast off/reboot results
97a979
so just check for that instead.
97a979
97a979
This also rearranges reply creation slightly so we don't trace-log the reply
97a979
until it is fully created.
97a979
---
97a979
 daemons/fenced/fenced_commands.c | 19 +++++++++----------
97a979
 1 file changed, 9 insertions(+), 10 deletions(-)
97a979
97a979
diff --git a/daemons/fenced/fenced_commands.c b/daemons/fenced/fenced_commands.c
97a979
index 46c840f2a..e4185f6e1 100644
97a979
--- a/daemons/fenced/fenced_commands.c
97a979
+++ b/daemons/fenced/fenced_commands.c
97a979
@@ -2385,32 +2385,31 @@ send_async_reply(async_command_t *cmd, const pcmk__action_result_t *result,
97a979
                  int pid, bool merged)
97a979
 {
97a979
     xmlNode *reply = NULL;
97a979
-    gboolean bcast = FALSE;
97a979
+    bool bcast = false;
97a979
 
97a979
     CRM_CHECK((cmd != NULL) && (result != NULL), return);
97a979
 
97a979
     reply = construct_async_reply(cmd, result);
97a979
 
97a979
-    // Only replies for certain actions are broadcast
97a979
-    if (pcmk__str_any_of(cmd->action, "metadata", "monitor", "list", "status",
97a979
-                         NULL)) {
97a979
-        crm_trace("Never broadcast '%s' replies", cmd->action);
97a979
+    // If target was also the originator, broadcast fencing results for it
97a979
+    if (!stand_alone && pcmk__is_fencing_action(cmd->action)
97a979
+        && pcmk__str_eq(cmd->origin, cmd->victim, pcmk__str_casei)) {
97a979
 
97a979
-    } else if (!stand_alone && pcmk__str_eq(cmd->origin, cmd->victim, pcmk__str_casei) && !pcmk__str_eq(cmd->action, "on", pcmk__str_casei)) {
97a979
-        crm_trace("Broadcast '%s' reply for %s", cmd->action, cmd->victim);
97a979
+        crm_trace("Broadcast '%s' result for %s (target was also originator)",
97a979
+                  cmd->action, cmd->victim);
97a979
         crm_xml_add(reply, F_SUBTYPE, "broadcast");
97a979
-        bcast = TRUE;
97a979
+        crm_xml_add(reply, F_STONITH_OPERATION, T_STONITH_NOTIFY);
97a979
+        bcast = true;
97a979
     }
97a979
 
97a979
     log_async_result(cmd, result, pid, NULL, merged);
97a979
-    crm_log_xml_trace(reply, "Reply");
97a979
 
97a979
     if (merged) {
97a979
         crm_xml_add(reply, F_STONITH_MERGED, "true");
97a979
     }
97a979
+    crm_log_xml_trace(reply, "Reply");
97a979
 
97a979
     if (bcast) {
97a979
-        crm_xml_add(reply, F_STONITH_OPERATION, T_STONITH_NOTIFY);
97a979
         send_cluster_message(NULL, crm_msg_stonith_ng, reply, FALSE);
97a979
 
97a979
     } else if (cmd->origin) {
97a979
-- 
97a979
2.27.0
97a979
97a979
97a979
From 8b8f94fd9ca5e61922cb81e32c8a3d0f1d75fb0b Mon Sep 17 00:00:00 2001
97a979
From: Ken Gaillot <kgaillot@redhat.com>
97a979
Date: Thu, 11 Nov 2021 14:40:49 -0600
97a979
Subject: [PATCH 05/19] Refactor: fencer: avoid code duplication when sending
97a979
 async reply
97a979
97a979
... and clean up reply function
97a979
---
97a979
 daemons/fenced/fenced_commands.c | 33 ++++++++++++++++++--------------
97a979
 1 file changed, 19 insertions(+), 14 deletions(-)
97a979
97a979
diff --git a/daemons/fenced/fenced_commands.c b/daemons/fenced/fenced_commands.c
97a979
index e4185f6e1..4ea0a337a 100644
97a979
--- a/daemons/fenced/fenced_commands.c
97a979
+++ b/daemons/fenced/fenced_commands.c
97a979
@@ -2411,15 +2411,8 @@ send_async_reply(async_command_t *cmd, const pcmk__action_result_t *result,
97a979
 
97a979
     if (bcast) {
97a979
         send_cluster_message(NULL, crm_msg_stonith_ng, reply, FALSE);
97a979
-
97a979
-    } else if (cmd->origin) {
97a979
-        crm_trace("Directed reply to %s", cmd->origin);
97a979
-        send_cluster_message(crm_get_peer(0, cmd->origin), crm_msg_stonith_ng, reply, FALSE);
97a979
-
97a979
     } else {
97a979
-        crm_trace("Directed local %ssync reply to %s",
97a979
-                  (cmd->options & st_opt_sync_call) ? "" : "a-", cmd->client_name);
97a979
-        do_local_reply(reply, cmd->client, cmd->options & st_opt_sync_call, FALSE);
97a979
+        stonith_send_reply(reply, cmd->options, cmd->origin, cmd->client);
97a979
     }
97a979
 
97a979
     if (stand_alone) {
97a979
@@ -2814,16 +2807,28 @@ check_alternate_host(const char *target)
97a979
     return alternate_host;
97a979
 }
97a979
 
97a979
+/*!
97a979
+ * \internal
97a979
+ * \brief Send a reply to a CPG peer or IPC client
97a979
+ *
97a979
+ * \param[in] reply         XML reply to send
97a979
+ * \param[in] call_options  Send synchronously if st_opt_sync_call is set here
97a979
+ * \param[in] remote_peer   If not NULL, name of peer node to send CPG reply
97a979
+ * \param[in] client_id     If not NULL, name of client to send IPC reply
97a979
+ */
97a979
 static void
97a979
-stonith_send_reply(xmlNode * reply, int call_options, const char *remote_peer,
97a979
+stonith_send_reply(xmlNode *reply, int call_options, const char *remote_peer,
97a979
                    const char *client_id)
97a979
 {
97a979
-    if (remote_peer) {
97a979
-        send_cluster_message(crm_get_peer(0, remote_peer), crm_msg_stonith_ng, reply, FALSE);
97a979
-    } else {
97a979
+    CRM_CHECK((reply != NULL) && ((remote_peer != NULL) || (client_id != NULL)),
97a979
+              return);
97a979
+
97a979
+    if (remote_peer == NULL) {
97a979
         do_local_reply(reply, client_id,
97a979
-                       pcmk_is_set(call_options, st_opt_sync_call),
97a979
-                       (remote_peer != NULL));
97a979
+                       pcmk_is_set(call_options, st_opt_sync_call), FALSE);
97a979
+    } else {
97a979
+        send_cluster_message(crm_get_peer(0, remote_peer), crm_msg_stonith_ng,
97a979
+                             reply, FALSE);
97a979
     }
97a979
 }
97a979
 
97a979
-- 
97a979
2.27.0
97a979
97a979
97a979
From 2cdbda58f0e9f38a0e302506107fd933cb415144 Mon Sep 17 00:00:00 2001
97a979
From: Ken Gaillot <kgaillot@redhat.com>
97a979
Date: Tue, 23 Nov 2021 17:24:09 -0600
97a979
Subject: [PATCH 06/19] Refactor: fencer: ensure all requests get clean-up
97a979
97a979
handle_request() has if-else blocks for each type of request. Previously, if a
97a979
request didn't need a reply, the function would do any clean-up needed and
97a979
return immediately. Now, we track whether a reply is needed, and all request
97a979
types flow to the end of the function for consistent clean-up.
97a979
97a979
This doesn't change any behavior at this point, but allows us to do more at the
97a979
end of request handling.
97a979
---
97a979
 daemons/fenced/fenced_commands.c | 46 ++++++++++++++++++--------------
97a979
 1 file changed, 26 insertions(+), 20 deletions(-)
97a979
97a979
diff --git a/daemons/fenced/fenced_commands.c b/daemons/fenced/fenced_commands.c
97a979
index 4ea0a337a..19477b49b 100644
97a979
--- a/daemons/fenced/fenced_commands.c
97a979
+++ b/daemons/fenced/fenced_commands.c
97a979
@@ -2892,6 +2892,7 @@ handle_request(pcmk__client_t *client, uint32_t id, uint32_t flags,
97a979
 
97a979
     xmlNode *data = NULL;
97a979
     xmlNode *reply = NULL;
97a979
+    bool need_reply = true;
97a979
 
97a979
     char *output = NULL;
97a979
     const char *op = crm_element_value(request, F_STONITH_OPERATION);
97a979
@@ -2921,10 +2922,12 @@ handle_request(pcmk__client_t *client, uint32_t id, uint32_t flags,
97a979
         pcmk__ipc_send_xml(client, id, reply, flags);
97a979
         client->request_id = 0;
97a979
         free_xml(reply);
97a979
-        return 0;
97a979
+        rc = pcmk_ok;
97a979
+        need_reply = false;
97a979
 
97a979
     } else if (pcmk__str_eq(op, STONITH_OP_EXEC, pcmk__str_none)) {
97a979
         rc = stonith_device_action(request, &output);
97a979
+        need_reply = (rc != -EINPROGRESS);
97a979
 
97a979
     } else if (pcmk__str_eq(op, STONITH_OP_TIMEOUT_UPDATE, pcmk__str_none)) {
97a979
         const char *call_id = crm_element_value(request, F_STONITH_CALLID);
97a979
@@ -2933,7 +2936,8 @@ handle_request(pcmk__client_t *client, uint32_t id, uint32_t flags,
97a979
 
97a979
         crm_element_value_int(request, F_STONITH_TIMEOUT, &op_timeout);
97a979
         do_stonith_async_timeout_update(client_id, call_id, op_timeout);
97a979
-        return 0;
97a979
+        rc = pcmk_ok;
97a979
+        need_reply = false;
97a979
 
97a979
     } else if (pcmk__str_eq(op, STONITH_OP_QUERY, pcmk__str_none)) {
97a979
         if (remote_peer) {
97a979
@@ -2944,7 +2948,8 @@ handle_request(pcmk__client_t *client, uint32_t id, uint32_t flags,
97a979
         remove_relay_op(request);
97a979
 
97a979
         stonith_query(request, remote_peer, client_id, call_options);
97a979
-        return 0;
97a979
+        rc = pcmk_ok;
97a979
+        need_reply = false;
97a979
 
97a979
     } else if (pcmk__str_eq(op, T_STONITH_NOTIFY, pcmk__str_none)) {
97a979
         const char *flag_name = NULL;
97a979
@@ -2965,7 +2970,8 @@ handle_request(pcmk__client_t *client, uint32_t id, uint32_t flags,
97a979
         }
97a979
 
97a979
         pcmk__ipc_send_ack(client, id, flags, "ack", CRM_EX_OK);
97a979
-        return 0;
97a979
+        rc = pcmk_ok;
97a979
+        need_reply = false;
97a979
 
97a979
     } else if (pcmk__str_eq(op, STONITH_OP_RELAY, pcmk__str_none)) {
97a979
         xmlNode *dev = get_xpath_object("//@" F_STONITH_TARGET, request, LOG_TRACE);
97a979
@@ -2977,8 +2983,11 @@ handle_request(pcmk__client_t *client, uint32_t id, uint32_t flags,
97a979
                    crm_element_value(dev, F_STONITH_ACTION),
97a979
                    crm_element_value(dev, F_STONITH_TARGET));
97a979
 
97a979
-        if (initiate_remote_stonith_op(NULL, request, FALSE) != NULL) {
97a979
+        if (initiate_remote_stonith_op(NULL, request, FALSE) == NULL) {
97a979
+            rc = -EPROTO;
97a979
+        } else {
97a979
             rc = -EINPROGRESS;
97a979
+            need_reply = false;
97a979
         }
97a979
 
97a979
     } else if (pcmk__str_eq(op, STONITH_OP_FENCE, pcmk__str_none)) {
97a979
@@ -3012,7 +3021,7 @@ handle_request(pcmk__client_t *client, uint32_t id, uint32_t flags,
97a979
                 crm_element_value_int(dev, F_STONITH_TOLERANCE, &tolerance);
97a979
 
97a979
                 if (stonith_check_fence_tolerance(tolerance, target, action)) {
97a979
-                    rc = 0;
97a979
+                    rc = pcmk_ok;
97a979
                     goto done;
97a979
                 }
97a979
 
97a979
@@ -3047,10 +3056,13 @@ handle_request(pcmk__client_t *client, uint32_t id, uint32_t flags,
97a979
                                      FALSE);
97a979
                 rc = -EINPROGRESS;
97a979
 
97a979
-            } else if (initiate_remote_stonith_op(client, request, FALSE) != NULL) {
97a979
+            } else if (initiate_remote_stonith_op(client, request, FALSE) == NULL) {
97a979
+                rc = -EPROTO;
97a979
+            } else {
97a979
                 rc = -EINPROGRESS;
97a979
             }
97a979
         }
97a979
+        need_reply = (rc != -EINPROGRESS);
97a979
 
97a979
     } else if (pcmk__str_eq(op, STONITH_OP_FENCE_HISTORY, pcmk__str_none)) {
97a979
         rc = stonith_fence_history(request, &data, remote_peer, call_options);
97a979
@@ -3058,8 +3070,8 @@ handle_request(pcmk__client_t *client, uint32_t id, uint32_t flags,
97a979
             /* we don't expect answers to the broadcast
97a979
              * we might have sent out
97a979
              */
97a979
-            free_xml(data);
97a979
-            return pcmk_ok;
97a979
+            rc = pcmk_ok;
97a979
+            need_reply = false;
97a979
         }
97a979
 
97a979
     } else if (pcmk__str_eq(op, STONITH_OP_DEVICE_ADD, pcmk__str_none)) {
97a979
@@ -3111,8 +3123,8 @@ handle_request(pcmk__client_t *client, uint32_t id, uint32_t flags,
97a979
         crm_element_value_int(request, XML_ATTR_ID, &node_id);
97a979
         name = crm_element_value(request, XML_ATTR_UNAME);
97a979
         reap_crm_member(node_id, name);
97a979
-
97a979
-        return pcmk_ok;
97a979
+        rc = pcmk_ok;
97a979
+        need_reply = false;
97a979
 
97a979
     } else {
97a979
         crm_err("Unknown IPC request %s from %s %s", op,
97a979
@@ -3120,20 +3132,14 @@ handle_request(pcmk__client_t *client, uint32_t id, uint32_t flags,
97a979
                 ((client == NULL)? remote_peer : pcmk__client_name(client)));
97a979
     }
97a979
 
97a979
-  done:
97a979
-
97a979
+done:
97a979
     if (rc == -EACCES) {
97a979
         crm_warn("Rejecting IPC request '%s' from unprivileged client %s",
97a979
                  crm_str(op), pcmk__client_name(client));
97a979
     }
97a979
 
97a979
-    /* Always reply unless the request is in process still.
97a979
-     * If in progress, a reply will happen async after the request
97a979
-     * processing is finished */
97a979
-    if (rc != -EINPROGRESS) {
97a979
-        crm_trace("Reply handling: %p %u %u %d %d %s", client, client?client->request_id:0,
97a979
-                  id, pcmk_is_set(call_options, st_opt_sync_call), call_options,
97a979
-                  crm_element_value(request, F_STONITH_CALLOPTS));
97a979
+    // Reply if result is known
97a979
+    if (need_reply) {
97a979
 
97a979
         if (pcmk_is_set(call_options, st_opt_sync_call)) {
97a979
             CRM_ASSERT(client == NULL || client->request_id == id);
97a979
-- 
97a979
2.27.0
97a979
97a979
97a979
From 067d655ebd3fbb0ed27f4e7426db4c3b661ba777 Mon Sep 17 00:00:00 2001
97a979
From: Ken Gaillot <kgaillot@redhat.com>
97a979
Date: Tue, 23 Nov 2021 17:26:32 -0600
97a979
Subject: [PATCH 07/19] Log: fencer: improve debug logs when processing CPG/IPC
97a979
 messages
97a979
97a979
By moving the result log messages from stonith_command() to handle_reply() and
97a979
handle_request(), we can simplify stonith_command() and give slightly better
97a979
messages.
97a979
---
97a979
 daemons/fenced/fenced_commands.c | 80 +++++++++++++++-----------------
97a979
 1 file changed, 38 insertions(+), 42 deletions(-)
97a979
97a979
diff --git a/daemons/fenced/fenced_commands.c b/daemons/fenced/fenced_commands.c
97a979
index 19477b49b..98af0e04f 100644
97a979
--- a/daemons/fenced/fenced_commands.c
97a979
+++ b/daemons/fenced/fenced_commands.c
97a979
@@ -2883,7 +2883,7 @@ remove_relay_op(xmlNode * request)
97a979
     }
97a979
 }
97a979
 
97a979
-static int
97a979
+static void
97a979
 handle_request(pcmk__client_t *client, uint32_t id, uint32_t flags,
97a979
                xmlNode *request, const char *remote_peer)
97a979
 {
97a979
@@ -3152,73 +3152,69 @@ done:
97a979
     free_xml(data);
97a979
     free_xml(reply);
97a979
 
97a979
-    return rc;
97a979
+    crm_debug("Processed %s request from %s %s: %s (rc=%d)",
97a979
+              op, ((client == NULL)? "peer" : "client"),
97a979
+              ((client == NULL)? remote_peer : pcmk__client_name(client)),
97a979
+              ((rc > 0)? "" : pcmk_strerror(rc)), rc);
97a979
 }
97a979
 
97a979
 static void
97a979
 handle_reply(pcmk__client_t *client, xmlNode *request, const char *remote_peer)
97a979
 {
97a979
-    const char *op = crm_element_value(request, F_STONITH_OPERATION);
97a979
+    // Copy, because request might be freed before we want to log this
97a979
+    char *op = crm_element_value_copy(request, F_STONITH_OPERATION);
97a979
 
97a979
     if (pcmk__str_eq(op, STONITH_OP_QUERY, pcmk__str_none)) {
97a979
         process_remote_stonith_query(request);
97a979
-    } else if (pcmk__str_eq(op, T_STONITH_NOTIFY, pcmk__str_none)) {
97a979
-        process_remote_stonith_exec(request);
97a979
-    } else if (pcmk__str_eq(op, STONITH_OP_FENCE, pcmk__str_none)) {
97a979
-        /* Reply to a complex fencing op */
97a979
+    } else if (pcmk__str_any_of(op, T_STONITH_NOTIFY, STONITH_OP_FENCE, NULL)) {
97a979
         process_remote_stonith_exec(request);
97a979
     } else {
97a979
-        crm_err("Unknown %s reply from %s %s", op,
97a979
-                ((client == NULL)? "peer" : "client"),
97a979
+        crm_err("Ignoring unknown %s reply from %s %s",
97a979
+                crm_str(op), ((client == NULL)? "peer" : "client"),
97a979
                 ((client == NULL)? remote_peer : pcmk__client_name(client)));
97a979
         crm_log_xml_warn(request, "UnknownOp");
97a979
+        free(op);
97a979
+        return;
97a979
     }
97a979
+    crm_debug("Processed %s reply from %s %s",
97a979
+              op, ((client == NULL)? "peer" : "client"),
97a979
+              ((client == NULL)? remote_peer : pcmk__client_name(client)));
97a979
+    free(op);
97a979
 }
97a979
 
97a979
+/*!
97a979
+ * \internal
97a979
+ * \brief Handle a message from an IPC client or CPG peer
97a979
+ *
97a979
+ * \param[in] client      If not NULL, IPC client that sent message
97a979
+ * \param[in] id          If from IPC client, IPC message ID
97a979
+ * \param[in] flags       Message flags
97a979
+ * \param[in] message     Message XML
97a979
+ * \param[in] remote_peer If not NULL, CPG peer that sent message
97a979
+ */
97a979
 void
97a979
 stonith_command(pcmk__client_t *client, uint32_t id, uint32_t flags,
97a979
-                xmlNode *request, const char *remote_peer)
97a979
+                xmlNode *message, const char *remote_peer)
97a979
 {
97a979
-    int call_options = 0;
97a979
-    int rc = 0;
97a979
-    gboolean is_reply = FALSE;
97a979
-
97a979
-    /* Copy op for reporting. The original might get freed by handle_reply()
97a979
-     * before we use it in crm_debug():
97a979
-     *     handle_reply()
97a979
-     *     |- process_remote_stonith_exec()
97a979
-     *     |-- remote_op_done()
97a979
-     *     |--- handle_local_reply_and_notify()
97a979
-     *     |---- crm_xml_add(...F_STONITH_OPERATION...)
97a979
-     *     |--- free_xml(op->request)
97a979
-     */
97a979
-    char *op = crm_element_value_copy(request, F_STONITH_OPERATION);
97a979
-
97a979
-    if (get_xpath_object("//" T_STONITH_REPLY, request, LOG_NEVER)) {
97a979
-        is_reply = TRUE;
97a979
-    }
97a979
+    int call_options = st_opt_none;
97a979
+    bool is_reply = get_xpath_object("//" T_STONITH_REPLY, message,
97a979
+                                     LOG_NEVER) != NULL;
97a979
 
97a979
-    crm_element_value_int(request, F_STONITH_CALLOPTS, &call_options);
97a979
-    crm_debug("Processing %s%s %u from %s %s with call options 0x%08x",
97a979
-              op, (is_reply? " reply" : ""), id,
97a979
+    crm_element_value_int(message, F_STONITH_CALLOPTS, &call_options);
97a979
+    crm_debug("Processing %ssynchronous %s %s %u from %s %s",
97a979
+              pcmk_is_set(call_options, st_opt_sync_call)? "" : "a",
97a979
+              crm_element_value(message, F_STONITH_OPERATION),
97a979
+              (is_reply? "reply" : "request"), id,
97a979
               ((client == NULL)? "peer" : "client"),
97a979
-              ((client == NULL)? remote_peer : pcmk__client_name(client)),
97a979
-              call_options);
97a979
+              ((client == NULL)? remote_peer : pcmk__client_name(client)));
97a979
 
97a979
     if (pcmk_is_set(call_options, st_opt_sync_call)) {
97a979
         CRM_ASSERT(client == NULL || client->request_id == id);
97a979
     }
97a979
 
97a979
     if (is_reply) {
97a979
-        handle_reply(client, request, remote_peer);
97a979
+        handle_reply(client, message, remote_peer);
97a979
     } else {
97a979
-        rc = handle_request(client, id, flags, request, remote_peer);
97a979
+        handle_request(client, id, flags, message, remote_peer);
97a979
     }
97a979
-
97a979
-    crm_debug("Processed %s%s from %s %s: %s (rc=%d)",
97a979
-              op, (is_reply? " reply" : ""),
97a979
-              ((client == NULL)? "peer" : "client"),
97a979
-              ((client == NULL)? remote_peer : pcmk__client_name(client)),
97a979
-              ((rc > 0)? "" : pcmk_strerror(rc)), rc);
97a979
-    free(op);
97a979
 }
97a979
-- 
97a979
2.27.0
97a979
97a979
97a979
From 44cb340c11b4652f452a47eb2b0050b4a459382b Mon Sep 17 00:00:00 2001
97a979
From: Ken Gaillot <kgaillot@redhat.com>
97a979
Date: Mon, 15 Nov 2021 16:29:09 -0600
97a979
Subject: [PATCH 08/19] Refactor: fencer: drop unused argument from
97a979
 notification functions
97a979
97a979
---
97a979
 daemons/fenced/fenced_commands.c  | 12 ++++++------
97a979
 daemons/fenced/fenced_history.c   |  6 +++---
97a979
 daemons/fenced/fenced_remote.c    |  6 +++---
97a979
 daemons/fenced/pacemaker-fenced.c | 18 +++++++++---------
97a979
 daemons/fenced/pacemaker-fenced.h |  6 +++---
97a979
 5 files changed, 24 insertions(+), 24 deletions(-)
97a979
97a979
diff --git a/daemons/fenced/fenced_commands.c b/daemons/fenced/fenced_commands.c
97a979
index 98af0e04f..946ce4042 100644
97a979
--- a/daemons/fenced/fenced_commands.c
97a979
+++ b/daemons/fenced/fenced_commands.c
97a979
@@ -2428,8 +2428,8 @@ send_async_reply(async_command_t *cmd, const pcmk__action_result_t *result,
97a979
         crm_xml_add(notify_data, F_STONITH_REMOTE_OP_ID, cmd->remote_op_id);
97a979
         crm_xml_add(notify_data, F_STONITH_ORIGIN, cmd->client);
97a979
 
97a979
-        do_stonith_notify(0, T_STONITH_NOTIFY_FENCE, rc, notify_data);
97a979
-        do_stonith_notify(0, T_STONITH_NOTIFY_HISTORY, 0, NULL);
97a979
+        do_stonith_notify(T_STONITH_NOTIFY_FENCE, rc, notify_data);
97a979
+        do_stonith_notify(T_STONITH_NOTIFY_HISTORY, pcmk_ok, NULL);
97a979
     }
97a979
 
97a979
     free_xml(reply);
97a979
@@ -3082,7 +3082,7 @@ handle_request(pcmk__client_t *client, uint32_t id, uint32_t flags,
97a979
         } else {
97a979
             rc = -EACCES;
97a979
         }
97a979
-        do_stonith_notify_device(call_options, op, rc, device_id);
97a979
+        do_stonith_notify_device(op, rc, device_id);
97a979
 
97a979
     } else if (pcmk__str_eq(op, STONITH_OP_DEVICE_DEL, pcmk__str_none)) {
97a979
         xmlNode *dev = get_xpath_object("//" F_STONITH_DEVICE, request, LOG_ERR);
97a979
@@ -3093,7 +3093,7 @@ handle_request(pcmk__client_t *client, uint32_t id, uint32_t flags,
97a979
         } else {
97a979
             rc = -EACCES;
97a979
         }
97a979
-        do_stonith_notify_device(call_options, op, rc, device_id);
97a979
+        do_stonith_notify_device(op, rc, device_id);
97a979
 
97a979
     } else if (pcmk__str_eq(op, STONITH_OP_LEVEL_ADD, pcmk__str_none)) {
97a979
         char *device_id = NULL;
97a979
@@ -3103,7 +3103,7 @@ handle_request(pcmk__client_t *client, uint32_t id, uint32_t flags,
97a979
         } else {
97a979
             rc = -EACCES;
97a979
         }
97a979
-        do_stonith_notify_level(call_options, op, rc, device_id);
97a979
+        do_stonith_notify_level(op, rc, device_id);
97a979
         free(device_id);
97a979
 
97a979
     } else if (pcmk__str_eq(op, STONITH_OP_LEVEL_DEL, pcmk__str_none)) {
97a979
@@ -3114,7 +3114,7 @@ handle_request(pcmk__client_t *client, uint32_t id, uint32_t flags,
97a979
         } else {
97a979
             rc = -EACCES;
97a979
         }
97a979
-        do_stonith_notify_level(call_options, op, rc, device_id);
97a979
+        do_stonith_notify_level(op, rc, device_id);
97a979
 
97a979
     } else if(pcmk__str_eq(op, CRM_OP_RM_NODE_CACHE, pcmk__str_casei)) {
97a979
         int node_id = 0;
97a979
diff --git a/daemons/fenced/fenced_history.c b/daemons/fenced/fenced_history.c
97a979
index 1ba034ba9..7127593b6 100644
97a979
--- a/daemons/fenced/fenced_history.c
97a979
+++ b/daemons/fenced/fenced_history.c
97a979
@@ -100,7 +100,7 @@ stonith_fence_history_cleanup(const char *target,
97a979
         g_hash_table_foreach_remove(stonith_remote_op_list,
97a979
                              stonith_remove_history_entry,
97a979
                              (gpointer) target);
97a979
-        do_stonith_notify(0, T_STONITH_NOTIFY_HISTORY, 0, NULL);
97a979
+        do_stonith_notify(T_STONITH_NOTIFY_HISTORY, pcmk_ok, NULL);
97a979
     }
97a979
 }
97a979
 
97a979
@@ -396,7 +396,7 @@ stonith_local_history_diff_and_merge(GHashTable *remote_history,
97a979
 
97a979
     if (updated) {
97a979
         stonith_fence_history_trim();
97a979
-        do_stonith_notify(0, T_STONITH_NOTIFY_HISTORY, 0, NULL);
97a979
+        do_stonith_notify(T_STONITH_NOTIFY_HISTORY, pcmk_ok, NULL);
97a979
     }
97a979
 
97a979
     if (cnt == 0) {
97a979
@@ -470,7 +470,7 @@ stonith_fence_history(xmlNode *msg, xmlNode **output,
97a979
            is done so send a notification for anything
97a979
            that smells like history-sync
97a979
          */
97a979
-        do_stonith_notify(0, T_STONITH_NOTIFY_HISTORY_SYNCED, 0, NULL);
97a979
+        do_stonith_notify(T_STONITH_NOTIFY_HISTORY_SYNCED, pcmk_ok, NULL);
97a979
         if (crm_element_value(msg, F_STONITH_CALLID)) {
97a979
             /* this is coming from the stonith-API
97a979
             *
97a979
diff --git a/daemons/fenced/fenced_remote.c b/daemons/fenced/fenced_remote.c
97a979
index 9e2f62804..c907cd120 100644
97a979
--- a/daemons/fenced/fenced_remote.c
97a979
+++ b/daemons/fenced/fenced_remote.c
97a979
@@ -423,8 +423,8 @@ handle_local_reply_and_notify(remote_fencing_op_t * op, xmlNode * data, int rc)
97a979
     do_local_reply(reply, op->client_id, op->call_options & st_opt_sync_call, FALSE);
97a979
 
97a979
     /* bcast to all local clients that the fencing operation happend */
97a979
-    do_stonith_notify(0, T_STONITH_NOTIFY_FENCE, rc, notify_data);
97a979
-    do_stonith_notify(0, T_STONITH_NOTIFY_HISTORY, 0, NULL);
97a979
+    do_stonith_notify(T_STONITH_NOTIFY_FENCE, rc, notify_data);
97a979
+    do_stonith_notify(T_STONITH_NOTIFY_HISTORY, pcmk_ok, NULL);
97a979
 
97a979
     /* mark this op as having notify's already sent */
97a979
     op->notify_sent = TRUE;
97a979
@@ -1119,7 +1119,7 @@ create_remote_stonith_op(const char *client, xmlNode * request, gboolean peer)
97a979
 
97a979
     if (op->state != st_duplicate) {
97a979
         /* kick history readers */
97a979
-        do_stonith_notify(0, T_STONITH_NOTIFY_HISTORY, 0, NULL);
97a979
+        do_stonith_notify(T_STONITH_NOTIFY_HISTORY, pcmk_ok, NULL);
97a979
     }
97a979
 
97a979
     /* safe to trim as long as that doesn't touch pending ops */
97a979
diff --git a/daemons/fenced/pacemaker-fenced.c b/daemons/fenced/pacemaker-fenced.c
97a979
index a64004ce1..a290e1670 100644
97a979
--- a/daemons/fenced/pacemaker-fenced.c
97a979
+++ b/daemons/fenced/pacemaker-fenced.c
97a979
@@ -357,7 +357,7 @@ do_stonith_async_timeout_update(const char *client_id, const char *call_id, int
97a979
 }
97a979
 
97a979
 void
97a979
-do_stonith_notify(int options, const char *type, int result, xmlNode * data)
97a979
+do_stonith_notify(const char *type, int result, xmlNode *data)
97a979
 {
97a979
     /* TODO: Standardize the contents of data */
97a979
     xmlNode *update_msg = create_xml_node(NULL, "notify");
97a979
@@ -380,7 +380,7 @@ do_stonith_notify(int options, const char *type, int result, xmlNode * data)
97a979
 }
97a979
 
97a979
 static void
97a979
-do_stonith_notify_config(int options, const char *op, int rc,
97a979
+do_stonith_notify_config(const char *op, int rc,
97a979
                          const char *desc, int active)
97a979
 {
97a979
     xmlNode *notify_data = create_xml_node(NULL, op);
97a979
@@ -390,20 +390,20 @@ do_stonith_notify_config(int options, const char *op, int rc,
97a979
     crm_xml_add(notify_data, F_STONITH_DEVICE, desc);
97a979
     crm_xml_add_int(notify_data, F_STONITH_ACTIVE, active);
97a979
 
97a979
-    do_stonith_notify(options, op, rc, notify_data);
97a979
+    do_stonith_notify(op, rc, notify_data);
97a979
     free_xml(notify_data);
97a979
 }
97a979
 
97a979
 void
97a979
-do_stonith_notify_device(int options, const char *op, int rc, const char *desc)
97a979
+do_stonith_notify_device(const char *op, int rc, const char *desc)
97a979
 {
97a979
-    do_stonith_notify_config(options, op, rc, desc, g_hash_table_size(device_list));
97a979
+    do_stonith_notify_config(op, rc, desc, g_hash_table_size(device_list));
97a979
 }
97a979
 
97a979
 void
97a979
-do_stonith_notify_level(int options, const char *op, int rc, const char *desc)
97a979
+do_stonith_notify_level(const char *op, int rc, const char *desc)
97a979
 {
97a979
-    do_stonith_notify_config(options, op, rc, desc, g_hash_table_size(topology));
97a979
+    do_stonith_notify_config(op, rc, desc, g_hash_table_size(topology));
97a979
 }
97a979
 
97a979
 static void
97a979
@@ -418,7 +418,7 @@ topology_remove_helper(const char *node, int level)
97a979
     crm_xml_add(data, XML_ATTR_STONITH_TARGET, node);
97a979
 
97a979
     rc = stonith_level_remove(data, &desc);
97a979
-    do_stonith_notify_level(0, STONITH_OP_LEVEL_DEL, rc, desc);
97a979
+    do_stonith_notify_level(STONITH_OP_LEVEL_DEL, rc, desc);
97a979
 
97a979
     free_xml(data);
97a979
     free(desc);
97a979
@@ -468,7 +468,7 @@ handle_topology_change(xmlNode *match, bool remove)
97a979
     }
97a979
 
97a979
     rc = stonith_level_register(match, &desc);
97a979
-    do_stonith_notify_level(0, STONITH_OP_LEVEL_ADD, rc, desc);
97a979
+    do_stonith_notify_level(STONITH_OP_LEVEL_ADD, rc, desc);
97a979
 
97a979
     free(desc);
97a979
 }
97a979
diff --git a/daemons/fenced/pacemaker-fenced.h b/daemons/fenced/pacemaker-fenced.h
97a979
index a64b57693..3e41d867e 100644
97a979
--- a/daemons/fenced/pacemaker-fenced.h
97a979
+++ b/daemons/fenced/pacemaker-fenced.h
97a979
@@ -233,9 +233,9 @@ xmlNode *stonith_construct_reply(xmlNode * request, const char *output, xmlNode
97a979
 void
97a979
  do_stonith_async_timeout_update(const char *client, const char *call_id, int timeout);
97a979
 
97a979
-void do_stonith_notify(int options, const char *type, int result, xmlNode * data);
97a979
-void do_stonith_notify_device(int options, const char *op, int rc, const char *desc);
97a979
-void do_stonith_notify_level(int options, const char *op, int rc, const char *desc);
97a979
+void do_stonith_notify(const char *type, int result, xmlNode *data);
97a979
+void do_stonith_notify_device(const char *op, int rc, const char *desc);
97a979
+void do_stonith_notify_level(const char *op, int rc, const char *desc);
97a979
 
97a979
 remote_fencing_op_t *initiate_remote_stonith_op(pcmk__client_t *client,
97a979
                                                 xmlNode *request,
97a979
-- 
97a979
2.27.0
97a979
97a979
97a979
From a49df4901b663b3366634c1d58f04625ecba4005 Mon Sep 17 00:00:00 2001
97a979
From: Ken Gaillot <kgaillot@redhat.com>
97a979
Date: Tue, 16 Nov 2021 11:57:14 -0600
97a979
Subject: [PATCH 09/19] Refactor: fencer: functionize checking for privileged
97a979
 client
97a979
97a979
... for readability and to make planned changes easier
97a979
---
97a979
 daemons/fenced/fenced_commands.c | 49 +++++++++++++++++++-------------
97a979
 1 file changed, 30 insertions(+), 19 deletions(-)
97a979
97a979
diff --git a/daemons/fenced/fenced_commands.c b/daemons/fenced/fenced_commands.c
97a979
index 946ce4042..34c956f5c 100644
97a979
--- a/daemons/fenced/fenced_commands.c
97a979
+++ b/daemons/fenced/fenced_commands.c
97a979
@@ -2883,6 +2883,32 @@ remove_relay_op(xmlNode * request)
97a979
     }
97a979
 }
97a979
 
97a979
+/*!
97a979
+ * \internal
97a979
+ * \brief Check whether an API request was sent by a privileged user
97a979
+ *
97a979
+ * API commands related to fencing configuration may be done only by privileged
97a979
+ * IPC users (i.e. root or hacluster), because all other users should go through
97a979
+ * the CIB to have ACLs applied. If no client was given, this is a peer request,
97a979
+ * which is always allowed.
97a979
+ *
97a979
+ * \param[in] c   IPC client that sent request (or NULL if sent by CPG peer)
97a979
+ * \param[in] op  Requested API operation (for logging only)
97a979
+ *
97a979
+ * \return true if sender is peer or privileged client, otherwise false
97a979
+ */
97a979
+static inline bool
97a979
+is_privileged(pcmk__client_t *c, const char *op)
97a979
+{
97a979
+    if ((c == NULL) || pcmk_is_set(c->flags, pcmk__client_privileged)) {
97a979
+        return true;
97a979
+    } else {
97a979
+        crm_warn("Rejecting IPC request '%s' from unprivileged client %s",
97a979
+                 crm_str(op), pcmk__client_name(c));
97a979
+        return false;
97a979
+    }
97a979
+}
97a979
+
97a979
 static void
97a979
 handle_request(pcmk__client_t *client, uint32_t id, uint32_t flags,
97a979
                xmlNode *request, const char *remote_peer)
97a979
@@ -2898,15 +2924,6 @@ handle_request(pcmk__client_t *client, uint32_t id, uint32_t flags,
97a979
     const char *op = crm_element_value(request, F_STONITH_OPERATION);
97a979
     const char *client_id = crm_element_value(request, F_STONITH_CLIENTID);
97a979
 
97a979
-    /* IPC commands related to fencing configuration may be done only by
97a979
-     * privileged users (i.e. root or hacluster), because all other users should
97a979
-     * go through the CIB to have ACLs applied.
97a979
-     *
97a979
-     * If no client was given, this is a peer request, which is always allowed.
97a979
-     */
97a979
-    bool allowed = (client == NULL)
97a979
-                   || pcmk_is_set(client->flags, pcmk__client_privileged);
97a979
-
97a979
     crm_element_value_int(request, F_STONITH_CALLOPTS, &call_options);
97a979
 
97a979
     if (pcmk_is_set(call_options, st_opt_sync_call)) {
97a979
@@ -3077,7 +3094,7 @@ handle_request(pcmk__client_t *client, uint32_t id, uint32_t flags,
97a979
     } else if (pcmk__str_eq(op, STONITH_OP_DEVICE_ADD, pcmk__str_none)) {
97a979
         const char *device_id = NULL;
97a979
 
97a979
-        if (allowed) {
97a979
+        if (is_privileged(client, op)) {
97a979
             rc = stonith_device_register(request, &device_id, FALSE);
97a979
         } else {
97a979
             rc = -EACCES;
97a979
@@ -3088,7 +3105,7 @@ handle_request(pcmk__client_t *client, uint32_t id, uint32_t flags,
97a979
         xmlNode *dev = get_xpath_object("//" F_STONITH_DEVICE, request, LOG_ERR);
97a979
         const char *device_id = crm_element_value(dev, XML_ATTR_ID);
97a979
 
97a979
-        if (allowed) {
97a979
+        if (is_privileged(client, op)) {
97a979
             rc = stonith_device_remove(device_id, FALSE);
97a979
         } else {
97a979
             rc = -EACCES;
97a979
@@ -3098,7 +3115,7 @@ handle_request(pcmk__client_t *client, uint32_t id, uint32_t flags,
97a979
     } else if (pcmk__str_eq(op, STONITH_OP_LEVEL_ADD, pcmk__str_none)) {
97a979
         char *device_id = NULL;
97a979
 
97a979
-        if (allowed) {
97a979
+        if (is_privileged(client, op)) {
97a979
             rc = stonith_level_register(request, &device_id);
97a979
         } else {
97a979
             rc = -EACCES;
97a979
@@ -3109,7 +3126,7 @@ handle_request(pcmk__client_t *client, uint32_t id, uint32_t flags,
97a979
     } else if (pcmk__str_eq(op, STONITH_OP_LEVEL_DEL, pcmk__str_none)) {
97a979
         char *device_id = NULL;
97a979
 
97a979
-        if (allowed) {
97a979
+        if (is_privileged(client, op)) {
97a979
             rc = stonith_level_remove(request, &device_id);
97a979
         } else {
97a979
             rc = -EACCES;
97a979
@@ -3133,14 +3150,8 @@ handle_request(pcmk__client_t *client, uint32_t id, uint32_t flags,
97a979
     }
97a979
 
97a979
 done:
97a979
-    if (rc == -EACCES) {
97a979
-        crm_warn("Rejecting IPC request '%s' from unprivileged client %s",
97a979
-                 crm_str(op), pcmk__client_name(client));
97a979
-    }
97a979
-
97a979
     // Reply if result is known
97a979
     if (need_reply) {
97a979
-
97a979
         if (pcmk_is_set(call_options, st_opt_sync_call)) {
97a979
             CRM_ASSERT(client == NULL || client->request_id == id);
97a979
         }
97a979
-- 
97a979
2.27.0
97a979
97a979
97a979
From 10ca8a5ef5266159bc3f993802aeae6537ceeb11 Mon Sep 17 00:00:00 2001
97a979
From: Ken Gaillot <kgaillot@redhat.com>
97a979
Date: Tue, 16 Nov 2021 16:59:03 -0600
97a979
Subject: [PATCH 10/19] Low: fencer: return -ETIME for peer fencing timeouts
97a979
97a979
94c55684 set the result as pcmk_ok, but it appears that the intent was just to
97a979
keep the delegate from being set, and -ETIME should still do that, while being
97a979
more appropriate.
97a979
---
97a979
 daemons/fenced/fenced_remote.c | 2 +-
97a979
 1 file changed, 1 insertion(+), 1 deletion(-)
97a979
97a979
diff --git a/daemons/fenced/fenced_remote.c b/daemons/fenced/fenced_remote.c
97a979
index c907cd120..dc7b802da 100644
97a979
--- a/daemons/fenced/fenced_remote.c
97a979
+++ b/daemons/fenced/fenced_remote.c
97a979
@@ -608,7 +608,7 @@ remote_op_timeout_one(gpointer userdata)
97a979
 
97a979
     crm_notice("Peer's '%s' action targeting %s for client %s timed out " CRM_XS
97a979
                " id=%.8s", op->action, op->target, op->client_name, op->id);
97a979
-    call_remote_stonith(op, NULL, pcmk_ok);
97a979
+    call_remote_stonith(op, NULL, -ETIME);
97a979
     return FALSE;
97a979
 }
97a979
 
97a979
-- 
97a979
2.27.0
97a979
97a979
97a979
From fb2eefeb695cc92e1a2aed6f1f1d2b900d4fb83e Mon Sep 17 00:00:00 2001
97a979
From: Ken Gaillot <kgaillot@redhat.com>
97a979
Date: Tue, 16 Nov 2021 17:54:56 -0600
97a979
Subject: [PATCH 11/19] Refactor: fencer: functionize common part of timeout
97a979
 handling
97a979
97a979
Previously, remote_op_timeout() was called from multiple places, but only one
97a979
of those places needed the full processing. The common part is now in a new
97a979
function finalize_timed_out_op() called from all the places, and
97a979
remote_op_timeout() now has just the additional processing needed by the one
97a979
place plus a call to the new function.
97a979
97a979
This will allow a future change to set a different exit reason depending on
97a979
which step timed out.
97a979
---
97a979
 daemons/fenced/fenced_remote.c | 49 +++++++++++++++++++++++-----------
97a979
 1 file changed, 34 insertions(+), 15 deletions(-)
97a979
97a979
diff --git a/daemons/fenced/fenced_remote.c b/daemons/fenced/fenced_remote.c
97a979
index dc7b802da..22c4b0772 100644
97a979
--- a/daemons/fenced/fenced_remote.c
97a979
+++ b/daemons/fenced/fenced_remote.c
97a979
@@ -612,20 +612,18 @@ remote_op_timeout_one(gpointer userdata)
97a979
     return FALSE;
97a979
 }
97a979
 
97a979
-static gboolean
97a979
-remote_op_timeout(gpointer userdata)
97a979
+/*!
97a979
+ * \internal
97a979
+ * \brief Finalize a remote fencer operation that timed out
97a979
+ *
97a979
+ * \param[in] op      Fencer operation that timed out
97a979
+ */
97a979
+static void
97a979
+finalize_timed_out_op(remote_fencing_op_t *op)
97a979
 {
97a979
-    remote_fencing_op_t *op = userdata;
97a979
 
97a979
     op->op_timer_total = 0;
97a979
 
97a979
-    if (op->state == st_done) {
97a979
-        crm_debug("Action '%s' targeting %s for client %s already completed "
97a979
-                  CRM_XS " id=%.8s",
97a979
-                  op->action, op->target, op->client_name, op->id);
97a979
-        return FALSE;
97a979
-    }
97a979
-
97a979
     crm_debug("Action '%s' targeting %s for client %s timed out "
97a979
               CRM_XS " id=%.8s",
97a979
               op->action, op->target, op->client_name, op->id);
97a979
@@ -637,14 +635,35 @@ remote_op_timeout(gpointer userdata)
97a979
          */
97a979
         op->state = st_done;
97a979
         remote_op_done(op, NULL, pcmk_ok, FALSE);
97a979
-        return FALSE;
97a979
+        return;
97a979
     }
97a979
 
97a979
     op->state = st_failed;
97a979
 
97a979
     remote_op_done(op, NULL, -ETIME, FALSE);
97a979
+}
97a979
 
97a979
-    return FALSE;
97a979
+/*!
97a979
+ * \internal
97a979
+ * \brief Finalize a remote fencer operation that timed out
97a979
+ *
97a979
+ * \param[in] userdata  Fencer operation that timed out
97a979
+ *
97a979
+ * \return G_SOURCE_REMOVE (which tells glib not to restart timer)
97a979
+ */
97a979
+static gboolean
97a979
+remote_op_timeout(gpointer userdata)
97a979
+{
97a979
+    remote_fencing_op_t *op = userdata;
97a979
+
97a979
+    if (op->state == st_done) {
97a979
+        crm_debug("Action '%s' targeting %s for client %s already completed "
97a979
+                  CRM_XS " id=%.8s",
97a979
+                  op->action, op->target, op->client_name, op->id);
97a979
+    } else {
97a979
+        finalize_timed_out_op(userdata);
97a979
+    }
97a979
+    return G_SOURCE_REMOVE;
97a979
 }
97a979
 
97a979
 static gboolean
97a979
@@ -670,7 +689,7 @@ remote_op_query_timeout(gpointer data)
97a979
             g_source_remove(op->op_timer_total);
97a979
             op->op_timer_total = 0;
97a979
         }
97a979
-        remote_op_timeout(op);
97a979
+        finalize_timed_out_op(op);
97a979
     }
97a979
 
97a979
     return FALSE;
97a979
@@ -1675,8 +1694,8 @@ call_remote_stonith(remote_fencing_op_t *op, peer_device_info_t *peer, int rc)
97a979
         crm_info("No remaining peers capable of fencing (%s) %s for client %s "
97a979
                  CRM_XS " state=%s", op->action, op->target, op->client_name,
97a979
                  stonith_op_state_str(op->state));
97a979
-        CRM_LOG_ASSERT(op->state < st_done);
97a979
-        remote_op_timeout(op);
97a979
+        CRM_CHECK(op->state < st_done, return);
97a979
+        finalize_timed_out_op(op);
97a979
 
97a979
     } else if(op->replies >= op->replies_expected || op->replies >= fencing_active_peers()) {
97a979
 //        int rc = -EHOSTUNREACH;
97a979
-- 
97a979
2.27.0
97a979
97a979
97a979
From c047005a112ac7da5ba62084e39c79db739f0923 Mon Sep 17 00:00:00 2001
97a979
From: Ken Gaillot <kgaillot@redhat.com>
97a979
Date: Thu, 18 Nov 2021 10:05:18 -0600
97a979
Subject: [PATCH 12/19] Low: fencer: handle malformed manual confirmation
97a979
 requests better
97a979
97a979
Rename stonith_manual_ack() to fenced_handle_manual_confirmation(), and move
97a979
more of the manual confirmation handling in handle_request() into it, for
97a979
better code isolation. This will also make planned changes easier.
97a979
97a979
The one behavioral difference is that a failure of initiate_remote_stonith_op()
97a979
will now be ignored rather than segmentation fault trying to dereference NULL.
97a979
---
97a979
 daemons/fenced/fenced_commands.c  | 20 ++++++++++++--------
97a979
 daemons/fenced/fenced_remote.c    | 29 ++++++++++++++++++++++++-----
97a979
 daemons/fenced/pacemaker-fenced.h |  2 +-
97a979
 3 files changed, 37 insertions(+), 14 deletions(-)
97a979
97a979
diff --git a/daemons/fenced/fenced_commands.c b/daemons/fenced/fenced_commands.c
97a979
index 34c956f5c..6f325b9e8 100644
97a979
--- a/daemons/fenced/fenced_commands.c
97a979
+++ b/daemons/fenced/fenced_commands.c
97a979
@@ -3012,14 +3012,18 @@ handle_request(pcmk__client_t *client, uint32_t id, uint32_t flags,
97a979
         if (remote_peer || stand_alone) {
97a979
             rc = stonith_fence(request);
97a979
 
97a979
-        } else if (call_options & st_opt_manual_ack) {
97a979
-            remote_fencing_op_t *rop = NULL;
97a979
-            xmlNode *dev = get_xpath_object("//@" F_STONITH_TARGET, request, LOG_TRACE);
97a979
-            const char *target = crm_element_value(dev, F_STONITH_TARGET);
97a979
-
97a979
-            crm_notice("Received manual confirmation that %s is fenced", target);
97a979
-            rop = initiate_remote_stonith_op(client, request, TRUE);
97a979
-            rc = stonith_manual_ack(request, rop);
97a979
+        } else if (pcmk_is_set(call_options, st_opt_manual_ack)) {
97a979
+            switch (fenced_handle_manual_confirmation(client, request)) {
97a979
+                case pcmk_rc_ok:
97a979
+                    rc = pcmk_ok;
97a979
+                    break;
97a979
+                case EINPROGRESS:
97a979
+                    rc = -EINPROGRESS;
97a979
+                    break;
97a979
+                default:
97a979
+                    rc = -EPROTO;
97a979
+                    break;
97a979
+            }
97a979
 
97a979
         } else {
97a979
             const char *alternate_host = NULL;
97a979
diff --git a/daemons/fenced/fenced_remote.c b/daemons/fenced/fenced_remote.c
97a979
index 22c4b0772..60ee5e32e 100644
97a979
--- a/daemons/fenced/fenced_remote.c
97a979
+++ b/daemons/fenced/fenced_remote.c
97a979
@@ -1003,22 +1003,41 @@ static uint32_t fencing_active_peers(void)
97a979
     return count;
97a979
 }
97a979
 
97a979
+/*!
97a979
+ * \internal
97a979
+ * \brief Process a manual confirmation of a pending fence action
97a979
+ *
97a979
+ * \param[in]  client  IPC client that sent confirmation
97a979
+ * \param[in]  msg     Request XML with manual confirmation
97a979
+ *
97a979
+ * \return Standard Pacemaker return code
97a979
+ */
97a979
 int
97a979
-stonith_manual_ack(xmlNode * msg, remote_fencing_op_t * op)
97a979
+fenced_handle_manual_confirmation(pcmk__client_t *client, xmlNode *msg)
97a979
 {
97a979
+    remote_fencing_op_t *op = NULL;
97a979
     xmlNode *dev = get_xpath_object("//@" F_STONITH_TARGET, msg, LOG_ERR);
97a979
 
97a979
+    CRM_CHECK(dev != NULL, return EPROTO);
97a979
+
97a979
+    crm_notice("Received manual confirmation that %s has been fenced",
97a979
+               crm_str(crm_element_value(dev, F_STONITH_TARGET)));
97a979
+    op = initiate_remote_stonith_op(client, msg, TRUE);
97a979
+    if (op == NULL) {
97a979
+        return EPROTO;
97a979
+    }
97a979
     op->state = st_done;
97a979
     set_fencing_completed(op);
97a979
     op->delegate = strdup("a human");
97a979
 
97a979
-    crm_notice("Injecting manual confirmation that %s is safely off/down",
97a979
-               crm_element_value(dev, F_STONITH_TARGET));
97a979
+    // For the fencer's purposes, the fencing operation is done
97a979
 
97a979
     remote_op_done(op, msg, pcmk_ok, FALSE);
97a979
 
97a979
-    // Replies are sent via done_cb -> send_async_reply() -> do_local_reply()
97a979
-    return -EINPROGRESS;
97a979
+    /* For the requester's purposes, the operation is still pending. The
97a979
+     * actual result will be sent asynchronously via the operation's done_cb().
97a979
+     */
97a979
+    return EINPROGRESS;
97a979
 }
97a979
 
97a979
 /*!
97a979
diff --git a/daemons/fenced/pacemaker-fenced.h b/daemons/fenced/pacemaker-fenced.h
97a979
index 3e41d867e..cf88644f1 100644
97a979
--- a/daemons/fenced/pacemaker-fenced.h
97a979
+++ b/daemons/fenced/pacemaker-fenced.h
97a979
@@ -256,7 +256,7 @@ bool fencing_peer_active(crm_node_t *peer);
97a979
 
97a979
 void set_fencing_completed(remote_fencing_op_t * op);
97a979
 
97a979
-int stonith_manual_ack(xmlNode * msg, remote_fencing_op_t * op);
97a979
+int fenced_handle_manual_confirmation(pcmk__client_t *client, xmlNode *msg);
97a979
 
97a979
 gboolean node_has_attr(const char *node, const char *name, const char *value);
97a979
 
97a979
-- 
97a979
2.27.0
97a979
97a979
97a979
From ec60f014b5a8f774aa57a26e40a2b1b94a7e3d3a Mon Sep 17 00:00:00 2001
97a979
From: Ken Gaillot <kgaillot@redhat.com>
97a979
Date: Thu, 18 Nov 2021 10:35:31 -0600
97a979
Subject: [PATCH 13/19] Low: fencer: handle malformed topology level removal
97a979
 requests better
97a979
97a979
Log the malformed request, and return -EPROTO instead of -EINVAL. If a request
97a979
is missing a level number, treat it as malformed instead of as a request to
97a979
remove all.
97a979
---
97a979
 daemons/fenced/fenced_commands.c | 18 +++++++++---------
97a979
 1 file changed, 9 insertions(+), 9 deletions(-)
97a979
97a979
diff --git a/daemons/fenced/fenced_commands.c b/daemons/fenced/fenced_commands.c
97a979
index 6f325b9e8..358844203 100644
97a979
--- a/daemons/fenced/fenced_commands.c
97a979
+++ b/daemons/fenced/fenced_commands.c
97a979
@@ -1678,27 +1678,27 @@ stonith_level_register(xmlNode *msg, char **desc)
97a979
 int
97a979
 stonith_level_remove(xmlNode *msg, char **desc)
97a979
 {
97a979
-    int id = 0;
97a979
+    int id = -1;
97a979
     stonith_topology_t *tp;
97a979
     char *target;
97a979
 
97a979
     /* Unlike additions, removal requests should always have one level tag */
97a979
     xmlNode *level = get_xpath_object("//" XML_TAG_FENCING_LEVEL, msg, LOG_ERR);
97a979
 
97a979
-    CRM_CHECK(level != NULL, return -EINVAL);
97a979
+    CRM_CHECK(level != NULL, return -EPROTO);
97a979
 
97a979
     target = stonith_level_key(level, -1);
97a979
     crm_element_value_int(level, XML_ATTR_STONITH_INDEX, &id;;
97a979
+
97a979
+    CRM_CHECK((id >= 0) && (id < ST_LEVEL_MAX),
97a979
+              crm_log_xml_warn(msg, "invalid level");
97a979
+              free(target);
97a979
+              return -EPROTO);
97a979
+
97a979
     if (desc) {
97a979
         *desc = crm_strdup_printf("%s[%d]", target, id);
97a979
     }
97a979
 
97a979
-    /* Sanity-check arguments */
97a979
-    if (id >= ST_LEVEL_MAX) {
97a979
-        free(target);
97a979
-        return -EINVAL;
97a979
-    }
97a979
-
97a979
     tp = g_hash_table_lookup(topology, target);
97a979
     if (tp == NULL) {
97a979
         guint nentries = g_hash_table_size(topology);
97a979
@@ -1714,7 +1714,7 @@ stonith_level_remove(xmlNode *msg, char **desc)
97a979
                  "(%d active %s remaining)", target, nentries,
97a979
                  pcmk__plural_alt(nentries, "entry", "entries"));
97a979
 
97a979
-    } else if (id > 0 && tp->levels[id] != NULL) {
97a979
+    } else if (tp->levels[id] != NULL) {
97a979
         guint nlevels;
97a979
 
97a979
         g_list_free_full(tp->levels[id], free);
97a979
-- 
97a979
2.27.0
97a979
97a979
97a979
From ee0cfb6b284c2d6d21f8e77bf6ff286b1364235d Mon Sep 17 00:00:00 2001
97a979
From: Ken Gaillot <kgaillot@redhat.com>
97a979
Date: Thu, 18 Nov 2021 12:33:05 -0600
97a979
Subject: [PATCH 14/19] Refactor: fencer: avoid obscuring a variable
97a979
97a979
handle_request() declared a xmlNode *reply variable, and then one of its "if"
97a979
blocks defined another one, obscuring the first. Drop the first declaration,
97a979
and instead move it to the one other place that needed it.
97a979
97a979
Also remove a redundant assertion.
97a979
---
97a979
 daemons/fenced/fenced_commands.c | 13 +++++--------
97a979
 1 file changed, 5 insertions(+), 8 deletions(-)
97a979
97a979
diff --git a/daemons/fenced/fenced_commands.c b/daemons/fenced/fenced_commands.c
97a979
index 358844203..af0a92450 100644
97a979
--- a/daemons/fenced/fenced_commands.c
97a979
+++ b/daemons/fenced/fenced_commands.c
97a979
@@ -2917,7 +2917,6 @@ handle_request(pcmk__client_t *client, uint32_t id, uint32_t flags,
97a979
     int rc = -EOPNOTSUPP;
97a979
 
97a979
     xmlNode *data = NULL;
97a979
-    xmlNode *reply = NULL;
97a979
     bool need_reply = true;
97a979
 
97a979
     char *output = NULL;
97a979
@@ -2926,8 +2925,8 @@ handle_request(pcmk__client_t *client, uint32_t id, uint32_t flags,
97a979
 
97a979
     crm_element_value_int(request, F_STONITH_CALLOPTS, &call_options);
97a979
 
97a979
-    if (pcmk_is_set(call_options, st_opt_sync_call)) {
97a979
-        CRM_ASSERT(client == NULL || client->request_id == id);
97a979
+    if (pcmk_is_set(call_options, st_opt_sync_call) && (client != NULL)) {
97a979
+        CRM_ASSERT(client->request_id == id);
97a979
     }
97a979
 
97a979
     if (pcmk__str_eq(op, CRM_OP_REGISTER, pcmk__str_none)) {
97a979
@@ -3156,16 +3155,14 @@ handle_request(pcmk__client_t *client, uint32_t id, uint32_t flags,
97a979
 done:
97a979
     // Reply if result is known
97a979
     if (need_reply) {
97a979
-        if (pcmk_is_set(call_options, st_opt_sync_call)) {
97a979
-            CRM_ASSERT(client == NULL || client->request_id == id);
97a979
-        }
97a979
-        reply = stonith_construct_reply(request, output, data, rc);
97a979
+        xmlNode *reply = stonith_construct_reply(request, output, data, rc);
97a979
+
97a979
         stonith_send_reply(reply, call_options, remote_peer, client_id);
97a979
+        free_xml(reply);
97a979
     }
97a979
 
97a979
     free(output);
97a979
     free_xml(data);
97a979
-    free_xml(reply);
97a979
 
97a979
     crm_debug("Processed %s request from %s %s: %s (rc=%d)",
97a979
               op, ((client == NULL)? "peer" : "client"),
97a979
-- 
97a979
2.27.0
97a979
97a979
97a979
From a5fef7b95b7541860e29c1ff33be38db327208fb Mon Sep 17 00:00:00 2001
97a979
From: Ken Gaillot <kgaillot@redhat.com>
97a979
Date: Thu, 18 Nov 2021 12:37:10 -0600
97a979
Subject: [PATCH 15/19] Refactor: fencer: add convenience function for setting
97a979
 protocol error result
97a979
97a979
The fencer will soon track and return the full result (rather than just a
97a979
legacy return code) for fencing actions, for callbacks and notifications.
97a979
To simplify that process as well as move away from the legacy codes in general,
97a979
all fencer API operations will be modified to return a full result.
97a979
97a979
This convenience function will come in handy for that.
97a979
---
97a979
 daemons/fenced/pacemaker-fenced.h | 7 +++++++
97a979
 1 file changed, 7 insertions(+)
97a979
97a979
diff --git a/daemons/fenced/pacemaker-fenced.h b/daemons/fenced/pacemaker-fenced.h
97a979
index cf88644f1..3bc5dc3d1 100644
97a979
--- a/daemons/fenced/pacemaker-fenced.h
97a979
+++ b/daemons/fenced/pacemaker-fenced.h
97a979
@@ -262,6 +262,13 @@ gboolean node_has_attr(const char *node, const char *name, const char *value);
97a979
 
97a979
 gboolean node_does_watchdog_fencing(const char *node);
97a979
 
97a979
+static inline void
97a979
+fenced_set_protocol_error(pcmk__action_result_t *result)
97a979
+{
97a979
+    pcmk__set_result(result, CRM_EX_PROTOCOL, PCMK_EXEC_INVALID,
97a979
+                     "Fencer API request missing required information (bug?)");
97a979
+}
97a979
+
97a979
 extern char *stonith_our_uname;
97a979
 extern gboolean stand_alone;
97a979
 extern GHashTable *device_list;
97a979
-- 
97a979
2.27.0
97a979
97a979
97a979
From ed770d36fb34dc7b3344cd326830a6c06cc789ce Mon Sep 17 00:00:00 2001
97a979
From: Ken Gaillot <kgaillot@redhat.com>
97a979
Date: Fri, 19 Nov 2021 09:59:51 -0600
97a979
Subject: [PATCH 16/19] Refactor: fencer: make a few functions return void
97a979
97a979
... to make planned changes easier. The return values were previously ignored.
97a979
---
97a979
 daemons/fenced/fenced_commands.c  | 17 ++++++++-------
97a979
 daemons/fenced/fenced_history.c   |  6 +-----
97a979
 daemons/fenced/fenced_remote.c    | 35 ++++++++++++++-----------------
97a979
 daemons/fenced/pacemaker-fenced.c |  6 +++---
97a979
 daemons/fenced/pacemaker-fenced.h |  8 +++----
97a979
 5 files changed, 33 insertions(+), 39 deletions(-)
97a979
97a979
diff --git a/daemons/fenced/fenced_commands.c b/daemons/fenced/fenced_commands.c
97a979
index af0a92450..ea7d281ce 100644
97a979
--- a/daemons/fenced/fenced_commands.c
97a979
+++ b/daemons/fenced/fenced_commands.c
97a979
@@ -1411,8 +1411,8 @@ stonith_device_register(xmlNode * msg, const char **desc, gboolean from_cib)
97a979
     return pcmk_ok;
97a979
 }
97a979
 
97a979
-int
97a979
-stonith_device_remove(const char *id, gboolean from_cib)
97a979
+void
97a979
+stonith_device_remove(const char *id, bool from_cib)
97a979
 {
97a979
     stonith_device_t *device = g_hash_table_lookup(device_list, id);
97a979
     guint ndevices = 0;
97a979
@@ -1421,7 +1421,7 @@ stonith_device_remove(const char *id, gboolean from_cib)
97a979
         ndevices = g_hash_table_size(device_list);
97a979
         crm_info("Device '%s' not found (%d active device%s)",
97a979
                  id, ndevices, pcmk__plural_s(ndevices));
97a979
-        return pcmk_ok;
97a979
+        return;
97a979
     }
97a979
 
97a979
     if (from_cib) {
97a979
@@ -1443,7 +1443,6 @@ stonith_device_remove(const char *id, gboolean from_cib)
97a979
                   (device->cib_registered? " cib" : ""),
97a979
                   (device->api_registered? " api" : ""));
97a979
     }
97a979
-    return pcmk_ok;
97a979
 }
97a979
 
97a979
 /*!
97a979
@@ -3085,8 +3084,9 @@ handle_request(pcmk__client_t *client, uint32_t id, uint32_t flags,
97a979
         need_reply = (rc != -EINPROGRESS);
97a979
 
97a979
     } else if (pcmk__str_eq(op, STONITH_OP_FENCE_HISTORY, pcmk__str_none)) {
97a979
-        rc = stonith_fence_history(request, &data, remote_peer, call_options);
97a979
-        if (call_options & st_opt_discard_reply) {
97a979
+        stonith_fence_history(request, &data, remote_peer, call_options);
97a979
+        rc = pcmk_ok;
97a979
+        if (pcmk_is_set(call_options, st_opt_discard_reply)) {
97a979
             /* we don't expect answers to the broadcast
97a979
              * we might have sent out
97a979
              */
97a979
@@ -3109,7 +3109,8 @@ handle_request(pcmk__client_t *client, uint32_t id, uint32_t flags,
97a979
         const char *device_id = crm_element_value(dev, XML_ATTR_ID);
97a979
 
97a979
         if (is_privileged(client, op)) {
97a979
-            rc = stonith_device_remove(device_id, FALSE);
97a979
+            stonith_device_remove(device_id, false);
97a979
+            rc = pcmk_ok;
97a979
         } else {
97a979
             rc = -EACCES;
97a979
         }
97a979
@@ -3179,7 +3180,7 @@ handle_reply(pcmk__client_t *client, xmlNode *request, const char *remote_peer)
97a979
     if (pcmk__str_eq(op, STONITH_OP_QUERY, pcmk__str_none)) {
97a979
         process_remote_stonith_query(request);
97a979
     } else if (pcmk__str_any_of(op, T_STONITH_NOTIFY, STONITH_OP_FENCE, NULL)) {
97a979
-        process_remote_stonith_exec(request);
97a979
+        fenced_process_fencing_reply(request);
97a979
     } else {
97a979
         crm_err("Ignoring unknown %s reply from %s %s",
97a979
                 crm_str(op), ((client == NULL)? "peer" : "client"),
97a979
diff --git a/daemons/fenced/fenced_history.c b/daemons/fenced/fenced_history.c
97a979
index 7127593b6..bc159383c 100644
97a979
--- a/daemons/fenced/fenced_history.c
97a979
+++ b/daemons/fenced/fenced_history.c
97a979
@@ -433,14 +433,11 @@ stonith_local_history(gboolean add_id, const char *target)
97a979
  *                      a reply from
97a979
  * \param[in] remote_peer
97a979
  * \param[in] options   call-options from the request
97a979
- *
97a979
- * \return always success as there is actully nothing that can go really wrong
97a979
  */
97a979
-int
97a979
+void
97a979
 stonith_fence_history(xmlNode *msg, xmlNode **output,
97a979
                       const char *remote_peer, int options)
97a979
 {
97a979
-    int rc = 0;
97a979
     const char *target = NULL;
97a979
     xmlNode *dev = get_xpath_object("//@" F_STONITH_TARGET, msg, LOG_NEVER);
97a979
     xmlNode *out_history = NULL;
97a979
@@ -525,5 +522,4 @@ stonith_fence_history(xmlNode *msg, xmlNode **output,
97a979
         *output = stonith_local_history(FALSE, target);
97a979
     }
97a979
     free_xml(out_history);
97a979
-    return rc;
97a979
 }
97a979
diff --git a/daemons/fenced/fenced_remote.c b/daemons/fenced/fenced_remote.c
97a979
index 60ee5e32e..6338aebde 100644
97a979
--- a/daemons/fenced/fenced_remote.c
97a979
+++ b/daemons/fenced/fenced_remote.c
97a979
@@ -2086,11 +2086,9 @@ process_remote_stonith_query(xmlNode * msg)
97a979
  * or attempt another device as appropriate.
97a979
  *
97a979
  * \param[in] msg  XML reply received
97a979
- *
97a979
- * \return pcmk_ok on success, -errno on error
97a979
  */
97a979
-int
97a979
-process_remote_stonith_exec(xmlNode * msg)
97a979
+void
97a979
+fenced_process_fencing_reply(xmlNode *msg)
97a979
 {
97a979
     int rc = 0;
97a979
     const char *id = NULL;
97a979
@@ -2098,13 +2096,13 @@ process_remote_stonith_exec(xmlNode * msg)
97a979
     remote_fencing_op_t *op = NULL;
97a979
     xmlNode *dev = get_xpath_object("//@" F_STONITH_REMOTE_OP_ID, msg, LOG_ERR);
97a979
 
97a979
-    CRM_CHECK(dev != NULL, return -EPROTO);
97a979
+    CRM_CHECK(dev != NULL, return);
97a979
 
97a979
     id = crm_element_value(dev, F_STONITH_REMOTE_OP_ID);
97a979
-    CRM_CHECK(id != NULL, return -EPROTO);
97a979
+    CRM_CHECK(id != NULL, return);
97a979
 
97a979
     dev = get_xpath_object("//@" F_STONITH_RC, msg, LOG_ERR);
97a979
-    CRM_CHECK(dev != NULL, return -EPROTO);
97a979
+    CRM_CHECK(dev != NULL, return);
97a979
 
97a979
     crm_element_value_int(dev, F_STONITH_RC, &rc);
97a979
 
97a979
@@ -2125,35 +2123,35 @@ process_remote_stonith_exec(xmlNode * msg)
97a979
         /* Could be for an event that began before we started */
97a979
         /* TODO: Record the op for later querying */
97a979
         crm_info("Received peer result of unknown or expired operation %s", id);
97a979
-        return -EOPNOTSUPP;
97a979
+        return;
97a979
     }
97a979
 
97a979
     if (op->devices && device && !pcmk__str_eq(op->devices->data, device, pcmk__str_casei)) {
97a979
         crm_err("Received outdated reply for device %s (instead of %s) to "
97a979
                 "fence (%s) %s. Operation already timed out at peer level.",
97a979
                 device, (const char *) op->devices->data, op->action, op->target);
97a979
-        return rc;
97a979
+        return;
97a979
     }
97a979
 
97a979
     if (pcmk__str_eq(crm_element_value(msg, F_SUBTYPE), "broadcast", pcmk__str_casei)) {
97a979
         crm_debug("Finalizing action '%s' targeting %s on behalf of %s@%s: %s "
97a979
-                  CRM_XS " rc=%d id=%.8s",
97a979
+                  CRM_XS " id=%.8s",
97a979
                   op->action, op->target, op->client_name, op->originator,
97a979
-                  pcmk_strerror(rc), rc, op->id);
97a979
+                  pcmk_strerror(rc), op->id);
97a979
         if (rc == pcmk_ok) {
97a979
             op->state = st_done;
97a979
         } else {
97a979
             op->state = st_failed;
97a979
         }
97a979
         remote_op_done(op, msg, rc, FALSE);
97a979
-        return pcmk_ok;
97a979
+        return;
97a979
     } else if (!pcmk__str_eq(op->originator, stonith_our_uname, pcmk__str_casei)) {
97a979
         /* If this isn't a remote level broadcast, and we are not the
97a979
          * originator of the operation, we should not be receiving this msg. */
97a979
         crm_err("Received non-broadcast fencing result for operation %.8s "
97a979
                 "we do not own (device %s targeting %s)",
97a979
                 op->id, device, op->target);
97a979
-        return rc;
97a979
+        return;
97a979
     }
97a979
 
97a979
     if (pcmk_is_set(op->call_options, st_opt_topology)) {
97a979
@@ -2168,7 +2166,7 @@ process_remote_stonith_exec(xmlNode * msg)
97a979
          * and notify our local clients. */
97a979
         if (op->state == st_done) {
97a979
             remote_op_done(op, msg, rc, FALSE);
97a979
-            return rc;
97a979
+            return;
97a979
         }
97a979
 
97a979
         if ((op->phase == 2) && (rc != pcmk_ok)) {
97a979
@@ -2184,14 +2182,14 @@ process_remote_stonith_exec(xmlNode * msg)
97a979
             /* An operation completed successfully. Try another device if
97a979
              * necessary, otherwise mark the operation as done. */
97a979
             advance_topology_device_in_level(op, device, msg, rc);
97a979
-            return rc;
97a979
+            return;
97a979
         } else {
97a979
             /* This device failed, time to try another topology level. If no other
97a979
              * levels are available, mark this operation as failed and report results. */
97a979
             if (advance_topology_level(op, false) != pcmk_rc_ok) {
97a979
                 op->state = st_failed;
97a979
                 remote_op_done(op, msg, rc, FALSE);
97a979
-                return rc;
97a979
+                return;
97a979
             }
97a979
         }
97a979
     } else if (rc == pcmk_ok && op->devices == NULL) {
97a979
@@ -2199,12 +2197,12 @@ process_remote_stonith_exec(xmlNode * msg)
97a979
 
97a979
         op->state = st_done;
97a979
         remote_op_done(op, msg, rc, FALSE);
97a979
-        return rc;
97a979
+        return;
97a979
     } else if (rc == -ETIME && op->devices == NULL) {
97a979
         /* If the operation timed out don't bother retrying other peers. */
97a979
         op->state = st_failed;
97a979
         remote_op_done(op, msg, rc, FALSE);
97a979
-        return rc;
97a979
+        return;
97a979
     } else {
97a979
         /* fall-through and attempt other fencing action using another peer */
97a979
     }
97a979
@@ -2213,7 +2211,6 @@ process_remote_stonith_exec(xmlNode * msg)
97a979
     crm_trace("Next for %s on behalf of %s@%s (rc was %d)", op->target, op->originator,
97a979
               op->client_name, rc);
97a979
     call_remote_stonith(op, NULL, rc);
97a979
-    return rc;
97a979
 }
97a979
 
97a979
 gboolean
97a979
diff --git a/daemons/fenced/pacemaker-fenced.c b/daemons/fenced/pacemaker-fenced.c
97a979
index a290e1670..0a8b3bf6f 100644
97a979
--- a/daemons/fenced/pacemaker-fenced.c
97a979
+++ b/daemons/fenced/pacemaker-fenced.c
97a979
@@ -445,7 +445,7 @@ remove_cib_device(xmlXPathObjectPtr xpathObj)
97a979
 
97a979
         rsc_id = crm_element_value(match, XML_ATTR_ID);
97a979
 
97a979
-        stonith_device_remove(rsc_id, TRUE);
97a979
+        stonith_device_remove(rsc_id, true);
97a979
     }
97a979
 }
97a979
 
97a979
@@ -610,7 +610,7 @@ watchdog_device_update(void)
97a979
     } else {
97a979
         /* be silent if no device - todo parameter to stonith_device_remove */
97a979
         if (g_hash_table_lookup(device_list, STONITH_WATCHDOG_ID)) {
97a979
-            stonith_device_remove(STONITH_WATCHDOG_ID, TRUE);
97a979
+            stonith_device_remove(STONITH_WATCHDOG_ID, true);
97a979
         }
97a979
     }
97a979
 }
97a979
@@ -847,7 +847,7 @@ update_cib_stonith_devices_v2(const char *event, xmlNode * msg)
97a979
             }
97a979
             if (search != NULL) {
97a979
                 *search = 0;
97a979
-                stonith_device_remove(rsc_id, TRUE);
97a979
+                stonith_device_remove(rsc_id, true);
97a979
                 /* watchdog_device_update called afterwards
97a979
                    to fall back to implicit definition if needed */
97a979
             } else {
97a979
diff --git a/daemons/fenced/pacemaker-fenced.h b/daemons/fenced/pacemaker-fenced.h
97a979
index 3bc5dc3d1..5162ada75 100644
97a979
--- a/daemons/fenced/pacemaker-fenced.h
97a979
+++ b/daemons/fenced/pacemaker-fenced.h
97a979
@@ -214,7 +214,7 @@ void stonith_command(pcmk__client_t *client, uint32_t id, uint32_t flags,
97a979
 
97a979
 int stonith_device_register(xmlNode * msg, const char **desc, gboolean from_cib);
97a979
 
97a979
-int stonith_device_remove(const char *id, gboolean from_cib);
97a979
+void stonith_device_remove(const char *id, bool from_cib);
97a979
 
97a979
 char *stonith_level_key(xmlNode * msg, int mode);
97a979
 int stonith_level_kind(xmlNode * msg);
97a979
@@ -241,14 +241,14 @@ remote_fencing_op_t *initiate_remote_stonith_op(pcmk__client_t *client,
97a979
                                                 xmlNode *request,
97a979
                                                 gboolean manual_ack);
97a979
 
97a979
-int process_remote_stonith_exec(xmlNode * msg);
97a979
+void fenced_process_fencing_reply(xmlNode *msg);
97a979
 
97a979
 int process_remote_stonith_query(xmlNode * msg);
97a979
 
97a979
 void *create_remote_stonith_op(const char *client, xmlNode * request, gboolean peer);
97a979
 
97a979
-int stonith_fence_history(xmlNode *msg, xmlNode **output,
97a979
-                          const char *remote_peer, int options);
97a979
+void stonith_fence_history(xmlNode *msg, xmlNode **output,
97a979
+                           const char *remote_peer, int options);
97a979
 
97a979
 void stonith_fence_history_trim(void);
97a979
 
97a979
-- 
97a979
2.27.0
97a979
97a979
97a979
From 27df49460930738e77f5ca42536aff1d3bdfcae7 Mon Sep 17 00:00:00 2001
97a979
From: Ken Gaillot <kgaillot@redhat.com>
97a979
Date: Fri, 19 Nov 2021 10:06:43 -0600
97a979
Subject: [PATCH 17/19] Refactor: fencer: drop unnecessary argument when
97a979
 advancing topology device
97a979
97a979
If we're advancing to the next device in a topology level, by necessity that
97a979
means any previous device succeeded.
97a979
---
97a979
 daemons/fenced/fenced_remote.c | 19 +++++++++----------
97a979
 1 file changed, 9 insertions(+), 10 deletions(-)
97a979
97a979
diff --git a/daemons/fenced/fenced_remote.c b/daemons/fenced/fenced_remote.c
97a979
index 6338aebde..d54e6a4ef 100644
97a979
--- a/daemons/fenced/fenced_remote.c
97a979
+++ b/daemons/fenced/fenced_remote.c
97a979
@@ -1519,14 +1519,13 @@ report_timeout_period(remote_fencing_op_t * op, int op_timeout)
97a979
  * \internal
97a979
  * \brief Advance an operation to the next device in its topology
97a979
  *
97a979
- * \param[in,out] op      Operation to advance
97a979
- * \param[in]     device  ID of device just completed
97a979
- * \param[in]     msg     XML reply that contained device result (if available)
97a979
- * \param[in]     rc      Return code of device's execution
97a979
+ * \param[in] op      Fencer operation to advance
97a979
+ * \param[in] device  ID of device that just completed
97a979
+ * \param[in] msg     If not NULL, XML reply of last delegated fencing operation
97a979
  */
97a979
 static void
97a979
 advance_topology_device_in_level(remote_fencing_op_t *op, const char *device,
97a979
-                                 xmlNode *msg, int rc)
97a979
+                                 xmlNode *msg)
97a979
 {
97a979
     /* Advance to the next device at this topology level, if any */
97a979
     if (op->devices) {
97a979
@@ -1556,8 +1555,8 @@ advance_topology_device_in_level(remote_fencing_op_t *op, const char *device,
97a979
 
97a979
     if (op->devices) {
97a979
         /* Necessary devices remain, so execute the next one */
97a979
-        crm_trace("Next targeting %s on behalf of %s@%s (rc was %d)",
97a979
-                  op->target, op->client_name, op->originator, rc);
97a979
+        crm_trace("Next targeting %s on behalf of %s@%s",
97a979
+                  op->target, op->client_name, op->originator);
97a979
 
97a979
         // The requested delay has been applied for the first device
97a979
         if (op->delay > 0) {
97a979
@@ -1570,7 +1569,7 @@ advance_topology_device_in_level(remote_fencing_op_t *op, const char *device,
97a979
         crm_trace("Marking complex fencing op targeting %s as complete",
97a979
                   op->target);
97a979
         op->state = st_done;
97a979
-        remote_op_done(op, msg, rc, FALSE);
97a979
+        remote_op_done(op, msg, pcmk_ok, FALSE);
97a979
     }
97a979
 }
97a979
 
97a979
@@ -1701,7 +1700,7 @@ call_remote_stonith(remote_fencing_op_t *op, peer_device_info_t *peer, int rc)
97a979
          */
97a979
         crm_warn("Ignoring %s 'on' failure (no capable peers) targeting %s "
97a979
                  "after successful 'off'", device, op->target);
97a979
-        advance_topology_device_in_level(op, device, NULL, pcmk_ok);
97a979
+        advance_topology_device_in_level(op, device, NULL);
97a979
         return;
97a979
 
97a979
     } else if (op->owner == FALSE) {
97a979
@@ -2181,7 +2180,7 @@ fenced_process_fencing_reply(xmlNode *msg)
97a979
         if (rc == pcmk_ok) {
97a979
             /* An operation completed successfully. Try another device if
97a979
              * necessary, otherwise mark the operation as done. */
97a979
-            advance_topology_device_in_level(op, device, msg, rc);
97a979
+            advance_topology_device_in_level(op, device, msg);
97a979
             return;
97a979
         } else {
97a979
             /* This device failed, time to try another topology level. If no other
97a979
-- 
97a979
2.27.0
97a979
97a979
97a979
From 05437e1339bc1f9071b43e97d5846a939687951d Mon Sep 17 00:00:00 2001
97a979
From: Ken Gaillot <kgaillot@redhat.com>
97a979
Date: Mon, 29 Nov 2021 11:59:17 -0600
97a979
Subject: [PATCH 18/19] Refactor: fencer: minor renames for consistency
97a979
97a979
... per review
97a979
---
97a979
 daemons/fenced/fenced_remote.c | 13 ++++++-------
97a979
 1 file changed, 6 insertions(+), 7 deletions(-)
97a979
97a979
diff --git a/daemons/fenced/fenced_remote.c b/daemons/fenced/fenced_remote.c
97a979
index d54e6a4ef..8feb40147 100644
97a979
--- a/daemons/fenced/fenced_remote.c
97a979
+++ b/daemons/fenced/fenced_remote.c
97a979
@@ -63,7 +63,7 @@ typedef struct device_properties_s {
97a979
     int delay_base[st_phase_max];
97a979
 } device_properties_t;
97a979
 
97a979
-typedef struct st_query_result_s {
97a979
+typedef struct {
97a979
     /* Name of peer that sent this result */
97a979
     char *host;
97a979
     /* Only try peers for non-topology based operations once */
97a979
@@ -95,13 +95,12 @@ sort_strings(gconstpointer a, gconstpointer b)
97a979
 static void
97a979
 free_remote_query(gpointer data)
97a979
 {
97a979
-    if (data) {
97a979
-        peer_device_info_t *query = data;
97a979
+    if (data != NULL) {
97a979
+        peer_device_info_t *peer = data;
97a979
 
97a979
-        crm_trace("Free'ing query result from %s", query->host);
97a979
-        g_hash_table_destroy(query->devices);
97a979
-        free(query->host);
97a979
-        free(query);
97a979
+        g_hash_table_destroy(peer->devices);
97a979
+        free(peer->host);
97a979
+        free(peer);
97a979
     }
97a979
 }
97a979
 
97a979
-- 
97a979
2.27.0
97a979
97a979
97a979
From 86974d7cef05bafbed540d02e59514292581ae65 Mon Sep 17 00:00:00 2001
97a979
From: Ken Gaillot <kgaillot@redhat.com>
97a979
Date: Tue, 30 Nov 2021 08:33:41 -0600
97a979
Subject: [PATCH 19/19] Refactor: fencer: simplify send_async_reply()
97a979
97a979
... as suggested in review
97a979
---
97a979
 daemons/fenced/fenced_commands.c | 28 ++++++++++++----------------
97a979
 1 file changed, 12 insertions(+), 16 deletions(-)
97a979
97a979
diff --git a/daemons/fenced/fenced_commands.c b/daemons/fenced/fenced_commands.c
97a979
index ea7d281ce..f34cb4f13 100644
97a979
--- a/daemons/fenced/fenced_commands.c
97a979
+++ b/daemons/fenced/fenced_commands.c
97a979
@@ -2384,36 +2384,34 @@ send_async_reply(async_command_t *cmd, const pcmk__action_result_t *result,
97a979
                  int pid, bool merged)
97a979
 {
97a979
     xmlNode *reply = NULL;
97a979
-    bool bcast = false;
97a979
 
97a979
     CRM_CHECK((cmd != NULL) && (result != NULL), return);
97a979
 
97a979
+    log_async_result(cmd, result, pid, NULL, merged);
97a979
+
97a979
     reply = construct_async_reply(cmd, result);
97a979
+    if (merged) {
97a979
+        crm_xml_add(reply, F_STONITH_MERGED, "true");
97a979
+    }
97a979
 
97a979
-    // If target was also the originator, broadcast fencing results for it
97a979
     if (!stand_alone && pcmk__is_fencing_action(cmd->action)
97a979
         && pcmk__str_eq(cmd->origin, cmd->victim, pcmk__str_casei)) {
97a979
-
97a979
+        /* The target was also the originator, so broadcast the result on its
97a979
+         * behalf (since it will be unable to).
97a979
+         */
97a979
         crm_trace("Broadcast '%s' result for %s (target was also originator)",
97a979
                   cmd->action, cmd->victim);
97a979
         crm_xml_add(reply, F_SUBTYPE, "broadcast");
97a979
         crm_xml_add(reply, F_STONITH_OPERATION, T_STONITH_NOTIFY);
97a979
-        bcast = true;
97a979
-    }
97a979
-
97a979
-    log_async_result(cmd, result, pid, NULL, merged);
97a979
-
97a979
-    if (merged) {
97a979
-        crm_xml_add(reply, F_STONITH_MERGED, "true");
97a979
-    }
97a979
-    crm_log_xml_trace(reply, "Reply");
97a979
-
97a979
-    if (bcast) {
97a979
         send_cluster_message(NULL, crm_msg_stonith_ng, reply, FALSE);
97a979
     } else {
97a979
+        // Reply only to the originator
97a979
         stonith_send_reply(reply, cmd->options, cmd->origin, cmd->client);
97a979
     }
97a979
 
97a979
+    crm_log_xml_trace(reply, "Reply");
97a979
+    free_xml(reply);
97a979
+
97a979
     if (stand_alone) {
97a979
         /* Do notification with a clean data object */
97a979
         xmlNode *notify_data = create_xml_node(NULL, T_STONITH_NOTIFY_FENCE);
97a979
@@ -2430,8 +2428,6 @@ send_async_reply(async_command_t *cmd, const pcmk__action_result_t *result,
97a979
         do_stonith_notify(T_STONITH_NOTIFY_FENCE, rc, notify_data);
97a979
         do_stonith_notify(T_STONITH_NOTIFY_HISTORY, pcmk_ok, NULL);
97a979
     }
97a979
-
97a979
-    free_xml(reply);
97a979
 }
97a979
 
97a979
 static void
97a979
-- 
97a979
2.27.0
97a979