Blame SOURCES/002-fencing-reasons.patch

97a979
From 95b4f87aae5fb2cf771cf9a8f8e5420b65fb213f Mon Sep 17 00:00:00 2001
97a979
From: Ken Gaillot <kgaillot@redhat.com>
97a979
Date: Tue, 21 Sep 2021 10:47:51 -0500
97a979
Subject: [PATCH 01/12] Refactor: fencing: use pcmk__action_result_t in
97a979
 stonith_action_t
97a979
97a979
stonith_action_t previously had an rc member for a legacy return code, along
97a979
with output and error members for action stdout/stderr. When setting rc based
97a979
on the svc_action_t result, it used a mapping function svc_action_to_errno().
97a979
97a979
This replaces those with a pcmk__action_result_t member, which means we now
97a979
track the exit status and execution status as originally set by libcrmservice,
97a979
rather than the mapped rc. The library now calls the mapping function, now
97a979
returning standard codes and called result2rc(), when calling the client
97a979
callback.
97a979
97a979
The exit_reason member is unused as of this commit.
97a979
97a979
The behavior should be identical, with the small exception of
97a979
services_action_async() failure leaving the exit status as set by the services
97a979
library, which means callers will get the result2rc() mapping of the actual
97a979
result instead of the former -ECONNABORTED.
97a979
---
97a979
 lib/fencing/st_client.c | 118 +++++++++++++++++++++++-----------------
97a979
 1 file changed, 68 insertions(+), 50 deletions(-)
97a979
97a979
diff --git a/lib/fencing/st_client.c b/lib/fencing/st_client.c
97a979
index 08adb51c6..6c607b010 100644
97a979
--- a/lib/fencing/st_client.c
97a979
+++ b/lib/fencing/st_client.c
97a979
@@ -29,6 +29,7 @@
97a979
 #include <crm/msg_xml.h>
97a979
 #include <crm/common/xml.h>
97a979
 #include <crm/common/xml_internal.h>
97a979
+#include <crm/services_internal.h>
97a979
 
97a979
 #include <crm/common/mainloop.h>
97a979
 
97a979
@@ -57,9 +58,7 @@ struct stonith_action_s {
97a979
     int max_retries;
97a979
 
97a979
     int pid;
97a979
-    int rc;
97a979
-    char *output;
97a979
-    char *error;
97a979
+    pcmk__action_result_t result;
97a979
 };
97a979
 
97a979
 typedef struct stonith_private_s {
97a979
@@ -120,6 +119,7 @@ static void stonith_connection_destroy(gpointer user_data);
97a979
 static void stonith_send_notification(gpointer data, gpointer user_data);
97a979
 static int internal_stonith_action_execute(stonith_action_t * action);
97a979
 static void log_action(stonith_action_t *action, pid_t pid);
97a979
+static int result2rc(const pcmk__action_result_t *result);
97a979
 
97a979
 /*!
97a979
  * \brief Get agent namespace by name
97a979
@@ -196,6 +196,23 @@ stonith_get_namespace(const char *agent, const char *namespace_s)
97a979
     return st_namespace_invalid;
97a979
 }
97a979
 
97a979
+/*!
97a979
+ * \internal
97a979
+ * \brief Set an action's result based on services library result
97a979
+ *
97a979
+ * \param[in] action      Fence action to set result for
97a979
+ * \param[in] svc_action  Service action to get result from
97a979
+ */
97a979
+static void
97a979
+set_result_from_svc_action(stonith_action_t *action, svc_action_t *svc_action)
97a979
+{
97a979
+    pcmk__set_result(&(action->result), svc_action->rc, svc_action->status,
97a979
+                     NULL);
97a979
+    pcmk__set_result_output(&(action->result),
97a979
+                            services__grab_stdout(svc_action),
97a979
+                            services__grab_stderr(svc_action));
97a979
+}
97a979
+
97a979
 gboolean
97a979
 stonith__watchdog_fencing_enabled_for_node_api(stonith_t *st, const char *node)
97a979
 {
97a979
@@ -259,19 +276,19 @@ stonith__watchdog_fencing_enabled_for_node(const char *node)
97a979
 static void
97a979
 log_action(stonith_action_t *action, pid_t pid)
97a979
 {
97a979
-    if (action->output) {
97a979
+    if (action->result.action_stdout != NULL) {
97a979
         /* Logging the whole string confuses syslog when the string is xml */
97a979
         char *prefix = crm_strdup_printf("%s[%d] stdout:", action->agent, pid);
97a979
 
97a979
-        crm_log_output(LOG_TRACE, prefix, action->output);
97a979
+        crm_log_output(LOG_TRACE, prefix, action->result.action_stdout);
97a979
         free(prefix);
97a979
     }
97a979
 
97a979
-    if (action->error) {
97a979
+    if (action->result.action_stderr != NULL) {
97a979
         /* Logging the whole string confuses syslog when the string is xml */
97a979
         char *prefix = crm_strdup_printf("%s[%d] stderr:", action->agent, pid);
97a979
 
97a979
-        crm_log_output(LOG_WARNING, prefix, action->error);
97a979
+        crm_log_output(LOG_WARNING, prefix, action->result.action_stderr);
97a979
         free(prefix);
97a979
     }
97a979
 }
97a979
@@ -645,8 +662,7 @@ stonith__destroy_action(stonith_action_t *action)
97a979
         if (action->svc_action) {
97a979
             services_action_free(action->svc_action);
97a979
         }
97a979
-        free(action->output);
97a979
-        free(action->error);
97a979
+        pcmk__reset_result(&(action->result));
97a979
         free(action);
97a979
     }
97a979
 }
97a979
@@ -678,15 +694,15 @@ stonith__action_result(stonith_action_t *action, int *rc, char **output,
97a979
     }
97a979
     if (action != NULL) {
97a979
         if (rc) {
97a979
-            *rc = action->rc;
97a979
+            *rc = pcmk_rc2legacy(result2rc(&(action->result)));
97a979
         }
97a979
-        if (output && action->output) {
97a979
-            *output = action->output;
97a979
-            action->output = NULL; // hand off memory management to caller
97a979
+        if ((output != NULL) && (action->result.action_stdout != NULL)) {
97a979
+            *output = action->result.action_stdout;
97a979
+            action->result.action_stdout = NULL; // hand off ownership to caller
97a979
         }
97a979
-        if (error_output && action->error) {
97a979
-            *error_output = action->error;
97a979
-            action->error = NULL; // hand off memory management to caller
97a979
+        if ((error_output != NULL) && (action->result.action_stderr != NULL)) {
97a979
+            *error_output = action->result.action_stderr;
97a979
+            action->result.action_stderr = NULL; // hand off ownership to caller
97a979
         }
97a979
     }
97a979
 }
97a979
@@ -715,6 +731,9 @@ stonith_action_create(const char *agent,
97a979
     action->timeout = action->remaining_timeout = timeout;
97a979
     action->max_retries = FAILURE_MAX_RETRIES;
97a979
 
97a979
+    pcmk__set_result(&(action->result), PCMK_OCF_UNKNOWN, PCMK_EXEC_UNKNOWN,
97a979
+                     NULL);
97a979
+
97a979
     if (device_args) {
97a979
         char buffer[512];
97a979
         const char *value = NULL;
97a979
@@ -739,7 +758,8 @@ update_remaining_timeout(stonith_action_t * action)
97a979
         crm_info("Attempted to execute agent %s (%s) the maximum number of times (%d) allowed",
97a979
                  action->agent, action->action, action->max_retries);
97a979
         action->remaining_timeout = 0;
97a979
-    } else if ((action->rc != -ETIME) && diff < (action->timeout * 0.7)) {
97a979
+    } else if ((action->result.execution_status != PCMK_EXEC_TIMEOUT)
97a979
+               && (diff < (action->timeout * 0.7))) {
97a979
         /* only set remaining timeout period if there is 30%
97a979
          * or greater of the original timeout period left */
97a979
         action->remaining_timeout = action->timeout - diff;
97a979
@@ -750,31 +770,31 @@ update_remaining_timeout(stonith_action_t * action)
97a979
 }
97a979
 
97a979
 static int
97a979
-svc_action_to_errno(svc_action_t *svc_action) {
97a979
-    int rv = pcmk_ok;
97a979
+result2rc(const pcmk__action_result_t *result) {
97a979
+    int rc = pcmk_rc_ok;
97a979
 
97a979
-    if (svc_action->status == PCMK_EXEC_TIMEOUT) {
97a979
-            rv = -ETIME;
97a979
+    if (result->execution_status == PCMK_EXEC_TIMEOUT) {
97a979
+            rc = ETIME;
97a979
 
97a979
-    } else if (svc_action->rc != PCMK_OCF_OK) {
97a979
+    } else if (result->exit_status != CRM_EX_OK) {
97a979
         /* Try to provide a useful error code based on the fence agent's
97a979
          * error output.
97a979
          */
97a979
-        if (svc_action->stderr_data == NULL) {
97a979
-            rv = -ENODATA;
97a979
+        if (result->action_stderr == NULL) {
97a979
+            rc = ENODATA;
97a979
 
97a979
-        } else if (strstr(svc_action->stderr_data, "imed out")) {
97a979
+        } else if (strstr(result->action_stderr, "imed out")) {
97a979
             /* Some agents have their own internal timeouts */
97a979
-            rv = -ETIME;
97a979
+            rc = ETIME;
97a979
 
97a979
-        } else if (strstr(svc_action->stderr_data, "Unrecognised action")) {
97a979
-            rv = -EOPNOTSUPP;
97a979
+        } else if (strstr(result->action_stderr, "Unrecognised action")) {
97a979
+            rc = EOPNOTSUPP;
97a979
 
97a979
         } else {
97a979
-            rv = -pcmk_err_generic;
97a979
+            rc = pcmk_rc_error;
97a979
         }
97a979
     }
97a979
-    return rv;
97a979
+    return rc;
97a979
 }
97a979
 
97a979
 static void
97a979
@@ -782,11 +802,7 @@ stonith_action_async_done(svc_action_t *svc_action)
97a979
 {
97a979
     stonith_action_t *action = (stonith_action_t *) svc_action->cb_data;
97a979
 
97a979
-    action->rc = svc_action_to_errno(svc_action);
97a979
-    action->output = svc_action->stdout_data;
97a979
-    svc_action->stdout_data = NULL;
97a979
-    action->error = svc_action->stderr_data;
97a979
-    svc_action->stderr_data = NULL;
97a979
+    set_result_from_svc_action(action, svc_action);
97a979
 
97a979
     svc_action->params = NULL;
97a979
 
97a979
@@ -795,7 +811,9 @@ stonith_action_async_done(svc_action_t *svc_action)
97a979
 
97a979
     log_action(action, action->pid);
97a979
 
97a979
-    if (action->rc != pcmk_ok && update_remaining_timeout(action)) {
97a979
+    if ((action->result.exit_status != CRM_EX_OK)
97a979
+        && update_remaining_timeout(action)) {
97a979
+
97a979
         int rc = internal_stonith_action_execute(action);
97a979
         if (rc == pcmk_ok) {
97a979
             return;
97a979
@@ -803,7 +821,8 @@ stonith_action_async_done(svc_action_t *svc_action)
97a979
     }
97a979
 
97a979
     if (action->done_cb) {
97a979
-        action->done_cb(action->pid, action->rc, action->output, action->userdata);
97a979
+        action->done_cb(action->pid, pcmk_rc2legacy(result2rc(&(action->result))),
97a979
+                        action->result.action_stdout, action->userdata);
97a979
     }
97a979
 
97a979
     action->svc_action = NULL; // don't remove our caller
97a979
@@ -835,9 +854,13 @@ internal_stonith_action_execute(stonith_action_t * action)
97a979
     static int stonith_sequence = 0;
97a979
     char *buffer = NULL;
97a979
 
97a979
-    if ((action == NULL) || (action->action == NULL) || (action->args == NULL)
97a979
+    CRM_CHECK(action != NULL, return -EINVAL);
97a979
+
97a979
+    if ((action->action == NULL) || (action->args == NULL)
97a979
         || (action->agent == NULL)) {
97a979
-        return -EPROTO;
97a979
+        pcmk__set_result(&(action->result), PCMK_OCF_UNKNOWN_ERROR,
97a979
+                         PCMK_EXEC_ERROR_FATAL, NULL);
97a979
+        return -EINVAL;
97a979
     }
97a979
 
97a979
     if (!action->tries) {
97a979
@@ -857,6 +880,7 @@ internal_stonith_action_execute(stonith_action_t * action)
97a979
     free(buffer);
97a979
 
97a979
     if (svc_action->rc != PCMK_OCF_UNKNOWN) {
97a979
+        set_result_from_svc_action(action, svc_action);
97a979
         services_action_free(svc_action);
97a979
         return -E2BIG;
97a979
     }
97a979
@@ -877,10 +901,7 @@ internal_stonith_action_execute(stonith_action_t * action)
97a979
 
97a979
     /* keep retries from executing out of control and free previous results */
97a979
     if (is_retry) {
97a979
-        free(action->output);
97a979
-        action->output = NULL;
97a979
-        free(action->error);
97a979
-        action->error = NULL;
97a979
+        pcmk__reset_result(&(action->result));
97a979
         sleep(1);
97a979
     }
97a979
 
97a979
@@ -889,22 +910,19 @@ internal_stonith_action_execute(stonith_action_t * action)
97a979
         if (services_action_async_fork_notify(svc_action,
97a979
                                               &stonith_action_async_done,
97a979
                                               &stonith_action_async_forked)) {
97a979
+            pcmk__set_result(&(action->result), PCMK_OCF_UNKNOWN,
97a979
+                             PCMK_EXEC_PENDING, NULL);
97a979
             return pcmk_ok;
97a979
         }
97a979
 
97a979
     } else if (services_action_sync(svc_action)) { // sync success
97a979
         rc = pcmk_ok;
97a979
-        action->rc = svc_action_to_errno(svc_action);
97a979
-        action->output = svc_action->stdout_data;
97a979
-        svc_action->stdout_data = NULL;
97a979
-        action->error = svc_action->stderr_data;
97a979
-        svc_action->stderr_data = NULL;
97a979
 
97a979
     } else { // sync failure
97a979
-        action->rc = -ECONNABORTED;
97a979
-        rc = action->rc;
97a979
+        rc = -ECONNABORTED;
97a979
     }
97a979
 
97a979
+    set_result_from_svc_action(action, svc_action);
97a979
     svc_action->params = NULL;
97a979
     services_action_free(svc_action);
97a979
     return rc;
97a979
-- 
97a979
2.27.0
97a979
97a979
97a979
From 4c8e0b0ecc53cb3883f0da0eede20b900fff48d1 Mon Sep 17 00:00:00 2001
97a979
From: Ken Gaillot <kgaillot@redhat.com>
97a979
Date: Tue, 21 Sep 2021 11:14:31 -0500
97a979
Subject: [PATCH 02/12] Low: fencing: improve return code given back to library
97a979
 callers
97a979
97a979
Expose result2rc() internally for future reuse, and expand it to handle more
97a979
cases. In theory, this can give us better log messages and status output for
97a979
failures.
97a979
---
97a979
 include/crm/fencing/internal.h |  1 +
97a979
 lib/fencing/st_client.c        | 63 +++++++++++++++++++++-------------
97a979
 2 files changed, 41 insertions(+), 23 deletions(-)
97a979
97a979
diff --git a/include/crm/fencing/internal.h b/include/crm/fencing/internal.h
97a979
index fa9059e6f..0d23967bb 100644
97a979
--- a/include/crm/fencing/internal.h
97a979
+++ b/include/crm/fencing/internal.h
97a979
@@ -60,6 +60,7 @@ stonith_action_t *stonith_action_create(const char *agent,
97a979
 void stonith__destroy_action(stonith_action_t *action);
97a979
 void stonith__action_result(stonith_action_t *action, int *rc, char **output,
97a979
                             char **error_output);
97a979
+int stonith__result2rc(const pcmk__action_result_t *result);
97a979
 
97a979
 int
97a979
 stonith_action_execute_async(stonith_action_t * action,
97a979
diff --git a/lib/fencing/st_client.c b/lib/fencing/st_client.c
97a979
index 6c607b010..809be1640 100644
97a979
--- a/lib/fencing/st_client.c
97a979
+++ b/lib/fencing/st_client.c
97a979
@@ -119,7 +119,6 @@ static void stonith_connection_destroy(gpointer user_data);
97a979
 static void stonith_send_notification(gpointer data, gpointer user_data);
97a979
 static int internal_stonith_action_execute(stonith_action_t * action);
97a979
 static void log_action(stonith_action_t *action, pid_t pid);
97a979
-static int result2rc(const pcmk__action_result_t *result);
97a979
 
97a979
 /*!
97a979
  * \brief Get agent namespace by name
97a979
@@ -694,7 +693,7 @@ stonith__action_result(stonith_action_t *action, int *rc, char **output,
97a979
     }
97a979
     if (action != NULL) {
97a979
         if (rc) {
97a979
-            *rc = pcmk_rc2legacy(result2rc(&(action->result)));
97a979
+            *rc = pcmk_rc2legacy(stonith__result2rc(&(action->result)));
97a979
         }
97a979
         if ((output != NULL) && (action->result.action_stdout != NULL)) {
97a979
             *output = action->result.action_stdout;
97a979
@@ -769,32 +768,49 @@ update_remaining_timeout(stonith_action_t * action)
97a979
     return action->remaining_timeout ? TRUE : FALSE;
97a979
 }
97a979
 
97a979
-static int
97a979
-result2rc(const pcmk__action_result_t *result) {
97a979
-    int rc = pcmk_rc_ok;
97a979
+/*!
97a979
+ * \internal
97a979
+ * \brief Map a fencing action result to a standard return code
97a979
+ *
97a979
+ * \param[in] result  Fencing action result to map
97a979
+ *
97a979
+ * \return Standard Pacemaker return code that best corresponds to \p result
97a979
+ */
97a979
+int
97a979
+stonith__result2rc(const pcmk__action_result_t *result)
97a979
+{
97a979
+    switch (result->execution_status) {
97a979
+        case PCMK_EXEC_CANCELLED:       return ECANCELED;
97a979
+        case PCMK_EXEC_TIMEOUT:         return ETIME;
97a979
+        case PCMK_EXEC_NOT_INSTALLED:   return ENOENT;
97a979
+        case PCMK_EXEC_NOT_SUPPORTED:   return EOPNOTSUPP;
97a979
+        case PCMK_EXEC_NOT_CONNECTED:   return ENOTCONN;
97a979
+        case PCMK_EXEC_NO_FENCE_DEVICE: return ENODEV;
97a979
+        case PCMK_EXEC_NO_SECRETS:      return EACCES;
97a979
+        default:                        break;
97a979
+    }
97a979
 
97a979
-    if (result->execution_status == PCMK_EXEC_TIMEOUT) {
97a979
-            rc = ETIME;
97a979
+    if (result->exit_status == CRM_EX_OK) {
97a979
+        return pcmk_rc_ok;
97a979
+    }
97a979
 
97a979
-    } else if (result->exit_status != CRM_EX_OK) {
97a979
-        /* Try to provide a useful error code based on the fence agent's
97a979
-         * error output.
97a979
-         */
97a979
-        if (result->action_stderr == NULL) {
97a979
-            rc = ENODATA;
97a979
+    // Try to provide useful error code based on result's error output
97a979
 
97a979
-        } else if (strstr(result->action_stderr, "imed out")) {
97a979
-            /* Some agents have their own internal timeouts */
97a979
-            rc = ETIME;
97a979
+    if (result->action_stderr == NULL) {
97a979
+        return ENODATA;
97a979
 
97a979
-        } else if (strstr(result->action_stderr, "Unrecognised action")) {
97a979
-            rc = EOPNOTSUPP;
97a979
+    } else if (strcasestr(result->action_stderr, "timed out")
97a979
+               || strcasestr(result->action_stderr, "timeout")) {
97a979
+        return ETIME;
97a979
 
97a979
-        } else {
97a979
-            rc = pcmk_rc_error;
97a979
-        }
97a979
+    } else if (strcasestr(result->action_stderr, "unrecognised action")
97a979
+               || strcasestr(result->action_stderr, "unrecognized action")
97a979
+               || strcasestr(result->action_stderr, "unsupported action")) {
97a979
+        return EOPNOTSUPP;
97a979
     }
97a979
-    return rc;
97a979
+
97a979
+    // Oh well, we tried
97a979
+    return pcmk_rc_error;
97a979
 }
97a979
 
97a979
 static void
97a979
@@ -821,7 +837,8 @@ stonith_action_async_done(svc_action_t *svc_action)
97a979
     }
97a979
 
97a979
     if (action->done_cb) {
97a979
-        action->done_cb(action->pid, pcmk_rc2legacy(result2rc(&(action->result))),
97a979
+        action->done_cb(action->pid,
97a979
+                        pcmk_rc2legacy(stonith__result2rc(&(action->result))),
97a979
                         action->result.action_stdout, action->userdata);
97a979
     }
97a979
 
97a979
-- 
97a979
2.27.0
97a979
97a979
97a979
From 153c9b552a5bad9dd36e8635fa478ed9cad1f240 Mon Sep 17 00:00:00 2001
97a979
From: Ken Gaillot <kgaillot@redhat.com>
97a979
Date: Thu, 7 Oct 2021 11:35:44 -0500
97a979
Subject: [PATCH 03/12] Refactor: fencing: return full result from
97a979
 stonith__action_result()
97a979
97a979
Previously, stonith__action_result() grabbed an action's legacy rc, stdout, and
97a979
stderr separately. Now, directly return a pointer to the action's result
97a979
object, and map that to a legacy rc in the callers when needed.
97a979
---
97a979
 include/crm/fencing/internal.h |  3 +--
97a979
 lib/fencing/st_client.c        | 36 ++++---------------------
97a979
 lib/fencing/st_rhcs.c          | 48 ++++++++++++++++++++++++----------
97a979
 3 files changed, 40 insertions(+), 47 deletions(-)
97a979
97a979
diff --git a/include/crm/fencing/internal.h b/include/crm/fencing/internal.h
97a979
index 0d23967bb..4e9f50fe8 100644
97a979
--- a/include/crm/fencing/internal.h
97a979
+++ b/include/crm/fencing/internal.h
97a979
@@ -58,8 +58,7 @@ stonith_action_t *stonith_action_create(const char *agent,
97a979
                                         GHashTable * port_map,
97a979
                                         const char * host_arg);
97a979
 void stonith__destroy_action(stonith_action_t *action);
97a979
-void stonith__action_result(stonith_action_t *action, int *rc, char **output,
97a979
-                            char **error_output);
97a979
+pcmk__action_result_t *stonith__action_result(stonith_action_t *action);
97a979
 int stonith__result2rc(const pcmk__action_result_t *result);
97a979
 
97a979
 int
97a979
diff --git a/lib/fencing/st_client.c b/lib/fencing/st_client.c
97a979
index 809be1640..b9df18465 100644
97a979
--- a/lib/fencing/st_client.c
97a979
+++ b/lib/fencing/st_client.c
97a979
@@ -670,40 +670,14 @@ stonith__destroy_action(stonith_action_t *action)
97a979
  * \internal
97a979
  * \brief Get the result of an executed stonith action
97a979
  *
97a979
- * \param[in,out] action        Executed action
97a979
- * \param[out]    rc            Where to store result code (or NULL)
97a979
- * \param[out]    output        Where to store standard output (or NULL)
97a979
- * \param[out]    error_output  Where to store standard error output (or NULL)
97a979
+ * \param[in] action  Executed action
97a979
  *
97a979
- * \note If output or error_output is not NULL, the caller is responsible for
97a979
- *       freeing the memory.
97a979
+ * \return Pointer to action's result (or NULL if \p action is NULL)
97a979
  */
97a979
-void
97a979
-stonith__action_result(stonith_action_t *action, int *rc, char **output,
97a979
-                       char **error_output)
97a979
+pcmk__action_result_t *
97a979
+stonith__action_result(stonith_action_t *action)
97a979
 {
97a979
-    if (rc) {
97a979
-        *rc = pcmk_ok;
97a979
-    }
97a979
-    if (output) {
97a979
-        *output = NULL;
97a979
-    }
97a979
-    if (error_output) {
97a979
-        *error_output = NULL;
97a979
-    }
97a979
-    if (action != NULL) {
97a979
-        if (rc) {
97a979
-            *rc = pcmk_rc2legacy(stonith__result2rc(&(action->result)));
97a979
-        }
97a979
-        if ((output != NULL) && (action->result.action_stdout != NULL)) {
97a979
-            *output = action->result.action_stdout;
97a979
-            action->result.action_stdout = NULL; // hand off ownership to caller
97a979
-        }
97a979
-        if ((error_output != NULL) && (action->result.action_stderr != NULL)) {
97a979
-            *error_output = action->result.action_stderr;
97a979
-            action->result.action_stderr = NULL; // hand off ownership to caller
97a979
-        }
97a979
-    }
97a979
+    return (action == NULL)? NULL : &(action->result);
97a979
 }
97a979
 
97a979
 #define FAILURE_MAX_RETRIES 2
97a979
diff --git a/lib/fencing/st_rhcs.c b/lib/fencing/st_rhcs.c
97a979
index 89a2625bd..23e694975 100644
97a979
--- a/lib/fencing/st_rhcs.c
97a979
+++ b/lib/fencing/st_rhcs.c
97a979
@@ -1,5 +1,5 @@
97a979
 /*
97a979
- * Copyright 2004-2020 the Pacemaker project contributors
97a979
+ * Copyright 2004-2021 the Pacemaker project contributors
97a979
  *
97a979
  * The version control history for this file may have further details.
97a979
  *
97a979
@@ -123,10 +123,10 @@ stonith_rhcs_parameter_not_required(xmlNode *metadata, const char *parameter)
97a979
 static int
97a979
 stonith__rhcs_get_metadata(const char *agent, int timeout, xmlNode **metadata)
97a979
 {
97a979
-    char *buffer = NULL;
97a979
     xmlNode *xml = NULL;
97a979
     xmlNode *actions = NULL;
97a979
     xmlXPathObject *xpathObj = NULL;
97a979
+    pcmk__action_result_t *result = NULL;
97a979
     stonith_action_t *action = stonith_action_create(agent, "metadata", NULL, 0,
97a979
                                                      5, NULL, NULL, NULL);
97a979
     int rc = stonith__execute(action);
97a979
@@ -138,23 +138,31 @@ stonith__rhcs_get_metadata(const char *agent, int timeout, xmlNode **metadata)
97a979
         return rc;
97a979
     }
97a979
 
97a979
-    stonith__action_result(action, &rc, &buffer, NULL);
97a979
-    stonith__destroy_action(action);
97a979
-    if (rc < 0) {
97a979
-        crm_warn("Metadata action for %s failed: %s " CRM_XS "rc=%d",
97a979
-                 agent, pcmk_strerror(rc), rc);
97a979
-        free(buffer);
97a979
-        return rc;
97a979
+    result = stonith__action_result(action);
97a979
+
97a979
+    if (result->execution_status != PCMK_EXEC_DONE) {
97a979
+        crm_warn("Could not execute metadata action for %s: %s",
97a979
+                 agent, pcmk_exec_status_str(result->execution_status));
97a979
+        stonith__destroy_action(action);
97a979
+        return pcmk_rc2legacy(stonith__result2rc(result));
97a979
     }
97a979
 
97a979
-    if (buffer == NULL) {
97a979
+    if (result->exit_status != CRM_EX_OK) {
97a979
+        crm_warn("Metadata action for %s returned error code %d",
97a979
+                 agent, result->exit_status);
97a979
+        stonith__destroy_action(action);
97a979
+        return pcmk_rc2legacy(stonith__result2rc(result));
97a979
+    }
97a979
+
97a979
+    if (result->action_stdout == NULL) {
97a979
         crm_warn("Metadata action for %s returned no data", agent);
97a979
+        stonith__destroy_action(action);
97a979
         return -ENODATA;
97a979
     }
97a979
 
97a979
-    xml = string2xml(buffer);
97a979
-    free(buffer);
97a979
-    buffer = NULL;
97a979
+    xml = string2xml(result->action_stdout);
97a979
+    stonith__destroy_action(action);
97a979
+
97a979
     if (xml == NULL) {
97a979
         crm_warn("Metadata for %s is invalid", agent);
97a979
         return -pcmk_err_schema_validation;
97a979
@@ -289,7 +297,19 @@ stonith__rhcs_validate(stonith_t *st, int call_options, const char *target,
97a979
 
97a979
     rc = stonith__execute(action);
97a979
     if (rc == pcmk_ok) {
97a979
-        stonith__action_result(action, &rc, output, error_output);
97a979
+        pcmk__action_result_t *result = stonith__action_result(action);
97a979
+
97a979
+        rc = pcmk_rc2legacy(stonith__result2rc(result));
97a979
+
97a979
+        // Take ownership of output so stonith__destroy_action() doesn't free it
97a979
+        if (output != NULL) {
97a979
+            *output = result->action_stdout;
97a979
+            result->action_stdout = NULL;
97a979
+        }
97a979
+        if (error_output != NULL) {
97a979
+            *error_output = result->action_stderr;
97a979
+            result->action_stderr = NULL;
97a979
+        }
97a979
     }
97a979
     stonith__destroy_action(action);
97a979
     return rc;
97a979
-- 
97a979
2.27.0
97a979
97a979
97a979
From 7f7067014357cccb229a0bef091e234eb3765f7a Mon Sep 17 00:00:00 2001
97a979
From: Ken Gaillot <kgaillot@redhat.com>
97a979
Date: Tue, 21 Sep 2021 13:05:54 -0500
97a979
Subject: [PATCH 04/12] Refactor: fencing: pass full result to async action
97a979
 callback
97a979
97a979
When executing an asynchronous fence agent command, the fencing library gets
97a979
the full result (exit status, execution status, and exit reason) from the
97a979
services library, then maps that to a legacy return code.
97a979
97a979
Now, pass the full result object to the fencing async callback, rather than
97a979
separate arguments for legacy code and stdout. The mapping to a legacy code now
97a979
happens in the fencer rather than the fencing library.
97a979
97a979
The goal of this and following commits is to push the full result object
97a979
further down the code path, so that ultimately the full result is always
97a979
available internally, and the legacy code mapping is only done for backward
97a979
compatibility when sending the result back to a client.
97a979
97a979
This commit focuses on the async callback (done_cb() in both the fencer's
97a979
async_command_t and the fencing library's stonith_action_t). Later commits will
97a979
follow the chain:
97a979
97a979
	st_child_done() and stonith_fence_get_devices_cb()
97a979
	-> stonith_send_async_reply()
97a979
	-> stonith_construct_async_reply() and log_async_result()
97a979
---
97a979
 daemons/fenced/fenced_commands.c | 78 +++++++++++++++++++++-----------
97a979
 include/crm/fencing/internal.h   |  3 +-
97a979
 lib/fencing/st_client.c          | 10 ++--
97a979
 3 files changed, 58 insertions(+), 33 deletions(-)
97a979
97a979
diff --git a/daemons/fenced/fenced_commands.c b/daemons/fenced/fenced_commands.c
97a979
index b5ae28d90..d5d04ae69 100644
97a979
--- a/daemons/fenced/fenced_commands.c
97a979
+++ b/daemons/fenced/fenced_commands.c
97a979
@@ -62,7 +62,8 @@ struct device_search_s {
97a979
 };
97a979
 
97a979
 static gboolean stonith_device_dispatch(gpointer user_data);
97a979
-static void st_child_done(int pid, int rc, const char *output, void *user_data);
97a979
+static void st_child_done(int pid, const pcmk__action_result_t *result,
97a979
+                          void *user_data);
97a979
 static void stonith_send_reply(xmlNode * reply, int call_options, const char *remote_peer,
97a979
                                const char *client_id);
97a979
 
97a979
@@ -99,7 +100,8 @@ typedef struct async_command_s {
97a979
     GList *device_next;
97a979
 
97a979
     void *internal_user_data;
97a979
-    void (*done_cb) (int pid, int rc, const char *output, void *user_data);
97a979
+    void (*done_cb) (int pid, const pcmk__action_result_t *result,
97a979
+                     void *user_data);
97a979
     guint timer_sigterm;
97a979
     guint timer_sigkill;
97a979
     /*! If the operation timed out, this is the last signal
97a979
@@ -377,13 +379,25 @@ get_agent_metadata_cb(gpointer data) {
97a979
  * \internal
97a979
  * \brief Call a command's action callback for an internal (not library) result
97a979
  *
97a979
- * \param[in] cmd     Command to report result for
97a979
- * \param[in] rc      Legacy return code to pass to callback
97a979
+ * \param[in] cmd               Command to report result for
97a979
+ * \param[in] execution_status  Execution status to use for result
97a979
+ * \param[in] exit_status       Exit status to use for result
97a979
+ * \param[in] exit_reason       Exit reason to use for result
97a979
  */
97a979
 static void
97a979
-report_internal_result(async_command_t *cmd, int rc)
97a979
+report_internal_result(async_command_t *cmd, int exit_status,
97a979
+                       int execution_status, const char *exit_reason)
97a979
 {
97a979
-    cmd->done_cb(0, rc, NULL, cmd);
97a979
+    pcmk__action_result_t result = {
97a979
+        // Ensure we don't pass garbage to free()
97a979
+        .exit_reason = NULL,
97a979
+        .action_stdout = NULL,
97a979
+        .action_stderr = NULL
97a979
+    };
97a979
+
97a979
+    pcmk__set_result(&result, exit_status, execution_status, exit_reason);
97a979
+    cmd->done_cb(0, &result, cmd);
97a979
+    pcmk__reset_result(&result);
97a979
 }
97a979
 
97a979
 static gboolean
97a979
@@ -446,7 +460,7 @@ stonith_device_execute(stonith_device_t * device)
97a979
             }
97a979
         } else {
97a979
             crm_info("Faking success for %s watchdog operation", cmd->action);
97a979
-            report_internal_result(cmd, pcmk_ok);
97a979
+            report_internal_result(cmd, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
97a979
             goto done;
97a979
         }
97a979
     }
97a979
@@ -462,7 +476,8 @@ stonith_device_execute(stonith_device_t * device)
97a979
             crm_err("Considering %s unconfigured "
97a979
                     "because unable to load CIB secrets: %s",
97a979
                      device->id, pcmk_rc_str(exec_rc));
97a979
-            report_internal_result(cmd, -EACCES);
97a979
+            report_internal_result(cmd, CRM_EX_ERROR, PCMK_EXEC_NO_SECRETS,
97a979
+                                   NULL);
97a979
             goto done;
97a979
         }
97a979
     }
97a979
@@ -501,7 +516,7 @@ stonith_device_execute(stonith_device_t * device)
97a979
                                            cmd->done_cb, fork_cb);
97a979
     if (exec_rc < 0) {
97a979
         cmd->activating_on = NULL;
97a979
-        report_internal_result(cmd, exec_rc);
97a979
+        cmd->done_cb(0, stonith__action_result(action), cmd);
97a979
         stonith__destroy_action(action);
97a979
     }
97a979
 
97a979
@@ -625,7 +640,8 @@ free_device(gpointer data)
97a979
         async_command_t *cmd = gIter->data;
97a979
 
97a979
         crm_warn("Removal of device '%s' purged operation '%s'", device->id, cmd->action);
97a979
-        report_internal_result(cmd, -ENODEV);
97a979
+        report_internal_result(cmd, CRM_EX_ERROR, PCMK_EXEC_NO_FENCE_DEVICE,
97a979
+                               NULL);
97a979
     }
97a979
     g_list_free(device->pending_ops);
97a979
 
97a979
@@ -1079,7 +1095,8 @@ schedule_internal_command(const char *origin,
97a979
                           const char *victim,
97a979
                           int timeout,
97a979
                           void *internal_user_data,
97a979
-                          void (*done_cb) (int pid, int rc, const char *output,
97a979
+                          void (*done_cb) (int pid,
97a979
+                                           const pcmk__action_result_t *result,
97a979
                                            void *user_data))
97a979
 {
97a979
     async_command_t *cmd = NULL;
97a979
@@ -1111,7 +1128,7 @@ enum fence_status_code {
97a979
 };
97a979
 
97a979
 static void
97a979
-status_search_cb(int pid, int rc, const char *output, void *user_data)
97a979
+status_search_cb(int pid, const pcmk__action_result_t *result, void *user_data)
97a979
 {
97a979
     async_command_t *cmd = user_data;
97a979
     struct device_search_s *search = cmd->internal_user_data;
97a979
@@ -1127,7 +1144,7 @@ status_search_cb(int pid, int rc, const char *output, void *user_data)
97a979
 
97a979
     mainloop_set_trigger(dev->work);
97a979
 
97a979
-    switch (rc) {
97a979
+    switch (result->exit_status) {
97a979
         case fence_status_unknown:
97a979
             crm_trace("%s reported it cannot fence %s", dev->id, search->host);
97a979
             break;
97a979
@@ -1141,14 +1158,15 @@ status_search_cb(int pid, int rc, const char *output, void *user_data)
97a979
         default:
97a979
             crm_warn("Assuming %s cannot fence %s "
97a979
                      "(status returned unknown code %d)",
97a979
-                     dev->id, search->host, rc);
97a979
+                     dev->id, search->host, result->exit_status);
97a979
             break;
97a979
     }
97a979
     search_devices_record_result(search, dev->id, can);
97a979
 }
97a979
 
97a979
 static void
97a979
-dynamic_list_search_cb(int pid, int rc, const char *output, void *user_data)
97a979
+dynamic_list_search_cb(int pid, const pcmk__action_result_t *result,
97a979
+                       void *user_data)
97a979
 {
97a979
     async_command_t *cmd = user_data;
97a979
     struct device_search_s *search = cmd->internal_user_data;
97a979
@@ -1169,21 +1187,21 @@ dynamic_list_search_cb(int pid, int rc, const char *output, void *user_data)
97a979
 
97a979
     mainloop_set_trigger(dev->work);
97a979
 
97a979
-    if (rc == CRM_EX_OK) {
97a979
+    if (result->exit_status == CRM_EX_OK) {
97a979
         crm_info("Refreshing target list for %s", dev->id);
97a979
         g_list_free_full(dev->targets, free);
97a979
-        dev->targets = stonith__parse_targets(output);
97a979
+        dev->targets = stonith__parse_targets(result->action_stdout);
97a979
         dev->targets_age = time(NULL);
97a979
 
97a979
     } else if (dev->targets != NULL) {
97a979
         crm_info("Reusing most recent target list for %s "
97a979
                  "because list returned error code %d",
97a979
-                 dev->id, rc);
97a979
+                 dev->id, result->exit_status);
97a979
 
97a979
     } else { // We have never successfully executed list
97a979
         crm_warn("Assuming %s cannot fence %s "
97a979
                  "because list returned error code %d",
97a979
-                 dev->id, search->host, rc);
97a979
+                 dev->id, search->host, result->exit_status);
97a979
 
97a979
         /* Fall back to pcmk_host_check="status" if the user didn't explicitly
97a979
          * specify "dynamic-list".
97a979
@@ -2407,7 +2425,7 @@ cancel_stonith_command(async_command_t * cmd)
97a979
 }
97a979
 
97a979
 static void
97a979
-st_child_done(int pid, int rc, const char *output, void *user_data)
97a979
+st_child_done(int pid, const pcmk__action_result_t *result, void *user_data)
97a979
 {
97a979
     stonith_device_t *device = NULL;
97a979
     stonith_device_t *next_device = NULL;
97a979
@@ -2423,7 +2441,7 @@ st_child_done(int pid, int rc, const char *output, void *user_data)
97a979
     /* The device is ready to do something else now */
97a979
     device = g_hash_table_lookup(device_list, cmd->device);
97a979
     if (device) {
97a979
-        if (!device->verified && (rc == pcmk_ok) &&
97a979
+        if (!device->verified && (result->exit_status == CRM_EX_OK) &&
97a979
             (pcmk__strcase_any_of(cmd->action, "list", "monitor", "status", NULL))) {
97a979
 
97a979
             device->verified = TRUE;
97a979
@@ -2432,7 +2450,7 @@ st_child_done(int pid, int rc, const char *output, void *user_data)
97a979
         mainloop_set_trigger(device->work);
97a979
     }
97a979
 
97a979
-    if (rc == 0) {
97a979
+    if (result->exit_status == CRM_EX_OK) {
97a979
         GList *iter;
97a979
         /* see if there are any required devices left to execute for this op */
97a979
         for (iter = cmd->device_next; iter != NULL; iter = iter->next) {
97a979
@@ -2445,7 +2463,8 @@ st_child_done(int pid, int rc, const char *output, void *user_data)
97a979
             next_device = NULL;
97a979
         }
97a979
 
97a979
-    } else if (rc != 0 && cmd->device_next && (is_action_required(cmd->action, device) == FALSE)) {
97a979
+    } else if ((cmd->device_next != NULL)
97a979
+               && !is_action_required(cmd->action, device)) {
97a979
         /* if this device didn't work out, see if there are any others we can try.
97a979
          * if the failed device was 'required', we can't pick another device. */
97a979
         next_device = g_hash_table_lookup(device_list, cmd->device_next->data);
97a979
@@ -2454,16 +2473,19 @@ st_child_done(int pid, int rc, const char *output, void *user_data)
97a979
 
97a979
     /* this operation requires more fencing, hooray! */
97a979
     if (next_device) {
97a979
-        log_async_result(cmd, rc, pid, next_device->id, output, FALSE);
97a979
+        log_async_result(cmd, pcmk_rc2legacy(stonith__result2rc(result)), pid,
97a979
+                         next_device->id, result->action_stdout, FALSE);
97a979
         schedule_stonith_command(cmd, next_device);
97a979
         /* Prevent cmd from being freed */
97a979
         cmd = NULL;
97a979
         goto done;
97a979
     }
97a979
 
97a979
-    stonith_send_async_reply(cmd, output, rc, pid, false);
97a979
+    stonith_send_async_reply(cmd, result->action_stdout,
97a979
+                             pcmk_rc2legacy(stonith__result2rc(result)), pid,
97a979
+                             false);
97a979
 
97a979
-    if (rc != 0) {
97a979
+    if (result->exit_status != CRM_EX_OK) {
97a979
         goto done;
97a979
     }
97a979
 
97a979
@@ -2509,7 +2531,9 @@ st_child_done(int pid, int rc, const char *output, void *user_data)
97a979
 
97a979
         cmd_list = g_list_remove_link(cmd_list, gIter);
97a979
 
97a979
-        stonith_send_async_reply(cmd_other, output, rc, pid, true);
97a979
+        stonith_send_async_reply(cmd_other, result->action_stdout,
97a979
+                                 pcmk_rc2legacy(stonith__result2rc(result)),
97a979
+                                 pid, true);
97a979
         cancel_stonith_command(cmd_other);
97a979
 
97a979
         free_async_command(cmd_other);
97a979
diff --git a/include/crm/fencing/internal.h b/include/crm/fencing/internal.h
97a979
index 4e9f50fe8..6a7e4232c 100644
97a979
--- a/include/crm/fencing/internal.h
97a979
+++ b/include/crm/fencing/internal.h
97a979
@@ -64,7 +64,8 @@ int stonith__result2rc(const pcmk__action_result_t *result);
97a979
 int
97a979
 stonith_action_execute_async(stonith_action_t * action,
97a979
                              void *userdata,
97a979
-                             void (*done) (int pid, int rc, const char *output,
97a979
+                             void (*done) (int pid,
97a979
+                                           const pcmk__action_result_t *result,
97a979
                                            void *user_data),
97a979
                              void (*fork_cb) (int pid, void *user_data));
97a979
 
97a979
diff --git a/lib/fencing/st_client.c b/lib/fencing/st_client.c
97a979
index b9df18465..59dcab9a3 100644
97a979
--- a/lib/fencing/st_client.c
97a979
+++ b/lib/fencing/st_client.c
97a979
@@ -46,7 +46,8 @@ struct stonith_action_s {
97a979
     int timeout;
97a979
     int async;
97a979
     void *userdata;
97a979
-    void (*done_cb) (int pid, int status, const char *output, void *user_data);
97a979
+    void (*done_cb) (int pid, const pcmk__action_result_t *result,
97a979
+                     void *user_data);
97a979
     void (*fork_cb) (int pid, void *user_data);
97a979
 
97a979
     svc_action_t *svc_action;
97a979
@@ -811,9 +812,7 @@ stonith_action_async_done(svc_action_t *svc_action)
97a979
     }
97a979
 
97a979
     if (action->done_cb) {
97a979
-        action->done_cb(action->pid,
97a979
-                        pcmk_rc2legacy(stonith__result2rc(&(action->result))),
97a979
-                        action->result.action_stdout, action->userdata);
97a979
+        action->done_cb(action->pid, &(action->result), action->userdata);
97a979
     }
97a979
 
97a979
     action->svc_action = NULL; // don't remove our caller
97a979
@@ -933,7 +932,8 @@ internal_stonith_action_execute(stonith_action_t * action)
97a979
 int
97a979
 stonith_action_execute_async(stonith_action_t * action,
97a979
                              void *userdata,
97a979
-                             void (*done) (int pid, int rc, const char *output,
97a979
+                             void (*done) (int pid,
97a979
+                                           const pcmk__action_result_t *result,
97a979
                                            void *user_data),
97a979
                              void (*fork_cb) (int pid, void *user_data))
97a979
 {
97a979
-- 
97a979
2.27.0
97a979
97a979
97a979
From bbd022306df7a873c0ecb2be2d33c56fbf327b8c Mon Sep 17 00:00:00 2001
97a979
From: Ken Gaillot <kgaillot@redhat.com>
97a979
Date: Tue, 21 Sep 2021 11:51:28 -0500
97a979
Subject: [PATCH 05/12] Feature: fencing: set exit reason for internal
97a979
 execution errors
97a979
97a979
... most importantly, copying any exit reason set by the services library.
97a979
This ensures that the stonith_action_t exit reason is set when appropriate.
97a979
However, nothing uses it as of this commit.
97a979
---
97a979
 daemons/fenced/fenced_commands.c | 4 ++--
97a979
 lib/fencing/st_client.c          | 6 +++---
97a979
 2 files changed, 5 insertions(+), 5 deletions(-)
97a979
97a979
diff --git a/daemons/fenced/fenced_commands.c b/daemons/fenced/fenced_commands.c
97a979
index d5d04ae69..f55a32649 100644
97a979
--- a/daemons/fenced/fenced_commands.c
97a979
+++ b/daemons/fenced/fenced_commands.c
97a979
@@ -477,7 +477,7 @@ stonith_device_execute(stonith_device_t * device)
97a979
                     "because unable to load CIB secrets: %s",
97a979
                      device->id, pcmk_rc_str(exec_rc));
97a979
             report_internal_result(cmd, CRM_EX_ERROR, PCMK_EXEC_NO_SECRETS,
97a979
-                                   NULL);
97a979
+                                   "Failed to get CIB secrets");
97a979
             goto done;
97a979
         }
97a979
     }
97a979
@@ -641,7 +641,7 @@ free_device(gpointer data)
97a979
 
97a979
         crm_warn("Removal of device '%s' purged operation '%s'", device->id, cmd->action);
97a979
         report_internal_result(cmd, CRM_EX_ERROR, PCMK_EXEC_NO_FENCE_DEVICE,
97a979
-                               NULL);
97a979
+                               "Device was removed before action could be executed");
97a979
     }
97a979
     g_list_free(device->pending_ops);
97a979
 
97a979
diff --git a/lib/fencing/st_client.c b/lib/fencing/st_client.c
97a979
index 59dcab9a3..3d4127eff 100644
97a979
--- a/lib/fencing/st_client.c
97a979
+++ b/lib/fencing/st_client.c
97a979
@@ -207,7 +207,7 @@ static void
97a979
 set_result_from_svc_action(stonith_action_t *action, svc_action_t *svc_action)
97a979
 {
97a979
     pcmk__set_result(&(action->result), svc_action->rc, svc_action->status,
97a979
-                     NULL);
97a979
+                     services__exit_reason(svc_action));
97a979
     pcmk__set_result_output(&(action->result),
97a979
                             services__grab_stdout(svc_action),
97a979
                             services__grab_stderr(svc_action));
97a979
@@ -706,7 +706,7 @@ stonith_action_create(const char *agent,
97a979
     action->max_retries = FAILURE_MAX_RETRIES;
97a979
 
97a979
     pcmk__set_result(&(action->result), PCMK_OCF_UNKNOWN, PCMK_EXEC_UNKNOWN,
97a979
-                     NULL);
97a979
+                     "Initialization bug in fencing library");
97a979
 
97a979
     if (device_args) {
97a979
         char buffer[512];
97a979
@@ -849,7 +849,7 @@ internal_stonith_action_execute(stonith_action_t * action)
97a979
     if ((action->action == NULL) || (action->args == NULL)
97a979
         || (action->agent == NULL)) {
97a979
         pcmk__set_result(&(action->result), PCMK_OCF_UNKNOWN_ERROR,
97a979
-                         PCMK_EXEC_ERROR_FATAL, NULL);
97a979
+                         PCMK_EXEC_ERROR_FATAL, "Bug in fencing library");
97a979
         return -EINVAL;
97a979
     }
97a979
 
97a979
-- 
97a979
2.27.0
97a979
97a979
97a979
From ed08f600688af1d25412d2427502ba5d4a55c0d6 Mon Sep 17 00:00:00 2001
97a979
From: Ken Gaillot <kgaillot@redhat.com>
97a979
Date: Thu, 7 Oct 2021 12:06:10 -0500
97a979
Subject: [PATCH 06/12] Fix: fencer: handle dynamic target query failures
97a979
 better
97a979
97a979
Previously, the callbacks for list and status queries checked only the result's
97a979
exit status. However, the services library will use PCMK_OCF_UNKNOWN_ERROR (1)
97a979
as the exit status for internal failures, and that value signifies a recognized
97a979
node (not an error) for fence list actions.
97a979
97a979
Now, the callbacks check the execution status as well.
97a979
---
97a979
 daemons/fenced/fenced_commands.c | 46 +++++++++++++++++++++++++++-----
97a979
 1 file changed, 39 insertions(+), 7 deletions(-)
97a979
97a979
diff --git a/daemons/fenced/fenced_commands.c b/daemons/fenced/fenced_commands.c
97a979
index f55a32649..7b3fb25a1 100644
97a979
--- a/daemons/fenced/fenced_commands.c
97a979
+++ b/daemons/fenced/fenced_commands.c
97a979
@@ -1144,6 +1144,18 @@ status_search_cb(int pid, const pcmk__action_result_t *result, void *user_data)
97a979
 
97a979
     mainloop_set_trigger(dev->work);
97a979
 
97a979
+    if (result->execution_status != PCMK_EXEC_DONE) {
97a979
+        crm_warn("Assuming %s cannot fence %s "
97a979
+                 "because status could not be executed: %s%s%s%s",
97a979
+                 dev->id, search->host,
97a979
+                 pcmk_exec_status_str(result->execution_status),
97a979
+                 ((result->exit_reason == NULL)? "" : " ("),
97a979
+                 ((result->exit_reason == NULL)? "" : result->exit_reason),
97a979
+                 ((result->exit_reason == NULL)? "" : ")"));
97a979
+        search_devices_record_result(search, dev->id, FALSE);
97a979
+        return;
97a979
+    }
97a979
+
97a979
     switch (result->exit_status) {
97a979
         case fence_status_unknown:
97a979
             crm_trace("%s reported it cannot fence %s", dev->id, search->host);
97a979
@@ -1187,21 +1199,41 @@ dynamic_list_search_cb(int pid, const pcmk__action_result_t *result,
97a979
 
97a979
     mainloop_set_trigger(dev->work);
97a979
 
97a979
-    if (result->exit_status == CRM_EX_OK) {
97a979
+    if ((result->execution_status == PCMK_EXEC_DONE)
97a979
+        && (result->exit_status == CRM_EX_OK)) {
97a979
         crm_info("Refreshing target list for %s", dev->id);
97a979
         g_list_free_full(dev->targets, free);
97a979
         dev->targets = stonith__parse_targets(result->action_stdout);
97a979
         dev->targets_age = time(NULL);
97a979
 
97a979
     } else if (dev->targets != NULL) {
97a979
-        crm_info("Reusing most recent target list for %s "
97a979
-                 "because list returned error code %d",
97a979
-                 dev->id, result->exit_status);
97a979
+        if (result->execution_status == PCMK_EXEC_DONE) {
97a979
+            crm_info("Reusing most recent target list for %s "
97a979
+                     "because list returned error code %d",
97a979
+                     dev->id, result->exit_status);
97a979
+        } else {
97a979
+            crm_info("Reusing most recent target list for %s "
97a979
+                     "because list could not be executed: %s%s%s%s",
97a979
+                     dev->id, pcmk_exec_status_str(result->execution_status),
97a979
+                     ((result->exit_reason == NULL)? "" : " ("),
97a979
+                     ((result->exit_reason == NULL)? "" : result->exit_reason),
97a979
+                     ((result->exit_reason == NULL)? "" : ")"));
97a979
+        }
97a979
 
97a979
     } else { // We have never successfully executed list
97a979
-        crm_warn("Assuming %s cannot fence %s "
97a979
-                 "because list returned error code %d",
97a979
-                 dev->id, search->host, result->exit_status);
97a979
+        if (result->execution_status == PCMK_EXEC_DONE) {
97a979
+            crm_warn("Assuming %s cannot fence %s "
97a979
+                     "because list returned error code %d",
97a979
+                     dev->id, search->host, result->exit_status);
97a979
+        } else {
97a979
+            crm_warn("Assuming %s cannot fence %s "
97a979
+                     "because list could not be executed: %s%s%s%s",
97a979
+                     dev->id, search->host,
97a979
+                     pcmk_exec_status_str(result->execution_status),
97a979
+                     ((result->exit_reason == NULL)? "" : " ("),
97a979
+                     ((result->exit_reason == NULL)? "" : result->exit_reason),
97a979
+                     ((result->exit_reason == NULL)? "" : ")"));
97a979
+        }
97a979
 
97a979
         /* Fall back to pcmk_host_check="status" if the user didn't explicitly
97a979
          * specify "dynamic-list".
97a979
-- 
97a979
2.27.0
97a979
97a979
97a979
From 5a30238a3b8691a5fc20f53906c0efcc50193306 Mon Sep 17 00:00:00 2001
97a979
From: Ken Gaillot <kgaillot@redhat.com>
97a979
Date: Tue, 21 Sep 2021 15:57:50 -0500
97a979
Subject: [PATCH 07/12] Refactor: fencer: pass result object when sending an
97a979
 async reply
97a979
97a979
... via stonith_send_async_reply(), instead of sending the mapped legacy code
97a979
and action stdout separately. Also, drop the "stonith_" prefix since the
97a979
function is static.
97a979
97a979
This moves the mapping from the stonith_send_async_reply() callers to the
97a979
function itself, so we use the result object and standard codes as long as
97a979
possible, and map to a legacy code only where needed.
97a979
---
97a979
 daemons/fenced/fenced_commands.c | 62 +++++++++++++++++++-------------
97a979
 daemons/fenced/fenced_remote.c   |  2 +-
97a979
 2 files changed, 39 insertions(+), 25 deletions(-)
97a979
97a979
diff --git a/daemons/fenced/fenced_commands.c b/daemons/fenced/fenced_commands.c
97a979
index 7b3fb25a1..e5f8162ce 100644
97a979
--- a/daemons/fenced/fenced_commands.c
97a979
+++ b/daemons/fenced/fenced_commands.c
97a979
@@ -2376,12 +2376,28 @@ log_async_result(async_command_t *cmd, int rc, int pid, const char *next,
97a979
     }
97a979
 }
97a979
 
97a979
+/*!
97a979
+ * \internal
97a979
+ * \brief Reply to requester after asynchronous command completion
97a979
+ *
97a979
+ * \param[in] cmd      Command that completed
97a979
+ * \param[in] result   Result of command
97a979
+ * \param[in] pid      Process ID of command, if available
97a979
+ * \param[in] merged   If true, command was merged with another, not executed
97a979
+ */
97a979
 static void
97a979
-stonith_send_async_reply(async_command_t *cmd, const char *output, int rc,
97a979
-                         int pid, bool merged)
97a979
+send_async_reply(async_command_t *cmd, const pcmk__action_result_t *result,
97a979
+                 int pid, bool merged)
97a979
 {
97a979
     xmlNode *reply = NULL;
97a979
     gboolean bcast = FALSE;
97a979
+    const char *output = NULL;
97a979
+    int rc = pcmk_ok;
97a979
+
97a979
+    CRM_CHECK((cmd != NULL) && (result != NULL), return);
97a979
+
97a979
+    output = result->action_stdout;
97a979
+    rc = pcmk_rc2legacy(stonith__result2rc(result));
97a979
 
97a979
     reply = stonith_construct_async_reply(cmd, output, NULL, rc);
97a979
 
97a979
@@ -2513,9 +2529,7 @@ st_child_done(int pid, const pcmk__action_result_t *result, void *user_data)
97a979
         goto done;
97a979
     }
97a979
 
97a979
-    stonith_send_async_reply(cmd, result->action_stdout,
97a979
-                             pcmk_rc2legacy(stonith__result2rc(result)), pid,
97a979
-                             false);
97a979
+    send_async_reply(cmd, result, pid, false);
97a979
 
97a979
     if (result->exit_status != CRM_EX_OK) {
97a979
         goto done;
97a979
@@ -2563,9 +2577,7 @@ st_child_done(int pid, const pcmk__action_result_t *result, void *user_data)
97a979
 
97a979
         cmd_list = g_list_remove_link(cmd_list, gIter);
97a979
 
97a979
-        stonith_send_async_reply(cmd_other, result->action_stdout,
97a979
-                                 pcmk_rc2legacy(stonith__result2rc(result)),
97a979
-                                 pid, true);
97a979
+        send_async_reply(cmd_other, result, pid, true);
97a979
         cancel_stonith_command(cmd_other);
97a979
 
97a979
         free_async_command(cmd_other);
97a979
@@ -2604,26 +2616,28 @@ stonith_fence_get_devices_cb(GList * devices, void *user_data)
97a979
         /* Order based on priority */
97a979
         devices = g_list_sort(devices, sort_device_priority);
97a979
         device = g_hash_table_lookup(device_list, devices->data);
97a979
-
97a979
-        if (device) {
97a979
-            cmd->device_list = devices;
97a979
-            cmd->device_next = devices->next;
97a979
-            devices = NULL;     /* list owned by cmd now */
97a979
-        }
97a979
     }
97a979
 
97a979
-    /* we have a device, schedule it for fencing. */
97a979
-    if (device) {
97a979
-        schedule_stonith_command(cmd, device);
97a979
-        /* in progress */
97a979
-        return;
97a979
-    }
97a979
+    if (device == NULL) { // No device found
97a979
+        pcmk__action_result_t result = {
97a979
+            // Ensure we don't pass garbage to free()
97a979
+            .exit_reason = NULL,
97a979
+            .action_stdout = NULL,
97a979
+            .action_stderr = NULL
97a979
+        };
97a979
 
97a979
-    /* no device found! */
97a979
-    stonith_send_async_reply(cmd, NULL, -ENODEV, 0, false);
97a979
+        pcmk__set_result(&result, CRM_EX_ERROR, PCMK_EXEC_NO_FENCE_DEVICE,
97a979
+                         "No fence device configured for target");
97a979
+        send_async_reply(cmd, &result, 0, false);
97a979
+        pcmk__reset_result(&result);
97a979
+        free_async_command(cmd);
97a979
+        g_list_free_full(devices, free);
97a979
 
97a979
-    free_async_command(cmd);
97a979
-    g_list_free_full(devices, free);
97a979
+    } else { // Device found, schedule it for fencing
97a979
+        cmd->device_list = devices;
97a979
+        cmd->device_next = devices->next;
97a979
+        schedule_stonith_command(cmd, device);
97a979
+    }
97a979
 }
97a979
 
97a979
 static int
97a979
diff --git a/daemons/fenced/fenced_remote.c b/daemons/fenced/fenced_remote.c
97a979
index ffaf60018..b09d2865e 100644
97a979
--- a/daemons/fenced/fenced_remote.c
97a979
+++ b/daemons/fenced/fenced_remote.c
97a979
@@ -996,7 +996,7 @@ stonith_manual_ack(xmlNode * msg, remote_fencing_op_t * op)
97a979
 
97a979
     remote_op_done(op, msg, pcmk_ok, FALSE);
97a979
 
97a979
-    /* Replies are sent via done_cb->stonith_send_async_reply()->do_local_reply() */
97a979
+    // Replies are sent via done_cb -> send_async_reply() -> do_local_reply()
97a979
     return -EINPROGRESS;
97a979
 }
97a979
 
97a979
-- 
97a979
2.27.0
97a979
97a979
97a979
From c67b6bfbe0baa1253058417ddfb9bc4cf0844e27 Mon Sep 17 00:00:00 2001
97a979
From: Ken Gaillot <kgaillot@redhat.com>
97a979
Date: Thu, 7 Oct 2021 17:25:38 -0500
97a979
Subject: [PATCH 08/12] Refactor: fencer: pass result object when building
97a979
 async reply
97a979
97a979
... via stonith_construct_async_reply(), instead of passing a mapped legacy rc
97a979
and action output separately, which will be helpful when we add the exit reason
97a979
to the reply. Also, drop the "stonith_" prefix since the function is static, and
97a979
drop an unused argument.
97a979
---
97a979
 daemons/fenced/fenced_commands.c | 33 +++++++++++++++-----------------
97a979
 1 file changed, 15 insertions(+), 18 deletions(-)
97a979
97a979
diff --git a/daemons/fenced/fenced_commands.c b/daemons/fenced/fenced_commands.c
97a979
index e5f8162ce..6bc12e6c4 100644
97a979
--- a/daemons/fenced/fenced_commands.c
97a979
+++ b/daemons/fenced/fenced_commands.c
97a979
@@ -112,8 +112,8 @@ typedef struct async_command_s {
97a979
     stonith_device_t *activating_on;
97a979
 } async_command_t;
97a979
 
97a979
-static xmlNode *stonith_construct_async_reply(async_command_t * cmd, const char *output,
97a979
-                                              xmlNode * data, int rc);
97a979
+static xmlNode *construct_async_reply(async_command_t *cmd,
97a979
+                                      const pcmk__action_result_t *result);
97a979
 
97a979
 static gboolean
97a979
 is_action_required(const char *action, stonith_device_t *device)
97a979
@@ -2399,7 +2399,7 @@ send_async_reply(async_command_t *cmd, const pcmk__action_result_t *result,
97a979
     output = result->action_stdout;
97a979
     rc = pcmk_rc2legacy(stonith__result2rc(result));
97a979
 
97a979
-    reply = stonith_construct_async_reply(cmd, output, NULL, rc);
97a979
+    reply = construct_async_reply(cmd, result);
97a979
 
97a979
     // Only replies for certain actions are broadcast
97a979
     if (pcmk__str_any_of(cmd->action, "metadata", "monitor", "list", "status",
97a979
@@ -2732,17 +2732,20 @@ stonith_construct_reply(xmlNode * request, const char *output, xmlNode * data, i
97a979
     return reply;
97a979
 }
97a979
 
97a979
+/*!
97a979
+ * \internal
97a979
+ * \brief Build an XML reply to an asynchronous fencing command
97a979
+ *
97a979
+ * \param[in] cmd     Fencing command that reply is for
97a979
+ * \param[in] result  Command result
97a979
+ */
97a979
 static xmlNode *
97a979
-stonith_construct_async_reply(async_command_t * cmd, const char *output, xmlNode * data, int rc)
97a979
+construct_async_reply(async_command_t *cmd, const pcmk__action_result_t *result)
97a979
 {
97a979
-    xmlNode *reply = NULL;
97a979
-
97a979
-    crm_trace("Creating a basic reply");
97a979
-    reply = create_xml_node(NULL, T_STONITH_REPLY);
97a979
+    xmlNode *reply = create_xml_node(NULL, T_STONITH_REPLY);
97a979
 
97a979
     crm_xml_add(reply, "st_origin", __func__);
97a979
     crm_xml_add(reply, F_TYPE, T_STONITH_NG);
97a979
-
97a979
     crm_xml_add(reply, F_STONITH_OPERATION, cmd->op);
97a979
     crm_xml_add(reply, F_STONITH_DEVICE, cmd->device);
97a979
     crm_xml_add(reply, F_STONITH_REMOTE_OP_ID, cmd->remote_op_id);
97a979
@@ -2753,15 +2756,9 @@ stonith_construct_async_reply(async_command_t * cmd, const char *output, xmlNode
97a979
     crm_xml_add(reply, F_STONITH_ORIGIN, cmd->origin);
97a979
     crm_xml_add_int(reply, F_STONITH_CALLID, cmd->id);
97a979
     crm_xml_add_int(reply, F_STONITH_CALLOPTS, cmd->options);
97a979
-
97a979
-    crm_xml_add_int(reply, F_STONITH_RC, rc);
97a979
-
97a979
-    crm_xml_add(reply, "st_output", output);
97a979
-
97a979
-    if (data != NULL) {
97a979
-        crm_info("Attaching reply output");
97a979
-        add_message_xml(reply, F_STONITH_CALLDATA, data);
97a979
-    }
97a979
+    crm_xml_add_int(reply, F_STONITH_RC,
97a979
+                    pcmk_rc2legacy(stonith__result2rc(result)));
97a979
+    crm_xml_add(reply, "st_output", result->action_stdout);
97a979
     return reply;
97a979
 }
97a979
 
97a979
-- 
97a979
2.27.0
97a979
97a979
97a979
From 2686caeb3b74f687ddd86a4e483250ca8096ba7c Mon Sep 17 00:00:00 2001
97a979
From: Ken Gaillot <kgaillot@redhat.com>
97a979
Date: Tue, 19 Oct 2021 18:27:31 -0500
97a979
Subject: [PATCH 09/12] Log: fencer: improve messages for asynchronous results
97a979
97a979
Now that we have the full result object, pass it to log_async_result().
97a979
Instead of logging a mapped legacy rc, log the execution status or exit status
97a979
as appropriate, along with the exit reason.
97a979
---
97a979
 daemons/fenced/fenced_commands.c | 43 +++++++++++++++++---------------
97a979
 1 file changed, 23 insertions(+), 20 deletions(-)
97a979
97a979
diff --git a/daemons/fenced/fenced_commands.c b/daemons/fenced/fenced_commands.c
97a979
index 6bc12e6c4..9d06c68dc 100644
97a979
--- a/daemons/fenced/fenced_commands.c
97a979
+++ b/daemons/fenced/fenced_commands.c
97a979
@@ -2305,15 +2305,14 @@ stonith_query(xmlNode * msg, const char *remote_peer, const char *client_id, int
97a979
  * \brief Log the result of an asynchronous command
97a979
  *
97a979
  * \param[in] cmd        Command the result is for
97a979
- * \param[in] rc         Legacy return code corresponding to result
97a979
+ * \param[in] result     Result of command
97a979
  * \param[in] pid        Process ID of command, if available
97a979
  * \param[in] next       Alternate device that will be tried if command failed
97a979
- * \param[in] output     Command output, if any
97a979
  * \param[in] op_merged  Whether this command was merged with an earlier one
97a979
  */
97a979
 static void
97a979
-log_async_result(async_command_t *cmd, int rc, int pid, const char *next,
97a979
-                 const char *output, gboolean op_merged)
97a979
+log_async_result(async_command_t *cmd, const pcmk__action_result_t *result,
97a979
+                 int pid, const char *next, bool op_merged)
97a979
 {
97a979
     int log_level = LOG_ERR;
97a979
     int output_log_level = LOG_NEVER;
97a979
@@ -2321,17 +2320,18 @@ log_async_result(async_command_t *cmd, int rc, int pid, const char *next,
97a979
 
97a979
     GString *msg = g_string_sized_new(80); // Reasonable starting size
97a979
 
97a979
-    // Choose log levels appropriately
97a979
-    if (rc == 0) { // Success
97a979
+    // Choose log levels appropriately if we have a result
97a979
+    if ((result->execution_status == PCMK_EXEC_DONE)
97a979
+        && (result->exit_status == CRM_EX_OK))  { // Success
97a979
         log_level = (cmd->victim == NULL)? LOG_DEBUG : LOG_NOTICE;
97a979
-        if ((output != NULL)
97a979
+        if ((result->action_stdout != NULL)
97a979
             && !pcmk__str_eq(cmd->action, "metadata", pcmk__str_casei)) {
97a979
             output_log_level = LOG_DEBUG;
97a979
         }
97a979
         next = NULL;
97a979
     } else { // Failure
97a979
         log_level = (cmd->victim == NULL)? LOG_NOTICE : LOG_ERR;
97a979
-        if ((output != NULL)
97a979
+        if ((result->action_stdout != NULL)
97a979
             && !pcmk__str_eq(cmd->action, "metadata", pcmk__str_casei)) {
97a979
             output_log_level = LOG_WARNING;
97a979
         }
97a979
@@ -2347,10 +2347,18 @@ log_async_result(async_command_t *cmd, int rc, int pid, const char *next,
97a979
     }
97a979
     g_string_append_printf(msg, "using %s ", cmd->device);
97a979
 
97a979
-    // Add result
97a979
-    g_string_append_printf(msg, "returned %d (%s)", rc, pcmk_strerror(rc));
97a979
+    // Add exit status or execution status as appropriate
97a979
+    if (result->execution_status == PCMK_EXEC_DONE) {
97a979
+        g_string_append_printf(msg, "returned %d", result->exit_status);
97a979
+    } else {
97a979
+        g_string_append_printf(msg, "could not be executed: %s",
97a979
+                               pcmk_exec_status_str(result->execution_status));
97a979
+    }
97a979
 
97a979
-    // Add next device if appropriate
97a979
+    // Add exit reason and next device if appropriate
97a979
+    if (result->exit_reason != NULL) {
97a979
+        g_string_append_printf(msg, " (%s)", result->exit_reason);
97a979
+    }
97a979
     if (next != NULL) {
97a979
         g_string_append_printf(msg, ", retrying with %s", next);
97a979
     }
97a979
@@ -2371,7 +2379,7 @@ log_async_result(async_command_t *cmd, int rc, int pid, const char *next,
97a979
     if (output_log_level != LOG_NEVER) {
97a979
         char *prefix = crm_strdup_printf("%s[%d]", cmd->device, pid);
97a979
 
97a979
-        crm_log_output(output_log_level, prefix, output);
97a979
+        crm_log_output(output_log_level, prefix, result->action_stdout);
97a979
         free(prefix);
97a979
     }
97a979
 }
97a979
@@ -2391,14 +2399,9 @@ send_async_reply(async_command_t *cmd, const pcmk__action_result_t *result,
97a979
 {
97a979
     xmlNode *reply = NULL;
97a979
     gboolean bcast = FALSE;
97a979
-    const char *output = NULL;
97a979
-    int rc = pcmk_ok;
97a979
 
97a979
     CRM_CHECK((cmd != NULL) && (result != NULL), return);
97a979
 
97a979
-    output = result->action_stdout;
97a979
-    rc = pcmk_rc2legacy(stonith__result2rc(result));
97a979
-
97a979
     reply = construct_async_reply(cmd, result);
97a979
 
97a979
     // Only replies for certain actions are broadcast
97a979
@@ -2412,7 +2415,7 @@ send_async_reply(async_command_t *cmd, const pcmk__action_result_t *result,
97a979
         bcast = TRUE;
97a979
     }
97a979
 
97a979
-    log_async_result(cmd, rc, pid, NULL, output, merged);
97a979
+    log_async_result(cmd, result, pid, NULL, merged);
97a979
     crm_log_xml_trace(reply, "Reply");
97a979
 
97a979
     if (merged) {
97a979
@@ -2436,6 +2439,7 @@ send_async_reply(async_command_t *cmd, const pcmk__action_result_t *result,
97a979
     if (stand_alone) {
97a979
         /* Do notification with a clean data object */
97a979
         xmlNode *notify_data = create_xml_node(NULL, T_STONITH_NOTIFY_FENCE);
97a979
+        int rc = pcmk_rc2legacy(stonith__result2rc(result));
97a979
 
97a979
         crm_xml_add_int(notify_data, F_STONITH_RC, rc);
97a979
         crm_xml_add(notify_data, F_STONITH_TARGET, cmd->victim);
97a979
@@ -2521,8 +2525,7 @@ st_child_done(int pid, const pcmk__action_result_t *result, void *user_data)
97a979
 
97a979
     /* this operation requires more fencing, hooray! */
97a979
     if (next_device) {
97a979
-        log_async_result(cmd, pcmk_rc2legacy(stonith__result2rc(result)), pid,
97a979
-                         next_device->id, result->action_stdout, FALSE);
97a979
+        log_async_result(cmd, result, pid, next_device->id, false);
97a979
         schedule_stonith_command(cmd, next_device);
97a979
         /* Prevent cmd from being freed */
97a979
         cmd = NULL;
97a979
-- 
97a979
2.27.0
97a979
97a979
97a979
From 9f9dea518da50f629589d505ea0f330a47111d76 Mon Sep 17 00:00:00 2001
97a979
From: Ken Gaillot <kgaillot@redhat.com>
97a979
Date: Thu, 28 Oct 2021 13:29:31 -0500
97a979
Subject: [PATCH 10/12] Test: cts-fencing: update expected log messages
97a979
97a979
... which now log the original exit status rather than a mapped legacy rc
97a979
---
97a979
 cts/cts-fencing.in | 28 ++++++++++++++--------------
97a979
 1 file changed, 14 insertions(+), 14 deletions(-)
97a979
97a979
diff --git a/cts/cts-fencing.in b/cts/cts-fencing.in
97a979
index babfb6351..5cd9f7b8f 100644
97a979
--- a/cts/cts-fencing.in
97a979
+++ b/cts/cts-fencing.in
97a979
@@ -886,7 +886,7 @@ class Tests(object):
97a979
             test.add_cmd("stonith_admin", "--output-as=xml -F node3 -t 20")
97a979
 
97a979
             test.add_stonith_log_pattern("Total timeout set to 40")
97a979
-            test.add_stonith_log_pattern("targeting node3 using false returned -201")
97a979
+            test.add_stonith_log_pattern("targeting node3 using false returned 1")
97a979
             test.add_stonith_log_pattern("targeting node3 using true returned 0")
97a979
 
97a979
         # test what happens when the first fencing level fails.
97a979
@@ -920,8 +920,8 @@ class Tests(object):
97a979
             test.add_cmd("stonith_admin", "--output-as=xml -F node3 -t 3")
97a979
 
97a979
             test.add_stonith_log_pattern("Total timeout set to 18")
97a979
-            test.add_stonith_log_pattern("targeting node3 using false1 returned -201")
97a979
-            test.add_stonith_log_pattern("targeting node3 using false2 returned -201")
97a979
+            test.add_stonith_log_pattern("targeting node3 using false1 returned 1")
97a979
+            test.add_stonith_log_pattern("targeting node3 using false2 returned 1")
97a979
             test.add_stonith_log_pattern("targeting node3 using true3 returned 0")
97a979
             test.add_stonith_log_pattern("targeting node3 using true4 returned 0")
97a979
 
97a979
@@ -987,7 +987,7 @@ class Tests(object):
97a979
             test.add_cmd("stonith_admin", "--output-as=xml -F node3 -t 20")
97a979
 
97a979
             test.add_stonith_log_pattern("Total timeout set to 8")
97a979
-            test.add_stonith_log_pattern("targeting node3 using false1 returned -201")
97a979
+            test.add_stonith_log_pattern("targeting node3 using false1 returned 1")
97a979
             test.add_stonith_neg_log_pattern("targeting node3 using false2 returned ")
97a979
             test.add_stonith_log_pattern("targeting node3 using true3 returned 0")
97a979
             test.add_stonith_log_pattern("targeting node3 using true4 returned 0")
97a979
@@ -1147,7 +1147,7 @@ class Tests(object):
97a979
                          "--output-as=xml -R true1 -a fence_dummy_no_reboot -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"")
97a979
             test.add_cmd("stonith_admin", "--output-as=xml -B node1 -t 5 -V")
97a979
             test.add_stonith_log_pattern("does not support reboot")
97a979
-            test.add_stonith_log_pattern("using true1 returned 0 (OK)")
97a979
+            test.add_stonith_log_pattern("using true1 returned 0")
97a979
 
97a979
         # make sure reboot is used when reboot action is advertised
97a979
         for test_type in test_types:
97a979
@@ -1158,7 +1158,7 @@ class Tests(object):
97a979
                          "--output-as=xml -R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"")
97a979
             test.add_cmd("stonith_admin", "--output-as=xml -B node1 -t 5 -V")
97a979
             test.add_stonith_neg_log_pattern("does not advertise support for 'reboot', performing 'off'")
97a979
-            test.add_stonith_log_pattern("using true1 returned 0 (OK)")
97a979
+            test.add_stonith_log_pattern("using true1 returned 0")
97a979
 
97a979
         # make sure requested fencing delay is applied only for the first device in the first level
97a979
         # make sure static delay from pcmk_delay_base is added
97a979
@@ -1240,8 +1240,8 @@ class Tests(object):
97a979
                      '--output-as=xml -R true2 -a fence_dummy_auto_unfence -o "mode=pass" -o "pcmk_host_list=%s"' % (our_uname))
97a979
         test.add_cmd("stonith_admin", "--output-as=xml -U %s -t 3" % (our_uname))
97a979
         # both devices should be executed
97a979
-        test.add_stonith_log_pattern("using true1 returned 0 (OK)")
97a979
-        test.add_stonith_log_pattern("using true2 returned 0 (OK)")
97a979
+        test.add_stonith_log_pattern("using true1 returned 0")
97a979
+        test.add_stonith_log_pattern("using true2 returned 0")
97a979
 
97a979
         ### verify unfencing using automatic unfencing fails if any of the required agents fail
97a979
         test = self.new_test("cpg_unfence_required_2",
97a979
@@ -1264,8 +1264,8 @@ class Tests(object):
97a979
         test.add_cmd("stonith_admin", "--output-as=xml -r %s -i 1 -v true1" % (our_uname))
97a979
         test.add_cmd("stonith_admin", "--output-as=xml -r %s -i 2 -v true2" % (our_uname))
97a979
         test.add_cmd("stonith_admin", "--output-as=xml -U %s -t 3" % (our_uname))
97a979
-        test.add_stonith_log_pattern("using true1 returned 0 (OK)")
97a979
-        test.add_stonith_log_pattern("using true2 returned 0 (OK)")
97a979
+        test.add_stonith_log_pattern("using true1 returned 0")
97a979
+        test.add_stonith_log_pattern("using true2 returned 0")
97a979
 
97a979
         ### verify unfencing using automatic devices with topology
97a979
         test = self.new_test("cpg_unfence_required_4",
97a979
@@ -1296,10 +1296,10 @@ class Tests(object):
97a979
         test.add_cmd("stonith_admin", "--output-as=xml -r %s -i 3 -v false4" % (our_uname))
97a979
         test.add_cmd("stonith_admin", "--output-as=xml -r %s -i 4 -v true4" % (our_uname))
97a979
         test.add_cmd("stonith_admin", "--output-as=xml -U %s -t 3" % (our_uname))
97a979
-        test.add_stonith_log_pattern("using true1 returned 0 (OK)")
97a979
-        test.add_stonith_log_pattern("using true2 returned 0 (OK)")
97a979
-        test.add_stonith_log_pattern("using true3 returned 0 (OK)")
97a979
-        test.add_stonith_log_pattern("using true4 returned 0 (OK)")
97a979
+        test.add_stonith_log_pattern("using true1 returned 0")
97a979
+        test.add_stonith_log_pattern("using true2 returned 0")
97a979
+        test.add_stonith_log_pattern("using true3 returned 0")
97a979
+        test.add_stonith_log_pattern("using true4 returned 0")
97a979
 
97a979
     def build_unfence_on_target_tests(self):
97a979
         """ Register tests that verify unfencing that runs on the target """
97a979
-- 
97a979
2.27.0
97a979
97a979
97a979
From be72166ed9ccb53c218529783660503df95da719 Mon Sep 17 00:00:00 2001
97a979
From: Ken Gaillot <kgaillot@redhat.com>
97a979
Date: Thu, 16 Sep 2021 16:50:23 -0500
97a979
Subject: [PATCH 11/12] Log: libcrmservice: downgrade failed action messages
97a979
97a979
Previously, we would often get duplicate log messages for failed actions,
97a979
from the service library and again from its callers.
97a979
97a979
Now that the service library tracks and provides exit reasons, callers can log
97a979
sufficient detail with better context, so downgrade the library's messages to
97a979
info level or lower. Similarly, avoid duplicate logs of process output.
97a979
97a979
Certain messages (such as out-of-memory) remain at higher severity.
97a979
---
97a979
 daemons/controld/controld_execd.c | 15 +++---
97a979
 lib/fencing/st_client.c           | 11 ++---
97a979
 lib/services/services.c           | 14 +++---
97a979
 lib/services/services_linux.c     | 80 ++++++++++++++++---------------
97a979
 lib/services/systemd.c            | 20 ++++----
97a979
 lib/services/upstart.c            | 19 ++++----
97a979
 6 files changed, 80 insertions(+), 79 deletions(-)
97a979
97a979
diff --git a/daemons/controld/controld_execd.c b/daemons/controld/controld_execd.c
97a979
index bded6e6b6..3ddff6e13 100644
97a979
--- a/daemons/controld/controld_execd.c
97a979
+++ b/daemons/controld/controld_execd.c
97a979
@@ -2684,16 +2684,15 @@ log_executor_event(lrmd_event_data_t *op, const char *op_key,
97a979
     do_crm_log(log_level, "%s", str->str);
97a979
     g_string_free(str, TRUE);
97a979
 
97a979
-    if (op->output != NULL) {
97a979
-        char *prefix = crm_strdup_printf("%s-" PCMK__OP_FMT ":%d", node_name,
97a979
+    /* The services library has already logged the output at info or debug
97a979
+     * level, so just raise to notice if it looks like a failure.
97a979
+     */
97a979
+    if ((op->output != NULL) && (op->rc != PCMK_OCF_OK)) {
97a979
+        char *prefix = crm_strdup_printf(PCMK__OP_FMT "@%s output",
97a979
                                          op->rsc_id, op->op_type,
97a979
-                                         op->interval_ms, op->call_id);
97a979
+                                         op->interval_ms, node_name);
97a979
 
97a979
-        if (op->rc) {
97a979
-            crm_log_output(LOG_NOTICE, prefix, op->output);
97a979
-        } else {
97a979
-            crm_log_output(LOG_DEBUG, prefix, op->output);
97a979
-        }
97a979
+        crm_log_output(LOG_NOTICE, prefix, op->output);
97a979
         free(prefix);
97a979
     }
97a979
 }
97a979
diff --git a/lib/fencing/st_client.c b/lib/fencing/st_client.c
97a979
index 3d4127eff..2fbff7f24 100644
97a979
--- a/lib/fencing/st_client.c
97a979
+++ b/lib/fencing/st_client.c
97a979
@@ -276,14 +276,9 @@ stonith__watchdog_fencing_enabled_for_node(const char *node)
97a979
 static void
97a979
 log_action(stonith_action_t *action, pid_t pid)
97a979
 {
97a979
-    if (action->result.action_stdout != NULL) {
97a979
-        /* Logging the whole string confuses syslog when the string is xml */
97a979
-        char *prefix = crm_strdup_printf("%s[%d] stdout:", action->agent, pid);
97a979
-
97a979
-        crm_log_output(LOG_TRACE, prefix, action->result.action_stdout);
97a979
-        free(prefix);
97a979
-    }
97a979
-
97a979
+    /* The services library has already logged the output at info or debug
97a979
+     * level, so just raise to warning for stderr.
97a979
+     */
97a979
     if (action->result.action_stderr != NULL) {
97a979
         /* Logging the whole string confuses syslog when the string is xml */
97a979
         char *prefix = crm_strdup_printf("%s[%d] stderr:", action->agent, pid);
97a979
diff --git a/lib/services/services.c b/lib/services/services.c
97a979
index 86a0a213c..cf8bbc70e 100644
97a979
--- a/lib/services/services.c
97a979
+++ b/lib/services/services.c
97a979
@@ -319,13 +319,13 @@ services__create_resource_action(const char *name, const char *standard,
97a979
         rc = services__nagios_prepare(op);
97a979
 #endif
97a979
     } else {
97a979
-        crm_err("Unknown resource standard: %s", op->standard);
97a979
+        crm_info("Unknown resource standard: %s", op->standard);
97a979
         rc = ENOENT;
97a979
     }
97a979
 
97a979
     if (rc != pcmk_rc_ok) {
97a979
-        crm_err("Cannot prepare %s operation for %s: %s",
97a979
-                action, name, strerror(rc));
97a979
+        crm_info("Cannot prepare %s operation for %s: %s",
97a979
+                 action, name, strerror(rc));
97a979
         services__handle_exec_error(op, rc);
97a979
     }
97a979
     return op;
97a979
@@ -967,14 +967,14 @@ execute_metadata_action(svc_action_t *op)
97a979
     const char *class = op->standard;
97a979
 
97a979
     if (op->agent == NULL) {
97a979
-        crm_err("meta-data requested without specifying agent");
97a979
+        crm_info("Meta-data requested without specifying agent");
97a979
         services__set_result(op, services__generic_error(op),
97a979
                              PCMK_EXEC_ERROR_FATAL, "Agent not specified");
97a979
         return EINVAL;
97a979
     }
97a979
 
97a979
     if (class == NULL) {
97a979
-        crm_err("meta-data requested for agent %s without specifying class",
97a979
+        crm_info("Meta-data requested for agent %s without specifying class",
97a979
                 op->agent);
97a979
         services__set_result(op, services__generic_error(op),
97a979
                              PCMK_EXEC_ERROR_FATAL,
97a979
@@ -986,8 +986,8 @@ execute_metadata_action(svc_action_t *op)
97a979
         class = resources_find_service_class(op->agent);
97a979
     }
97a979
     if (class == NULL) {
97a979
-        crm_err("meta-data requested for %s, but could not determine class",
97a979
-                op->agent);
97a979
+        crm_info("Meta-data requested for %s, but could not determine class",
97a979
+                 op->agent);
97a979
         services__set_result(op, services__generic_error(op),
97a979
                              PCMK_EXEC_ERROR_HARD,
97a979
                              "Agent standard could not be determined");
97a979
diff --git a/lib/services/services_linux.c b/lib/services/services_linux.c
97a979
index b2ff27a0d..9a4c6cf80 100644
97a979
--- a/lib/services/services_linux.c
97a979
+++ b/lib/services/services_linux.c
97a979
@@ -64,8 +64,8 @@ sigchld_setup(struct sigchld_data_s *data)
97a979
 
97a979
     // Block SIGCHLD (saving previous set of blocked signals to restore later)
97a979
     if (sigprocmask(SIG_BLOCK, &(data->mask), &(data->old_mask)) < 0) {
97a979
-        crm_err("Wait for child process completion failed: %s "
97a979
-                CRM_XS " source=sigprocmask", pcmk_strerror(errno));
97a979
+        crm_info("Wait for child process completion failed: %s "
97a979
+                 CRM_XS " source=sigprocmask", pcmk_strerror(errno));
97a979
         return false;
97a979
     }
97a979
     return true;
97a979
@@ -81,8 +81,8 @@ sigchld_open(struct sigchld_data_s *data)
97a979
 
97a979
     fd = signalfd(-1, &(data->mask), SFD_NONBLOCK);
97a979
     if (fd < 0) {
97a979
-        crm_err("Wait for child process completion failed: %s "
97a979
-                CRM_XS " source=signalfd", pcmk_strerror(errno));
97a979
+        crm_info("Wait for child process completion failed: %s "
97a979
+                 CRM_XS " source=signalfd", pcmk_strerror(errno));
97a979
     }
97a979
     return fd;
97a979
 }
97a979
@@ -108,8 +108,8 @@ sigchld_received(int fd)
97a979
     }
97a979
     s = read(fd, &fdsi, sizeof(struct signalfd_siginfo));
97a979
     if (s != sizeof(struct signalfd_siginfo)) {
97a979
-        crm_err("Wait for child process completion failed: %s "
97a979
-                CRM_XS " source=read", pcmk_strerror(errno));
97a979
+        crm_info("Wait for child process completion failed: %s "
97a979
+                 CRM_XS " source=read", pcmk_strerror(errno));
97a979
 
97a979
     } else if (fdsi.ssi_signo == SIGCHLD) {
97a979
         return true;
97a979
@@ -149,8 +149,8 @@ sigchld_handler()
97a979
     if ((last_sigchld_data != NULL)
97a979
         && (last_sigchld_data->pipe_fd[1] >= 0)
97a979
         && (write(last_sigchld_data->pipe_fd[1], "", 1) == -1)) {
97a979
-        crm_err("Wait for child process completion failed: %s "
97a979
-                CRM_XS " source=write", pcmk_strerror(errno));
97a979
+        crm_info("Wait for child process completion failed: %s "
97a979
+                 CRM_XS " source=write", pcmk_strerror(errno));
97a979
     }
97a979
 }
97a979
 
97a979
@@ -162,19 +162,19 @@ sigchld_setup(struct sigchld_data_s *data)
97a979
     data->pipe_fd[0] = data->pipe_fd[1] = -1;
97a979
 
97a979
     if (pipe(data->pipe_fd) == -1) {
97a979
-        crm_err("Wait for child process completion failed: %s "
97a979
-                CRM_XS " source=pipe", pcmk_strerror(errno));
97a979
+        crm_info("Wait for child process completion failed: %s "
97a979
+                 CRM_XS " source=pipe", pcmk_strerror(errno));
97a979
         return false;
97a979
     }
97a979
 
97a979
     rc = pcmk__set_nonblocking(data->pipe_fd[0]);
97a979
     if (rc != pcmk_rc_ok) {
97a979
-        crm_warn("Could not set pipe input non-blocking: %s " CRM_XS " rc=%d",
97a979
+        crm_info("Could not set pipe input non-blocking: %s " CRM_XS " rc=%d",
97a979
                  pcmk_rc_str(rc), rc);
97a979
     }
97a979
     rc = pcmk__set_nonblocking(data->pipe_fd[1]);
97a979
     if (rc != pcmk_rc_ok) {
97a979
-        crm_warn("Could not set pipe output non-blocking: %s " CRM_XS " rc=%d",
97a979
+        crm_info("Could not set pipe output non-blocking: %s " CRM_XS " rc=%d",
97a979
                  pcmk_rc_str(rc), rc);
97a979
     }
97a979
 
97a979
@@ -183,8 +183,8 @@ sigchld_setup(struct sigchld_data_s *data)
97a979
     data->sa.sa_flags = 0;
97a979
     sigemptyset(&(data->sa.sa_mask));
97a979
     if (sigaction(SIGCHLD, &(data->sa), &(data->old_sa)) < 0) {
97a979
-        crm_err("Wait for child process completion failed: %s "
97a979
-                CRM_XS " source=sigaction", pcmk_strerror(errno));
97a979
+        crm_info("Wait for child process completion failed: %s "
97a979
+                 CRM_XS " source=sigaction", pcmk_strerror(errno));
97a979
     }
97a979
 
97a979
     // Remember data for use in signal handler
97a979
@@ -585,7 +585,11 @@ log_op_output(svc_action_t *op)
97a979
 {
97a979
     char *prefix = crm_strdup_printf("%s[%d] error output", op->id, op->pid);
97a979
 
97a979
-    crm_log_output(LOG_NOTICE, prefix, op->stderr_data);
97a979
+    /* The library caller has better context to know how important the output
97a979
+     * is, so log it at info and debug severity here. They can log it again at
97a979
+     * higher severity if appropriate.
97a979
+     */
97a979
+    crm_log_output(LOG_INFO, prefix, op->stderr_data);
97a979
     strcpy(prefix + strlen(prefix) - strlen("error output"), "output");
97a979
     crm_log_output(LOG_DEBUG, prefix, op->stdout_data);
97a979
     free(prefix);
97a979
@@ -673,7 +677,7 @@ async_action_complete(mainloop_child_t *p, pid_t pid, int core, int signo,
97a979
         parse_exit_reason_from_stderr(op);
97a979
 
97a979
     } else if (mainloop_child_timeout(p)) {
97a979
-        crm_warn("%s[%d] timed out after %dms", op->id, op->pid, op->timeout);
97a979
+        crm_info("%s[%d] timed out after %dms", op->id, op->pid, op->timeout);
97a979
         services__set_result(op, services__generic_error(op), PCMK_EXEC_TIMEOUT,
97a979
                              "Process did not exit within specified timeout");
97a979
 
97a979
@@ -686,7 +690,7 @@ async_action_complete(mainloop_child_t *p, pid_t pid, int core, int signo,
97a979
         services__set_result(op, PCMK_OCF_OK, PCMK_EXEC_CANCELLED, NULL);
97a979
 
97a979
     } else {
97a979
-        crm_warn("%s[%d] terminated with signal %d (%s)",
97a979
+        crm_info("%s[%d] terminated with signal %d (%s)",
97a979
                  op->id, op->pid, signo, strsignal(signo));
97a979
         services__set_result(op, PCMK_OCF_UNKNOWN_ERROR, PCMK_EXEC_ERROR,
97a979
                              "Process interrupted by signal");
97a979
@@ -908,12 +912,12 @@ action_launch_child(svc_action_t *op)
97a979
         sp.sched_priority = 0;
97a979
 
97a979
         if (sched_setscheduler(0, SCHED_OTHER, &sp) == -1) {
97a979
-            crm_warn("Could not reset scheduling policy for %s", op->id);
97a979
+            crm_info("Could not reset scheduling policy for %s", op->id);
97a979
         }
97a979
     }
97a979
 #endif
97a979
     if (setpriority(PRIO_PROCESS, 0, 0) == -1) {
97a979
-        crm_warn("Could not reset process priority for %s", op->id);
97a979
+        crm_info("Could not reset process priority for %s", op->id);
97a979
     }
97a979
 
97a979
     /* Man: The call setpgrp() is equivalent to setpgid(0,0)
97a979
@@ -941,7 +945,7 @@ action_launch_child(svc_action_t *op)
97a979
         } else {
97a979
             crm_err("Considering %s unconfigured "
97a979
                     "because unable to load CIB secrets: %s",
97a979
-                     op->rsc, pcmk_rc_str(rc));
97a979
+                    op->rsc, pcmk_rc_str(rc));
97a979
             exit_child(op, services__configuration_error(op, false),
97a979
                        "Unable to load CIB secrets");
97a979
         }
97a979
@@ -1043,7 +1047,7 @@ wait_for_sync_result(svc_action_t *op, struct sigchld_data_s *data)
97a979
 
97a979
                 } else if (wait_rc < 0) {
97a979
                     wait_reason = pcmk_rc_str(errno);
97a979
-                    crm_warn("Wait for completion of %s[%d] failed: %s "
97a979
+                    crm_info("Wait for completion of %s[%d] failed: %s "
97a979
                              CRM_XS " source=waitpid",
97a979
                              op->id, op->pid, wait_reason);
97a979
                     wait_rc = 0; // Act as if process is still running
97a979
@@ -1057,8 +1061,8 @@ wait_for_sync_result(svc_action_t *op, struct sigchld_data_s *data)
97a979
 
97a979
         } else if ((poll_rc < 0) && (errno != EINTR)) {
97a979
             wait_reason = pcmk_rc_str(errno);
97a979
-            crm_err("Wait for completion of %s[%d] failed: %s "
97a979
-                    CRM_XS " source=poll", op->id, op->pid, wait_reason);
97a979
+            crm_info("Wait for completion of %s[%d] failed: %s "
97a979
+                     CRM_XS " source=poll", op->id, op->pid, wait_reason);
97a979
             break;
97a979
         }
97a979
 
97a979
@@ -1078,7 +1082,7 @@ wait_for_sync_result(svc_action_t *op, struct sigchld_data_s *data)
97a979
             services__set_result(op, services__generic_error(op),
97a979
                                  PCMK_EXEC_TIMEOUT,
97a979
                                  "Process did not exit within specified timeout");
97a979
-            crm_warn("%s[%d] timed out after %dms",
97a979
+            crm_info("%s[%d] timed out after %dms",
97a979
                      op->id, op->pid, op->timeout);
97a979
 
97a979
         } else {
97a979
@@ -1110,8 +1114,8 @@ wait_for_sync_result(svc_action_t *op, struct sigchld_data_s *data)
97a979
 
97a979
         services__set_result(op, services__generic_error(op), PCMK_EXEC_ERROR,
97a979
                              "Process interrupted by signal");
97a979
-        crm_err("%s[%d] terminated with signal %d (%s)",
97a979
-                op->id, op->pid, signo, strsignal(signo));
97a979
+        crm_info("%s[%d] terminated with signal %d (%s)",
97a979
+                 op->id, op->pid, signo, strsignal(signo));
97a979
 
97a979
 #ifdef WCOREDUMP
97a979
         if (WCOREDUMP(status)) {
97a979
@@ -1155,7 +1159,7 @@ services__execute_file(svc_action_t *op)
97a979
     // Catch common failure conditions early
97a979
     if (stat(op->opaque->exec, &st) != 0) {
97a979
         rc = errno;
97a979
-        crm_warn("Cannot execute '%s': %s " CRM_XS " stat rc=%d",
97a979
+        crm_info("Cannot execute '%s': %s " CRM_XS " stat rc=%d",
97a979
                  op->opaque->exec, pcmk_strerror(rc), rc);
97a979
         services__handle_exec_error(op, rc);
97a979
         goto done;
97a979
@@ -1163,8 +1167,8 @@ services__execute_file(svc_action_t *op)
97a979
 
97a979
     if (pipe(stdout_fd) < 0) {
97a979
         rc = errno;
97a979
-        crm_err("Cannot execute '%s': %s " CRM_XS " pipe(stdout) rc=%d",
97a979
-                op->opaque->exec, pcmk_strerror(rc), rc);
97a979
+        crm_info("Cannot execute '%s': %s " CRM_XS " pipe(stdout) rc=%d",
97a979
+                 op->opaque->exec, pcmk_strerror(rc), rc);
97a979
         services__handle_exec_error(op, rc);
97a979
         goto done;
97a979
     }
97a979
@@ -1174,8 +1178,8 @@ services__execute_file(svc_action_t *op)
97a979
 
97a979
         close_pipe(stdout_fd);
97a979
 
97a979
-        crm_err("Cannot execute '%s': %s " CRM_XS " pipe(stderr) rc=%d",
97a979
-                op->opaque->exec, pcmk_strerror(rc), rc);
97a979
+        crm_info("Cannot execute '%s': %s " CRM_XS " pipe(stderr) rc=%d",
97a979
+                 op->opaque->exec, pcmk_strerror(rc), rc);
97a979
         services__handle_exec_error(op, rc);
97a979
         goto done;
97a979
     }
97a979
@@ -1187,8 +1191,8 @@ services__execute_file(svc_action_t *op)
97a979
             close_pipe(stdout_fd);
97a979
             close_pipe(stderr_fd);
97a979
 
97a979
-            crm_err("Cannot execute '%s': %s " CRM_XS " pipe(stdin) rc=%d",
97a979
-                    op->opaque->exec, pcmk_strerror(rc), rc);
97a979
+            crm_info("Cannot execute '%s': %s " CRM_XS " pipe(stdin) rc=%d",
97a979
+                     op->opaque->exec, pcmk_strerror(rc), rc);
97a979
             services__handle_exec_error(op, rc);
97a979
             goto done;
97a979
         }
97a979
@@ -1212,8 +1216,8 @@ services__execute_file(svc_action_t *op)
97a979
             close_pipe(stdout_fd);
97a979
             close_pipe(stderr_fd);
97a979
 
97a979
-            crm_err("Cannot execute '%s': %s " CRM_XS " fork rc=%d",
97a979
-                    op->opaque->exec, pcmk_strerror(rc), rc);
97a979
+            crm_info("Cannot execute '%s': %s " CRM_XS " fork rc=%d",
97a979
+                     op->opaque->exec, pcmk_strerror(rc), rc);
97a979
             services__handle_exec_error(op, rc);
97a979
             if (op->synchronous) {
97a979
                 sigchld_cleanup(&data);
97a979
@@ -1271,7 +1275,7 @@ services__execute_file(svc_action_t *op)
97a979
     op->opaque->stdout_fd = stdout_fd[0];
97a979
     rc = pcmk__set_nonblocking(op->opaque->stdout_fd);
97a979
     if (rc != pcmk_rc_ok) {
97a979
-        crm_warn("Could not set '%s' output non-blocking: %s "
97a979
+        crm_info("Could not set '%s' output non-blocking: %s "
97a979
                  CRM_XS " rc=%d",
97a979
                  op->opaque->exec, pcmk_rc_str(rc), rc);
97a979
     }
97a979
@@ -1279,7 +1283,7 @@ services__execute_file(svc_action_t *op)
97a979
     op->opaque->stderr_fd = stderr_fd[0];
97a979
     rc = pcmk__set_nonblocking(op->opaque->stderr_fd);
97a979
     if (rc != pcmk_rc_ok) {
97a979
-        crm_warn("Could not set '%s' error output non-blocking: %s "
97a979
+        crm_info("Could not set '%s' error output non-blocking: %s "
97a979
                  CRM_XS " rc=%d",
97a979
                  op->opaque->exec, pcmk_rc_str(rc), rc);
97a979
     }
97a979
@@ -1290,7 +1294,7 @@ services__execute_file(svc_action_t *op)
97a979
         // as long as no other standard uses stdin_fd assume stonith
97a979
         rc = pcmk__set_nonblocking(op->opaque->stdin_fd);
97a979
         if (rc != pcmk_rc_ok) {
97a979
-            crm_warn("Could not set '%s' input non-blocking: %s "
97a979
+            crm_info("Could not set '%s' input non-blocking: %s "
97a979
                     CRM_XS " fd=%d,rc=%d", op->opaque->exec,
97a979
                     pcmk_rc_str(rc), op->opaque->stdin_fd, rc);
97a979
         }
97a979
diff --git a/lib/services/systemd.c b/lib/services/systemd.c
97a979
index 6f5bef960..8e9fff484 100644
97a979
--- a/lib/services/systemd.c
97a979
+++ b/lib/services/systemd.c
97a979
@@ -232,7 +232,8 @@ systemd_daemon_reload_complete(DBusPendingCall *pending, void *user_data)
97a979
     }
97a979
 
97a979
     if (pcmk_dbus_find_error(pending, reply, &error)) {
97a979
-        crm_err("Could not issue systemd reload %d: %s", reload_count, error.message);
97a979
+        crm_warn("Could not issue systemd reload %d: %s",
97a979
+                 reload_count, error.message);
97a979
         dbus_error_free(&error);
97a979
 
97a979
     } else {
97a979
@@ -291,8 +292,8 @@ set_result_from_method_error(svc_action_t *op, const DBusError *error)
97a979
                              PCMK_EXEC_NOT_INSTALLED, "systemd unit not found");
97a979
     }
97a979
 
97a979
-    crm_err("DBus request for %s of systemd unit %s for resource %s failed: %s",
97a979
-            op->action, op->agent, crm_str(op->rsc), error->message);
97a979
+    crm_info("DBus request for %s of systemd unit %s for resource %s failed: %s",
97a979
+             op->action, op->agent, crm_str(op->rsc), error->message);
97a979
 }
97a979
 
97a979
 /*!
97a979
@@ -325,11 +326,11 @@ execute_after_loadunit(DBusMessage *reply, svc_action_t *op)
97a979
         if (op != NULL) {
97a979
             services__set_result(op, PCMK_OCF_UNKNOWN_ERROR, PCMK_EXEC_ERROR,
97a979
                                  "systemd DBus method had unexpected reply");
97a979
-            crm_err("Could not load systemd unit %s for %s: "
97a979
-                    "DBus reply has unexpected type", op->agent, op->id);
97a979
+            crm_info("Could not load systemd unit %s for %s: "
97a979
+                     "DBus reply has unexpected type", op->agent, op->id);
97a979
         } else {
97a979
-            crm_err("Could not load systemd unit: "
97a979
-                    "DBus reply has unexpected type");
97a979
+            crm_info("Could not load systemd unit: "
97a979
+                     "DBus reply has unexpected type");
97a979
         }
97a979
 
97a979
     } else {
97a979
@@ -688,7 +689,7 @@ process_unit_method_reply(DBusMessage *reply, svc_action_t *op)
97a979
 
97a979
     } else if (!pcmk_dbus_type_check(reply, NULL, DBUS_TYPE_OBJECT_PATH,
97a979
                                      __func__, __LINE__)) {
97a979
-        crm_warn("DBus request for %s of %s succeeded but "
97a979
+        crm_info("DBus request for %s of %s succeeded but "
97a979
                  "return type was unexpected", op->action, crm_str(op->rsc));
97a979
         services__set_result(op, PCMK_OCF_OK, PCMK_EXEC_DONE,
97a979
                              "systemd DBus method had unexpected reply");
97a979
@@ -981,7 +982,8 @@ systemd_timeout_callback(gpointer p)
97a979
     svc_action_t * op = p;
97a979
 
97a979
     op->opaque->timerid = 0;
97a979
-    crm_warn("%s operation on systemd unit %s named '%s' timed out", op->action, op->agent, op->rsc);
97a979
+    crm_info("%s action for systemd unit %s named '%s' timed out",
97a979
+             op->action, op->agent, op->rsc);
97a979
     services__set_result(op, PCMK_OCF_UNKNOWN_ERROR, PCMK_EXEC_TIMEOUT,
97a979
                          "Systemd action did not complete within specified timeout");
97a979
     services__finalize_async_op(op);
97a979
diff --git a/lib/services/upstart.c b/lib/services/upstart.c
97a979
index 2fdc229ad..2ece803e1 100644
97a979
--- a/lib/services/upstart.c
97a979
+++ b/lib/services/upstart.c
97a979
@@ -308,21 +308,21 @@ get_first_instance(const gchar * job, int timeout)
97a979
     dbus_message_unref(msg);
97a979
 
97a979
     if (dbus_error_is_set(&error)) {
97a979
-        crm_err("Call to %s failed: %s", method, error.message);
97a979
+        crm_info("Call to %s failed: %s", method, error.message);
97a979
         dbus_error_free(&error);
97a979
         goto done;
97a979
 
97a979
     } else if(reply == NULL) {
97a979
-        crm_err("Call to %s failed: no reply", method);
97a979
+        crm_info("Call to %s failed: no reply", method);
97a979
         goto done;
97a979
 
97a979
     } else if (!dbus_message_iter_init(reply, &args)) {
97a979
-        crm_err("Call to %s failed: Message has no arguments", method);
97a979
+        crm_info("Call to %s failed: Message has no arguments", method);
97a979
         goto done;
97a979
     }
97a979
 
97a979
     if(!pcmk_dbus_type_check(reply, &args, DBUS_TYPE_ARRAY, __func__, __LINE__)) {
97a979
-        crm_err("Call to %s failed: Message has invalid arguments", method);
97a979
+        crm_info("Call to %s failed: Message has invalid arguments", method);
97a979
         goto done;
97a979
     }
97a979
 
97a979
@@ -432,8 +432,8 @@ set_result_from_method_error(svc_action_t *op, const DBusError *error)
97a979
         return;
97a979
     }
97a979
 
97a979
-    crm_err("DBus request for %s of Upstart job %s for resource %s failed: %s",
97a979
-            op->action, op->agent, crm_str(op->rsc), error->message);
97a979
+    crm_info("DBus request for %s of Upstart job %s for resource %s failed: %s",
97a979
+             op->action, op->agent, crm_str(op->rsc), error->message);
97a979
 }
97a979
 
97a979
 /*!
97a979
@@ -468,7 +468,7 @@ job_method_complete(DBusPendingCall *pending, void *user_data)
97a979
 
97a979
     } else if (!pcmk_dbus_type_check(reply, NULL, DBUS_TYPE_OBJECT_PATH,
97a979
                                      __func__, __LINE__)) {
97a979
-        crm_warn("DBus request for %s of %s succeeded but "
97a979
+        crm_info("DBus request for %s of %s succeeded but "
97a979
                  "return type was unexpected", op->action, crm_str(op->rsc));
97a979
         services__set_result(op, PCMK_OCF_OK, PCMK_EXEC_DONE, NULL);
97a979
 
97a979
@@ -667,7 +667,8 @@ services__execute_upstart(svc_action_t *op)
97a979
 
97a979
     } else if (!pcmk_dbus_type_check(reply, NULL, DBUS_TYPE_OBJECT_PATH,
97a979
                                      __func__, __LINE__)) {
97a979
-        crm_warn("Call to %s passed but return type was unexpected", op->action);
97a979
+        crm_info("Call to %s passed but return type was unexpected",
97a979
+                 op->action);
97a979
         services__set_result(op, PCMK_OCF_OK, PCMK_EXEC_DONE, NULL);
97a979
 
97a979
     } else {
97a979
@@ -675,7 +676,7 @@ services__execute_upstart(svc_action_t *op)
97a979
 
97a979
         dbus_message_get_args(reply, NULL, DBUS_TYPE_OBJECT_PATH, &path,
97a979
                               DBUS_TYPE_INVALID);
97a979
-        crm_info("Call to %s passed: %s", op->action, path);
97a979
+        crm_debug("Call to %s passed: %s", op->action, path);
97a979
         services__set_result(op, PCMK_OCF_OK, PCMK_EXEC_DONE, NULL);
97a979
     }
97a979
 
97a979
-- 
97a979
2.27.0
97a979
97a979
97a979
From 39f6861c72eb9dd76d2cf3da287fe7485615631b Mon Sep 17 00:00:00 2001
97a979
From: Ken Gaillot <kgaillot@redhat.com>
97a979
Date: Mon, 8 Nov 2021 09:43:38 -0600
97a979
Subject: [PATCH 12/12] Low: fencing: avoid use-after-free with new result
97a979
 object
97a979
97a979
itnroduced by 153c9b552 (not released)
97a979
---
97a979
 lib/fencing/st_rhcs.c | 6 ++++--
97a979
 1 file changed, 4 insertions(+), 2 deletions(-)
97a979
97a979
diff --git a/lib/fencing/st_rhcs.c b/lib/fencing/st_rhcs.c
97a979
index 23e694975..6c8cbedc7 100644
97a979
--- a/lib/fencing/st_rhcs.c
97a979
+++ b/lib/fencing/st_rhcs.c
97a979
@@ -143,15 +143,17 @@ stonith__rhcs_get_metadata(const char *agent, int timeout, xmlNode **metadata)
97a979
     if (result->execution_status != PCMK_EXEC_DONE) {
97a979
         crm_warn("Could not execute metadata action for %s: %s",
97a979
                  agent, pcmk_exec_status_str(result->execution_status));
97a979
+        rc = pcmk_rc2legacy(stonith__result2rc(result));
97a979
         stonith__destroy_action(action);
97a979
-        return pcmk_rc2legacy(stonith__result2rc(result));
97a979
+        return rc;
97a979
     }
97a979
 
97a979
     if (result->exit_status != CRM_EX_OK) {
97a979
         crm_warn("Metadata action for %s returned error code %d",
97a979
                  agent, result->exit_status);
97a979
+        rc = pcmk_rc2legacy(stonith__result2rc(result));
97a979
         stonith__destroy_action(action);
97a979
-        return pcmk_rc2legacy(stonith__result2rc(result));
97a979
+        return rc;
97a979
     }
97a979
 
97a979
     if (result->action_stdout == NULL) {
97a979
-- 
97a979
2.27.0
97a979