Blame SOURCES/002-fencing-reasons.patch

533c21
From 95b4f87aae5fb2cf771cf9a8f8e5420b65fb213f Mon Sep 17 00:00:00 2001
533c21
From: Ken Gaillot <kgaillot@redhat.com>
533c21
Date: Tue, 21 Sep 2021 10:47:51 -0500
533c21
Subject: [PATCH 01/12] Refactor: fencing: use pcmk__action_result_t in
533c21
 stonith_action_t
533c21
533c21
stonith_action_t previously had an rc member for a legacy return code, along
533c21
with output and error members for action stdout/stderr. When setting rc based
533c21
on the svc_action_t result, it used a mapping function svc_action_to_errno().
533c21
533c21
This replaces those with a pcmk__action_result_t member, which means we now
533c21
track the exit status and execution status as originally set by libcrmservice,
533c21
rather than the mapped rc. The library now calls the mapping function, now
533c21
returning standard codes and called result2rc(), when calling the client
533c21
callback.
533c21
533c21
The exit_reason member is unused as of this commit.
533c21
533c21
The behavior should be identical, with the small exception of
533c21
services_action_async() failure leaving the exit status as set by the services
533c21
library, which means callers will get the result2rc() mapping of the actual
533c21
result instead of the former -ECONNABORTED.
533c21
---
533c21
 lib/fencing/st_client.c | 118 +++++++++++++++++++++++-----------------
533c21
 1 file changed, 68 insertions(+), 50 deletions(-)
533c21
533c21
diff --git a/lib/fencing/st_client.c b/lib/fencing/st_client.c
533c21
index 08adb51c6..6c607b010 100644
533c21
--- a/lib/fencing/st_client.c
533c21
+++ b/lib/fencing/st_client.c
533c21
@@ -29,6 +29,7 @@
533c21
 #include <crm/msg_xml.h>
533c21
 #include <crm/common/xml.h>
533c21
 #include <crm/common/xml_internal.h>
533c21
+#include <crm/services_internal.h>
533c21
 
533c21
 #include <crm/common/mainloop.h>
533c21
 
533c21
@@ -57,9 +58,7 @@ struct stonith_action_s {
533c21
     int max_retries;
533c21
 
533c21
     int pid;
533c21
-    int rc;
533c21
-    char *output;
533c21
-    char *error;
533c21
+    pcmk__action_result_t result;
533c21
 };
533c21
 
533c21
 typedef struct stonith_private_s {
533c21
@@ -120,6 +119,7 @@ static void stonith_connection_destroy(gpointer user_data);
533c21
 static void stonith_send_notification(gpointer data, gpointer user_data);
533c21
 static int internal_stonith_action_execute(stonith_action_t * action);
533c21
 static void log_action(stonith_action_t *action, pid_t pid);
533c21
+static int result2rc(const pcmk__action_result_t *result);
533c21
 
533c21
 /*!
533c21
  * \brief Get agent namespace by name
533c21
@@ -196,6 +196,23 @@ stonith_get_namespace(const char *agent, const char *namespace_s)
533c21
     return st_namespace_invalid;
533c21
 }
533c21
 
533c21
+/*!
533c21
+ * \internal
533c21
+ * \brief Set an action's result based on services library result
533c21
+ *
533c21
+ * \param[in] action      Fence action to set result for
533c21
+ * \param[in] svc_action  Service action to get result from
533c21
+ */
533c21
+static void
533c21
+set_result_from_svc_action(stonith_action_t *action, svc_action_t *svc_action)
533c21
+{
533c21
+    pcmk__set_result(&(action->result), svc_action->rc, svc_action->status,
533c21
+                     NULL);
533c21
+    pcmk__set_result_output(&(action->result),
533c21
+                            services__grab_stdout(svc_action),
533c21
+                            services__grab_stderr(svc_action));
533c21
+}
533c21
+
533c21
 gboolean
533c21
 stonith__watchdog_fencing_enabled_for_node_api(stonith_t *st, const char *node)
533c21
 {
533c21
@@ -259,19 +276,19 @@ stonith__watchdog_fencing_enabled_for_node(const char *node)
533c21
 static void
533c21
 log_action(stonith_action_t *action, pid_t pid)
533c21
 {
533c21
-    if (action->output) {
533c21
+    if (action->result.action_stdout != NULL) {
533c21
         /* Logging the whole string confuses syslog when the string is xml */
533c21
         char *prefix = crm_strdup_printf("%s[%d] stdout:", action->agent, pid);
533c21
 
533c21
-        crm_log_output(LOG_TRACE, prefix, action->output);
533c21
+        crm_log_output(LOG_TRACE, prefix, action->result.action_stdout);
533c21
         free(prefix);
533c21
     }
533c21
 
533c21
-    if (action->error) {
533c21
+    if (action->result.action_stderr != NULL) {
533c21
         /* Logging the whole string confuses syslog when the string is xml */
533c21
         char *prefix = crm_strdup_printf("%s[%d] stderr:", action->agent, pid);
533c21
 
533c21
-        crm_log_output(LOG_WARNING, prefix, action->error);
533c21
+        crm_log_output(LOG_WARNING, prefix, action->result.action_stderr);
533c21
         free(prefix);
533c21
     }
533c21
 }
533c21
@@ -645,8 +662,7 @@ stonith__destroy_action(stonith_action_t *action)
533c21
         if (action->svc_action) {
533c21
             services_action_free(action->svc_action);
533c21
         }
533c21
-        free(action->output);
533c21
-        free(action->error);
533c21
+        pcmk__reset_result(&(action->result));
533c21
         free(action);
533c21
     }
533c21
 }
533c21
@@ -678,15 +694,15 @@ stonith__action_result(stonith_action_t *action, int *rc, char **output,
533c21
     }
533c21
     if (action != NULL) {
533c21
         if (rc) {
533c21
-            *rc = action->rc;
533c21
+            *rc = pcmk_rc2legacy(result2rc(&(action->result)));
533c21
         }
533c21
-        if (output && action->output) {
533c21
-            *output = action->output;
533c21
-            action->output = NULL; // hand off memory management to caller
533c21
+        if ((output != NULL) && (action->result.action_stdout != NULL)) {
533c21
+            *output = action->result.action_stdout;
533c21
+            action->result.action_stdout = NULL; // hand off ownership to caller
533c21
         }
533c21
-        if (error_output && action->error) {
533c21
-            *error_output = action->error;
533c21
-            action->error = NULL; // hand off memory management to caller
533c21
+        if ((error_output != NULL) && (action->result.action_stderr != NULL)) {
533c21
+            *error_output = action->result.action_stderr;
533c21
+            action->result.action_stderr = NULL; // hand off ownership to caller
533c21
         }
533c21
     }
533c21
 }
533c21
@@ -715,6 +731,9 @@ stonith_action_create(const char *agent,
533c21
     action->timeout = action->remaining_timeout = timeout;
533c21
     action->max_retries = FAILURE_MAX_RETRIES;
533c21
 
533c21
+    pcmk__set_result(&(action->result), PCMK_OCF_UNKNOWN, PCMK_EXEC_UNKNOWN,
533c21
+                     NULL);
533c21
+
533c21
     if (device_args) {
533c21
         char buffer[512];
533c21
         const char *value = NULL;
533c21
@@ -739,7 +758,8 @@ update_remaining_timeout(stonith_action_t * action)
533c21
         crm_info("Attempted to execute agent %s (%s) the maximum number of times (%d) allowed",
533c21
                  action->agent, action->action, action->max_retries);
533c21
         action->remaining_timeout = 0;
533c21
-    } else if ((action->rc != -ETIME) && diff < (action->timeout * 0.7)) {
533c21
+    } else if ((action->result.execution_status != PCMK_EXEC_TIMEOUT)
533c21
+               && (diff < (action->timeout * 0.7))) {
533c21
         /* only set remaining timeout period if there is 30%
533c21
          * or greater of the original timeout period left */
533c21
         action->remaining_timeout = action->timeout - diff;
533c21
@@ -750,31 +770,31 @@ update_remaining_timeout(stonith_action_t * action)
533c21
 }
533c21
 
533c21
 static int
533c21
-svc_action_to_errno(svc_action_t *svc_action) {
533c21
-    int rv = pcmk_ok;
533c21
+result2rc(const pcmk__action_result_t *result) {
533c21
+    int rc = pcmk_rc_ok;
533c21
 
533c21
-    if (svc_action->status == PCMK_EXEC_TIMEOUT) {
533c21
-            rv = -ETIME;
533c21
+    if (result->execution_status == PCMK_EXEC_TIMEOUT) {
533c21
+            rc = ETIME;
533c21
 
533c21
-    } else if (svc_action->rc != PCMK_OCF_OK) {
533c21
+    } else if (result->exit_status != CRM_EX_OK) {
533c21
         /* Try to provide a useful error code based on the fence agent's
533c21
          * error output.
533c21
          */
533c21
-        if (svc_action->stderr_data == NULL) {
533c21
-            rv = -ENODATA;
533c21
+        if (result->action_stderr == NULL) {
533c21
+            rc = ENODATA;
533c21
 
533c21
-        } else if (strstr(svc_action->stderr_data, "imed out")) {
533c21
+        } else if (strstr(result->action_stderr, "imed out")) {
533c21
             /* Some agents have their own internal timeouts */
533c21
-            rv = -ETIME;
533c21
+            rc = ETIME;
533c21
 
533c21
-        } else if (strstr(svc_action->stderr_data, "Unrecognised action")) {
533c21
-            rv = -EOPNOTSUPP;
533c21
+        } else if (strstr(result->action_stderr, "Unrecognised action")) {
533c21
+            rc = EOPNOTSUPP;
533c21
 
533c21
         } else {
533c21
-            rv = -pcmk_err_generic;
533c21
+            rc = pcmk_rc_error;
533c21
         }
533c21
     }
533c21
-    return rv;
533c21
+    return rc;
533c21
 }
533c21
 
533c21
 static void
533c21
@@ -782,11 +802,7 @@ stonith_action_async_done(svc_action_t *svc_action)
533c21
 {
533c21
     stonith_action_t *action = (stonith_action_t *) svc_action->cb_data;
533c21
 
533c21
-    action->rc = svc_action_to_errno(svc_action);
533c21
-    action->output = svc_action->stdout_data;
533c21
-    svc_action->stdout_data = NULL;
533c21
-    action->error = svc_action->stderr_data;
533c21
-    svc_action->stderr_data = NULL;
533c21
+    set_result_from_svc_action(action, svc_action);
533c21
 
533c21
     svc_action->params = NULL;
533c21
 
533c21
@@ -795,7 +811,9 @@ stonith_action_async_done(svc_action_t *svc_action)
533c21
 
533c21
     log_action(action, action->pid);
533c21
 
533c21
-    if (action->rc != pcmk_ok && update_remaining_timeout(action)) {
533c21
+    if ((action->result.exit_status != CRM_EX_OK)
533c21
+        && update_remaining_timeout(action)) {
533c21
+
533c21
         int rc = internal_stonith_action_execute(action);
533c21
         if (rc == pcmk_ok) {
533c21
             return;
533c21
@@ -803,7 +821,8 @@ stonith_action_async_done(svc_action_t *svc_action)
533c21
     }
533c21
 
533c21
     if (action->done_cb) {
533c21
-        action->done_cb(action->pid, action->rc, action->output, action->userdata);
533c21
+        action->done_cb(action->pid, pcmk_rc2legacy(result2rc(&(action->result))),
533c21
+                        action->result.action_stdout, action->userdata);
533c21
     }
533c21
 
533c21
     action->svc_action = NULL; // don't remove our caller
533c21
@@ -835,9 +854,13 @@ internal_stonith_action_execute(stonith_action_t * action)
533c21
     static int stonith_sequence = 0;
533c21
     char *buffer = NULL;
533c21
 
533c21
-    if ((action == NULL) || (action->action == NULL) || (action->args == NULL)
533c21
+    CRM_CHECK(action != NULL, return -EINVAL);
533c21
+
533c21
+    if ((action->action == NULL) || (action->args == NULL)
533c21
         || (action->agent == NULL)) {
533c21
-        return -EPROTO;
533c21
+        pcmk__set_result(&(action->result), PCMK_OCF_UNKNOWN_ERROR,
533c21
+                         PCMK_EXEC_ERROR_FATAL, NULL);
533c21
+        return -EINVAL;
533c21
     }
533c21
 
533c21
     if (!action->tries) {
533c21
@@ -857,6 +880,7 @@ internal_stonith_action_execute(stonith_action_t * action)
533c21
     free(buffer);
533c21
 
533c21
     if (svc_action->rc != PCMK_OCF_UNKNOWN) {
533c21
+        set_result_from_svc_action(action, svc_action);
533c21
         services_action_free(svc_action);
533c21
         return -E2BIG;
533c21
     }
533c21
@@ -877,10 +901,7 @@ internal_stonith_action_execute(stonith_action_t * action)
533c21
 
533c21
     /* keep retries from executing out of control and free previous results */
533c21
     if (is_retry) {
533c21
-        free(action->output);
533c21
-        action->output = NULL;
533c21
-        free(action->error);
533c21
-        action->error = NULL;
533c21
+        pcmk__reset_result(&(action->result));
533c21
         sleep(1);
533c21
     }
533c21
 
533c21
@@ -889,22 +910,19 @@ internal_stonith_action_execute(stonith_action_t * action)
533c21
         if (services_action_async_fork_notify(svc_action,
533c21
                                               &stonith_action_async_done,
533c21
                                               &stonith_action_async_forked)) {
533c21
+            pcmk__set_result(&(action->result), PCMK_OCF_UNKNOWN,
533c21
+                             PCMK_EXEC_PENDING, NULL);
533c21
             return pcmk_ok;
533c21
         }
533c21
 
533c21
     } else if (services_action_sync(svc_action)) { // sync success
533c21
         rc = pcmk_ok;
533c21
-        action->rc = svc_action_to_errno(svc_action);
533c21
-        action->output = svc_action->stdout_data;
533c21
-        svc_action->stdout_data = NULL;
533c21
-        action->error = svc_action->stderr_data;
533c21
-        svc_action->stderr_data = NULL;
533c21
 
533c21
     } else { // sync failure
533c21
-        action->rc = -ECONNABORTED;
533c21
-        rc = action->rc;
533c21
+        rc = -ECONNABORTED;
533c21
     }
533c21
 
533c21
+    set_result_from_svc_action(action, svc_action);
533c21
     svc_action->params = NULL;
533c21
     services_action_free(svc_action);
533c21
     return rc;
533c21
-- 
533c21
2.27.0
533c21
533c21
533c21
From 4c8e0b0ecc53cb3883f0da0eede20b900fff48d1 Mon Sep 17 00:00:00 2001
533c21
From: Ken Gaillot <kgaillot@redhat.com>
533c21
Date: Tue, 21 Sep 2021 11:14:31 -0500
533c21
Subject: [PATCH 02/12] Low: fencing: improve return code given back to library
533c21
 callers
533c21
533c21
Expose result2rc() internally for future reuse, and expand it to handle more
533c21
cases. In theory, this can give us better log messages and status output for
533c21
failures.
533c21
---
533c21
 include/crm/fencing/internal.h |  1 +
533c21
 lib/fencing/st_client.c        | 63 +++++++++++++++++++++-------------
533c21
 2 files changed, 41 insertions(+), 23 deletions(-)
533c21
533c21
diff --git a/include/crm/fencing/internal.h b/include/crm/fencing/internal.h
533c21
index fa9059e6f..0d23967bb 100644
533c21
--- a/include/crm/fencing/internal.h
533c21
+++ b/include/crm/fencing/internal.h
533c21
@@ -60,6 +60,7 @@ stonith_action_t *stonith_action_create(const char *agent,
533c21
 void stonith__destroy_action(stonith_action_t *action);
533c21
 void stonith__action_result(stonith_action_t *action, int *rc, char **output,
533c21
                             char **error_output);
533c21
+int stonith__result2rc(const pcmk__action_result_t *result);
533c21
 
533c21
 int
533c21
 stonith_action_execute_async(stonith_action_t * action,
533c21
diff --git a/lib/fencing/st_client.c b/lib/fencing/st_client.c
533c21
index 6c607b010..809be1640 100644
533c21
--- a/lib/fencing/st_client.c
533c21
+++ b/lib/fencing/st_client.c
533c21
@@ -119,7 +119,6 @@ static void stonith_connection_destroy(gpointer user_data);
533c21
 static void stonith_send_notification(gpointer data, gpointer user_data);
533c21
 static int internal_stonith_action_execute(stonith_action_t * action);
533c21
 static void log_action(stonith_action_t *action, pid_t pid);
533c21
-static int result2rc(const pcmk__action_result_t *result);
533c21
 
533c21
 /*!
533c21
  * \brief Get agent namespace by name
533c21
@@ -694,7 +693,7 @@ stonith__action_result(stonith_action_t *action, int *rc, char **output,
533c21
     }
533c21
     if (action != NULL) {
533c21
         if (rc) {
533c21
-            *rc = pcmk_rc2legacy(result2rc(&(action->result)));
533c21
+            *rc = pcmk_rc2legacy(stonith__result2rc(&(action->result)));
533c21
         }
533c21
         if ((output != NULL) && (action->result.action_stdout != NULL)) {
533c21
             *output = action->result.action_stdout;
533c21
@@ -769,32 +768,49 @@ update_remaining_timeout(stonith_action_t * action)
533c21
     return action->remaining_timeout ? TRUE : FALSE;
533c21
 }
533c21
 
533c21
-static int
533c21
-result2rc(const pcmk__action_result_t *result) {
533c21
-    int rc = pcmk_rc_ok;
533c21
+/*!
533c21
+ * \internal
533c21
+ * \brief Map a fencing action result to a standard return code
533c21
+ *
533c21
+ * \param[in] result  Fencing action result to map
533c21
+ *
533c21
+ * \return Standard Pacemaker return code that best corresponds to \p result
533c21
+ */
533c21
+int
533c21
+stonith__result2rc(const pcmk__action_result_t *result)
533c21
+{
533c21
+    switch (result->execution_status) {
533c21
+        case PCMK_EXEC_CANCELLED:       return ECANCELED;
533c21
+        case PCMK_EXEC_TIMEOUT:         return ETIME;
533c21
+        case PCMK_EXEC_NOT_INSTALLED:   return ENOENT;
533c21
+        case PCMK_EXEC_NOT_SUPPORTED:   return EOPNOTSUPP;
533c21
+        case PCMK_EXEC_NOT_CONNECTED:   return ENOTCONN;
533c21
+        case PCMK_EXEC_NO_FENCE_DEVICE: return ENODEV;
533c21
+        case PCMK_EXEC_NO_SECRETS:      return EACCES;
533c21
+        default:                        break;
533c21
+    }
533c21
 
533c21
-    if (result->execution_status == PCMK_EXEC_TIMEOUT) {
533c21
-            rc = ETIME;
533c21
+    if (result->exit_status == CRM_EX_OK) {
533c21
+        return pcmk_rc_ok;
533c21
+    }
533c21
 
533c21
-    } else if (result->exit_status != CRM_EX_OK) {
533c21
-        /* Try to provide a useful error code based on the fence agent's
533c21
-         * error output.
533c21
-         */
533c21
-        if (result->action_stderr == NULL) {
533c21
-            rc = ENODATA;
533c21
+    // Try to provide useful error code based on result's error output
533c21
 
533c21
-        } else if (strstr(result->action_stderr, "imed out")) {
533c21
-            /* Some agents have their own internal timeouts */
533c21
-            rc = ETIME;
533c21
+    if (result->action_stderr == NULL) {
533c21
+        return ENODATA;
533c21
 
533c21
-        } else if (strstr(result->action_stderr, "Unrecognised action")) {
533c21
-            rc = EOPNOTSUPP;
533c21
+    } else if (strcasestr(result->action_stderr, "timed out")
533c21
+               || strcasestr(result->action_stderr, "timeout")) {
533c21
+        return ETIME;
533c21
 
533c21
-        } else {
533c21
-            rc = pcmk_rc_error;
533c21
-        }
533c21
+    } else if (strcasestr(result->action_stderr, "unrecognised action")
533c21
+               || strcasestr(result->action_stderr, "unrecognized action")
533c21
+               || strcasestr(result->action_stderr, "unsupported action")) {
533c21
+        return EOPNOTSUPP;
533c21
     }
533c21
-    return rc;
533c21
+
533c21
+    // Oh well, we tried
533c21
+    return pcmk_rc_error;
533c21
 }
533c21
 
533c21
 static void
533c21
@@ -821,7 +837,8 @@ stonith_action_async_done(svc_action_t *svc_action)
533c21
     }
533c21
 
533c21
     if (action->done_cb) {
533c21
-        action->done_cb(action->pid, pcmk_rc2legacy(result2rc(&(action->result))),
533c21
+        action->done_cb(action->pid,
533c21
+                        pcmk_rc2legacy(stonith__result2rc(&(action->result))),
533c21
                         action->result.action_stdout, action->userdata);
533c21
     }
533c21
 
533c21
-- 
533c21
2.27.0
533c21
533c21
533c21
From 153c9b552a5bad9dd36e8635fa478ed9cad1f240 Mon Sep 17 00:00:00 2001
533c21
From: Ken Gaillot <kgaillot@redhat.com>
533c21
Date: Thu, 7 Oct 2021 11:35:44 -0500
533c21
Subject: [PATCH 03/12] Refactor: fencing: return full result from
533c21
 stonith__action_result()
533c21
533c21
Previously, stonith__action_result() grabbed an action's legacy rc, stdout, and
533c21
stderr separately. Now, directly return a pointer to the action's result
533c21
object, and map that to a legacy rc in the callers when needed.
533c21
---
533c21
 include/crm/fencing/internal.h |  3 +--
533c21
 lib/fencing/st_client.c        | 36 ++++---------------------
533c21
 lib/fencing/st_rhcs.c          | 48 ++++++++++++++++++++++++----------
533c21
 3 files changed, 40 insertions(+), 47 deletions(-)
533c21
533c21
diff --git a/include/crm/fencing/internal.h b/include/crm/fencing/internal.h
533c21
index 0d23967bb..4e9f50fe8 100644
533c21
--- a/include/crm/fencing/internal.h
533c21
+++ b/include/crm/fencing/internal.h
533c21
@@ -58,8 +58,7 @@ stonith_action_t *stonith_action_create(const char *agent,
533c21
                                         GHashTable * port_map,
533c21
                                         const char * host_arg);
533c21
 void stonith__destroy_action(stonith_action_t *action);
533c21
-void stonith__action_result(stonith_action_t *action, int *rc, char **output,
533c21
-                            char **error_output);
533c21
+pcmk__action_result_t *stonith__action_result(stonith_action_t *action);
533c21
 int stonith__result2rc(const pcmk__action_result_t *result);
533c21
 
533c21
 int
533c21
diff --git a/lib/fencing/st_client.c b/lib/fencing/st_client.c
533c21
index 809be1640..b9df18465 100644
533c21
--- a/lib/fencing/st_client.c
533c21
+++ b/lib/fencing/st_client.c
533c21
@@ -670,40 +670,14 @@ stonith__destroy_action(stonith_action_t *action)
533c21
  * \internal
533c21
  * \brief Get the result of an executed stonith action
533c21
  *
533c21
- * \param[in,out] action        Executed action
533c21
- * \param[out]    rc            Where to store result code (or NULL)
533c21
- * \param[out]    output        Where to store standard output (or NULL)
533c21
- * \param[out]    error_output  Where to store standard error output (or NULL)
533c21
+ * \param[in] action  Executed action
533c21
  *
533c21
- * \note If output or error_output is not NULL, the caller is responsible for
533c21
- *       freeing the memory.
533c21
+ * \return Pointer to action's result (or NULL if \p action is NULL)
533c21
  */
533c21
-void
533c21
-stonith__action_result(stonith_action_t *action, int *rc, char **output,
533c21
-                       char **error_output)
533c21
+pcmk__action_result_t *
533c21
+stonith__action_result(stonith_action_t *action)
533c21
 {
533c21
-    if (rc) {
533c21
-        *rc = pcmk_ok;
533c21
-    }
533c21
-    if (output) {
533c21
-        *output = NULL;
533c21
-    }
533c21
-    if (error_output) {
533c21
-        *error_output = NULL;
533c21
-    }
533c21
-    if (action != NULL) {
533c21
-        if (rc) {
533c21
-            *rc = pcmk_rc2legacy(stonith__result2rc(&(action->result)));
533c21
-        }
533c21
-        if ((output != NULL) && (action->result.action_stdout != NULL)) {
533c21
-            *output = action->result.action_stdout;
533c21
-            action->result.action_stdout = NULL; // hand off ownership to caller
533c21
-        }
533c21
-        if ((error_output != NULL) && (action->result.action_stderr != NULL)) {
533c21
-            *error_output = action->result.action_stderr;
533c21
-            action->result.action_stderr = NULL; // hand off ownership to caller
533c21
-        }
533c21
-    }
533c21
+    return (action == NULL)? NULL : &(action->result);
533c21
 }
533c21
 
533c21
 #define FAILURE_MAX_RETRIES 2
533c21
diff --git a/lib/fencing/st_rhcs.c b/lib/fencing/st_rhcs.c
533c21
index 89a2625bd..23e694975 100644
533c21
--- a/lib/fencing/st_rhcs.c
533c21
+++ b/lib/fencing/st_rhcs.c
533c21
@@ -1,5 +1,5 @@
533c21
 /*
533c21
- * Copyright 2004-2020 the Pacemaker project contributors
533c21
+ * Copyright 2004-2021 the Pacemaker project contributors
533c21
  *
533c21
  * The version control history for this file may have further details.
533c21
  *
533c21
@@ -123,10 +123,10 @@ stonith_rhcs_parameter_not_required(xmlNode *metadata, const char *parameter)
533c21
 static int
533c21
 stonith__rhcs_get_metadata(const char *agent, int timeout, xmlNode **metadata)
533c21
 {
533c21
-    char *buffer = NULL;
533c21
     xmlNode *xml = NULL;
533c21
     xmlNode *actions = NULL;
533c21
     xmlXPathObject *xpathObj = NULL;
533c21
+    pcmk__action_result_t *result = NULL;
533c21
     stonith_action_t *action = stonith_action_create(agent, "metadata", NULL, 0,
533c21
                                                      5, NULL, NULL, NULL);
533c21
     int rc = stonith__execute(action);
533c21
@@ -138,23 +138,31 @@ stonith__rhcs_get_metadata(const char *agent, int timeout, xmlNode **metadata)
533c21
         return rc;
533c21
     }
533c21
 
533c21
-    stonith__action_result(action, &rc, &buffer, NULL);
533c21
-    stonith__destroy_action(action);
533c21
-    if (rc < 0) {
533c21
-        crm_warn("Metadata action for %s failed: %s " CRM_XS "rc=%d",
533c21
-                 agent, pcmk_strerror(rc), rc);
533c21
-        free(buffer);
533c21
-        return rc;
533c21
+    result = stonith__action_result(action);
533c21
+
533c21
+    if (result->execution_status != PCMK_EXEC_DONE) {
533c21
+        crm_warn("Could not execute metadata action for %s: %s",
533c21
+                 agent, pcmk_exec_status_str(result->execution_status));
533c21
+        stonith__destroy_action(action);
533c21
+        return pcmk_rc2legacy(stonith__result2rc(result));
533c21
     }
533c21
 
533c21
-    if (buffer == NULL) {
533c21
+    if (result->exit_status != CRM_EX_OK) {
533c21
+        crm_warn("Metadata action for %s returned error code %d",
533c21
+                 agent, result->exit_status);
533c21
+        stonith__destroy_action(action);
533c21
+        return pcmk_rc2legacy(stonith__result2rc(result));
533c21
+    }
533c21
+
533c21
+    if (result->action_stdout == NULL) {
533c21
         crm_warn("Metadata action for %s returned no data", agent);
533c21
+        stonith__destroy_action(action);
533c21
         return -ENODATA;
533c21
     }
533c21
 
533c21
-    xml = string2xml(buffer);
533c21
-    free(buffer);
533c21
-    buffer = NULL;
533c21
+    xml = string2xml(result->action_stdout);
533c21
+    stonith__destroy_action(action);
533c21
+
533c21
     if (xml == NULL) {
533c21
         crm_warn("Metadata for %s is invalid", agent);
533c21
         return -pcmk_err_schema_validation;
533c21
@@ -289,7 +297,19 @@ stonith__rhcs_validate(stonith_t *st, int call_options, const char *target,
533c21
 
533c21
     rc = stonith__execute(action);
533c21
     if (rc == pcmk_ok) {
533c21
-        stonith__action_result(action, &rc, output, error_output);
533c21
+        pcmk__action_result_t *result = stonith__action_result(action);
533c21
+
533c21
+        rc = pcmk_rc2legacy(stonith__result2rc(result));
533c21
+
533c21
+        // Take ownership of output so stonith__destroy_action() doesn't free it
533c21
+        if (output != NULL) {
533c21
+            *output = result->action_stdout;
533c21
+            result->action_stdout = NULL;
533c21
+        }
533c21
+        if (error_output != NULL) {
533c21
+            *error_output = result->action_stderr;
533c21
+            result->action_stderr = NULL;
533c21
+        }
533c21
     }
533c21
     stonith__destroy_action(action);
533c21
     return rc;
533c21
-- 
533c21
2.27.0
533c21
533c21
533c21
From 7f7067014357cccb229a0bef091e234eb3765f7a Mon Sep 17 00:00:00 2001
533c21
From: Ken Gaillot <kgaillot@redhat.com>
533c21
Date: Tue, 21 Sep 2021 13:05:54 -0500
533c21
Subject: [PATCH 04/12] Refactor: fencing: pass full result to async action
533c21
 callback
533c21
533c21
When executing an asynchronous fence agent command, the fencing library gets
533c21
the full result (exit status, execution status, and exit reason) from the
533c21
services library, then maps that to a legacy return code.
533c21
533c21
Now, pass the full result object to the fencing async callback, rather than
533c21
separate arguments for legacy code and stdout. The mapping to a legacy code now
533c21
happens in the fencer rather than the fencing library.
533c21
533c21
The goal of this and following commits is to push the full result object
533c21
further down the code path, so that ultimately the full result is always
533c21
available internally, and the legacy code mapping is only done for backward
533c21
compatibility when sending the result back to a client.
533c21
533c21
This commit focuses on the async callback (done_cb() in both the fencer's
533c21
async_command_t and the fencing library's stonith_action_t). Later commits will
533c21
follow the chain:
533c21
533c21
	st_child_done() and stonith_fence_get_devices_cb()
533c21
	-> stonith_send_async_reply()
533c21
	-> stonith_construct_async_reply() and log_async_result()
533c21
---
533c21
 daemons/fenced/fenced_commands.c | 78 +++++++++++++++++++++-----------
533c21
 include/crm/fencing/internal.h   |  3 +-
533c21
 lib/fencing/st_client.c          | 10 ++--
533c21
 3 files changed, 58 insertions(+), 33 deletions(-)
533c21
533c21
diff --git a/daemons/fenced/fenced_commands.c b/daemons/fenced/fenced_commands.c
533c21
index b5ae28d90..d5d04ae69 100644
533c21
--- a/daemons/fenced/fenced_commands.c
533c21
+++ b/daemons/fenced/fenced_commands.c
533c21
@@ -62,7 +62,8 @@ struct device_search_s {
533c21
 };
533c21
 
533c21
 static gboolean stonith_device_dispatch(gpointer user_data);
533c21
-static void st_child_done(int pid, int rc, const char *output, void *user_data);
533c21
+static void st_child_done(int pid, const pcmk__action_result_t *result,
533c21
+                          void *user_data);
533c21
 static void stonith_send_reply(xmlNode * reply, int call_options, const char *remote_peer,
533c21
                                const char *client_id);
533c21
 
533c21
@@ -99,7 +100,8 @@ typedef struct async_command_s {
533c21
     GList *device_next;
533c21
 
533c21
     void *internal_user_data;
533c21
-    void (*done_cb) (int pid, int rc, const char *output, void *user_data);
533c21
+    void (*done_cb) (int pid, const pcmk__action_result_t *result,
533c21
+                     void *user_data);
533c21
     guint timer_sigterm;
533c21
     guint timer_sigkill;
533c21
     /*! If the operation timed out, this is the last signal
533c21
@@ -377,13 +379,25 @@ get_agent_metadata_cb(gpointer data) {
533c21
  * \internal
533c21
  * \brief Call a command's action callback for an internal (not library) result
533c21
  *
533c21
- * \param[in] cmd     Command to report result for
533c21
- * \param[in] rc      Legacy return code to pass to callback
533c21
+ * \param[in] cmd               Command to report result for
533c21
+ * \param[in] execution_status  Execution status to use for result
533c21
+ * \param[in] exit_status       Exit status to use for result
533c21
+ * \param[in] exit_reason       Exit reason to use for result
533c21
  */
533c21
 static void
533c21
-report_internal_result(async_command_t *cmd, int rc)
533c21
+report_internal_result(async_command_t *cmd, int exit_status,
533c21
+                       int execution_status, const char *exit_reason)
533c21
 {
533c21
-    cmd->done_cb(0, rc, NULL, cmd);
533c21
+    pcmk__action_result_t result = {
533c21
+        // Ensure we don't pass garbage to free()
533c21
+        .exit_reason = NULL,
533c21
+        .action_stdout = NULL,
533c21
+        .action_stderr = NULL
533c21
+    };
533c21
+
533c21
+    pcmk__set_result(&result, exit_status, execution_status, exit_reason);
533c21
+    cmd->done_cb(0, &result, cmd);
533c21
+    pcmk__reset_result(&result);
533c21
 }
533c21
 
533c21
 static gboolean
533c21
@@ -446,7 +460,7 @@ stonith_device_execute(stonith_device_t * device)
533c21
             }
533c21
         } else {
533c21
             crm_info("Faking success for %s watchdog operation", cmd->action);
533c21
-            report_internal_result(cmd, pcmk_ok);
533c21
+            report_internal_result(cmd, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
533c21
             goto done;
533c21
         }
533c21
     }
533c21
@@ -462,7 +476,8 @@ stonith_device_execute(stonith_device_t * device)
533c21
             crm_err("Considering %s unconfigured "
533c21
                     "because unable to load CIB secrets: %s",
533c21
                      device->id, pcmk_rc_str(exec_rc));
533c21
-            report_internal_result(cmd, -EACCES);
533c21
+            report_internal_result(cmd, CRM_EX_ERROR, PCMK_EXEC_NO_SECRETS,
533c21
+                                   NULL);
533c21
             goto done;
533c21
         }
533c21
     }
533c21
@@ -501,7 +516,7 @@ stonith_device_execute(stonith_device_t * device)
533c21
                                            cmd->done_cb, fork_cb);
533c21
     if (exec_rc < 0) {
533c21
         cmd->activating_on = NULL;
533c21
-        report_internal_result(cmd, exec_rc);
533c21
+        cmd->done_cb(0, stonith__action_result(action), cmd);
533c21
         stonith__destroy_action(action);
533c21
     }
533c21
 
533c21
@@ -625,7 +640,8 @@ free_device(gpointer data)
533c21
         async_command_t *cmd = gIter->data;
533c21
 
533c21
         crm_warn("Removal of device '%s' purged operation '%s'", device->id, cmd->action);
533c21
-        report_internal_result(cmd, -ENODEV);
533c21
+        report_internal_result(cmd, CRM_EX_ERROR, PCMK_EXEC_NO_FENCE_DEVICE,
533c21
+                               NULL);
533c21
     }
533c21
     g_list_free(device->pending_ops);
533c21
 
533c21
@@ -1079,7 +1095,8 @@ schedule_internal_command(const char *origin,
533c21
                           const char *victim,
533c21
                           int timeout,
533c21
                           void *internal_user_data,
533c21
-                          void (*done_cb) (int pid, int rc, const char *output,
533c21
+                          void (*done_cb) (int pid,
533c21
+                                           const pcmk__action_result_t *result,
533c21
                                            void *user_data))
533c21
 {
533c21
     async_command_t *cmd = NULL;
533c21
@@ -1111,7 +1128,7 @@ enum fence_status_code {
533c21
 };
533c21
 
533c21
 static void
533c21
-status_search_cb(int pid, int rc, const char *output, void *user_data)
533c21
+status_search_cb(int pid, const pcmk__action_result_t *result, void *user_data)
533c21
 {
533c21
     async_command_t *cmd = user_data;
533c21
     struct device_search_s *search = cmd->internal_user_data;
533c21
@@ -1127,7 +1144,7 @@ status_search_cb(int pid, int rc, const char *output, void *user_data)
533c21
 
533c21
     mainloop_set_trigger(dev->work);
533c21
 
533c21
-    switch (rc) {
533c21
+    switch (result->exit_status) {
533c21
         case fence_status_unknown:
533c21
             crm_trace("%s reported it cannot fence %s", dev->id, search->host);
533c21
             break;
533c21
@@ -1141,14 +1158,15 @@ status_search_cb(int pid, int rc, const char *output, void *user_data)
533c21
         default:
533c21
             crm_warn("Assuming %s cannot fence %s "
533c21
                      "(status returned unknown code %d)",
533c21
-                     dev->id, search->host, rc);
533c21
+                     dev->id, search->host, result->exit_status);
533c21
             break;
533c21
     }
533c21
     search_devices_record_result(search, dev->id, can);
533c21
 }
533c21
 
533c21
 static void
533c21
-dynamic_list_search_cb(int pid, int rc, const char *output, void *user_data)
533c21
+dynamic_list_search_cb(int pid, const pcmk__action_result_t *result,
533c21
+                       void *user_data)
533c21
 {
533c21
     async_command_t *cmd = user_data;
533c21
     struct device_search_s *search = cmd->internal_user_data;
533c21
@@ -1169,21 +1187,21 @@ dynamic_list_search_cb(int pid, int rc, const char *output, void *user_data)
533c21
 
533c21
     mainloop_set_trigger(dev->work);
533c21
 
533c21
-    if (rc == CRM_EX_OK) {
533c21
+    if (result->exit_status == CRM_EX_OK) {
533c21
         crm_info("Refreshing target list for %s", dev->id);
533c21
         g_list_free_full(dev->targets, free);
533c21
-        dev->targets = stonith__parse_targets(output);
533c21
+        dev->targets = stonith__parse_targets(result->action_stdout);
533c21
         dev->targets_age = time(NULL);
533c21
 
533c21
     } else if (dev->targets != NULL) {
533c21
         crm_info("Reusing most recent target list for %s "
533c21
                  "because list returned error code %d",
533c21
-                 dev->id, rc);
533c21
+                 dev->id, result->exit_status);
533c21
 
533c21
     } else { // We have never successfully executed list
533c21
         crm_warn("Assuming %s cannot fence %s "
533c21
                  "because list returned error code %d",
533c21
-                 dev->id, search->host, rc);
533c21
+                 dev->id, search->host, result->exit_status);
533c21
 
533c21
         /* Fall back to pcmk_host_check="status" if the user didn't explicitly
533c21
          * specify "dynamic-list".
533c21
@@ -2407,7 +2425,7 @@ cancel_stonith_command(async_command_t * cmd)
533c21
 }
533c21
 
533c21
 static void
533c21
-st_child_done(int pid, int rc, const char *output, void *user_data)
533c21
+st_child_done(int pid, const pcmk__action_result_t *result, void *user_data)
533c21
 {
533c21
     stonith_device_t *device = NULL;
533c21
     stonith_device_t *next_device = NULL;
533c21
@@ -2423,7 +2441,7 @@ st_child_done(int pid, int rc, const char *output, void *user_data)
533c21
     /* The device is ready to do something else now */
533c21
     device = g_hash_table_lookup(device_list, cmd->device);
533c21
     if (device) {
533c21
-        if (!device->verified && (rc == pcmk_ok) &&
533c21
+        if (!device->verified && (result->exit_status == CRM_EX_OK) &&
533c21
             (pcmk__strcase_any_of(cmd->action, "list", "monitor", "status", NULL))) {
533c21
 
533c21
             device->verified = TRUE;
533c21
@@ -2432,7 +2450,7 @@ st_child_done(int pid, int rc, const char *output, void *user_data)
533c21
         mainloop_set_trigger(device->work);
533c21
     }
533c21
 
533c21
-    if (rc == 0) {
533c21
+    if (result->exit_status == CRM_EX_OK) {
533c21
         GList *iter;
533c21
         /* see if there are any required devices left to execute for this op */
533c21
         for (iter = cmd->device_next; iter != NULL; iter = iter->next) {
533c21
@@ -2445,7 +2463,8 @@ st_child_done(int pid, int rc, const char *output, void *user_data)
533c21
             next_device = NULL;
533c21
         }
533c21
 
533c21
-    } else if (rc != 0 && cmd->device_next && (is_action_required(cmd->action, device) == FALSE)) {
533c21
+    } else if ((cmd->device_next != NULL)
533c21
+               && !is_action_required(cmd->action, device)) {
533c21
         /* if this device didn't work out, see if there are any others we can try.
533c21
          * if the failed device was 'required', we can't pick another device. */
533c21
         next_device = g_hash_table_lookup(device_list, cmd->device_next->data);
533c21
@@ -2454,16 +2473,19 @@ st_child_done(int pid, int rc, const char *output, void *user_data)
533c21
 
533c21
     /* this operation requires more fencing, hooray! */
533c21
     if (next_device) {
533c21
-        log_async_result(cmd, rc, pid, next_device->id, output, FALSE);
533c21
+        log_async_result(cmd, pcmk_rc2legacy(stonith__result2rc(result)), pid,
533c21
+                         next_device->id, result->action_stdout, FALSE);
533c21
         schedule_stonith_command(cmd, next_device);
533c21
         /* Prevent cmd from being freed */
533c21
         cmd = NULL;
533c21
         goto done;
533c21
     }
533c21
 
533c21
-    stonith_send_async_reply(cmd, output, rc, pid, false);
533c21
+    stonith_send_async_reply(cmd, result->action_stdout,
533c21
+                             pcmk_rc2legacy(stonith__result2rc(result)), pid,
533c21
+                             false);
533c21
 
533c21
-    if (rc != 0) {
533c21
+    if (result->exit_status != CRM_EX_OK) {
533c21
         goto done;
533c21
     }
533c21
 
533c21
@@ -2509,7 +2531,9 @@ st_child_done(int pid, int rc, const char *output, void *user_data)
533c21
 
533c21
         cmd_list = g_list_remove_link(cmd_list, gIter);
533c21
 
533c21
-        stonith_send_async_reply(cmd_other, output, rc, pid, true);
533c21
+        stonith_send_async_reply(cmd_other, result->action_stdout,
533c21
+                                 pcmk_rc2legacy(stonith__result2rc(result)),
533c21
+                                 pid, true);
533c21
         cancel_stonith_command(cmd_other);
533c21
 
533c21
         free_async_command(cmd_other);
533c21
diff --git a/include/crm/fencing/internal.h b/include/crm/fencing/internal.h
533c21
index 4e9f50fe8..6a7e4232c 100644
533c21
--- a/include/crm/fencing/internal.h
533c21
+++ b/include/crm/fencing/internal.h
533c21
@@ -64,7 +64,8 @@ int stonith__result2rc(const pcmk__action_result_t *result);
533c21
 int
533c21
 stonith_action_execute_async(stonith_action_t * action,
533c21
                              void *userdata,
533c21
-                             void (*done) (int pid, int rc, const char *output,
533c21
+                             void (*done) (int pid,
533c21
+                                           const pcmk__action_result_t *result,
533c21
                                            void *user_data),
533c21
                              void (*fork_cb) (int pid, void *user_data));
533c21
 
533c21
diff --git a/lib/fencing/st_client.c b/lib/fencing/st_client.c
533c21
index b9df18465..59dcab9a3 100644
533c21
--- a/lib/fencing/st_client.c
533c21
+++ b/lib/fencing/st_client.c
533c21
@@ -46,7 +46,8 @@ struct stonith_action_s {
533c21
     int timeout;
533c21
     int async;
533c21
     void *userdata;
533c21
-    void (*done_cb) (int pid, int status, const char *output, void *user_data);
533c21
+    void (*done_cb) (int pid, const pcmk__action_result_t *result,
533c21
+                     void *user_data);
533c21
     void (*fork_cb) (int pid, void *user_data);
533c21
 
533c21
     svc_action_t *svc_action;
533c21
@@ -811,9 +812,7 @@ stonith_action_async_done(svc_action_t *svc_action)
533c21
     }
533c21
 
533c21
     if (action->done_cb) {
533c21
-        action->done_cb(action->pid,
533c21
-                        pcmk_rc2legacy(stonith__result2rc(&(action->result))),
533c21
-                        action->result.action_stdout, action->userdata);
533c21
+        action->done_cb(action->pid, &(action->result), action->userdata);
533c21
     }
533c21
 
533c21
     action->svc_action = NULL; // don't remove our caller
533c21
@@ -933,7 +932,8 @@ internal_stonith_action_execute(stonith_action_t * action)
533c21
 int
533c21
 stonith_action_execute_async(stonith_action_t * action,
533c21
                              void *userdata,
533c21
-                             void (*done) (int pid, int rc, const char *output,
533c21
+                             void (*done) (int pid,
533c21
+                                           const pcmk__action_result_t *result,
533c21
                                            void *user_data),
533c21
                              void (*fork_cb) (int pid, void *user_data))
533c21
 {
533c21
-- 
533c21
2.27.0
533c21
533c21
533c21
From bbd022306df7a873c0ecb2be2d33c56fbf327b8c Mon Sep 17 00:00:00 2001
533c21
From: Ken Gaillot <kgaillot@redhat.com>
533c21
Date: Tue, 21 Sep 2021 11:51:28 -0500
533c21
Subject: [PATCH 05/12] Feature: fencing: set exit reason for internal
533c21
 execution errors
533c21
533c21
... most importantly, copying any exit reason set by the services library.
533c21
This ensures that the stonith_action_t exit reason is set when appropriate.
533c21
However, nothing uses it as of this commit.
533c21
---
533c21
 daemons/fenced/fenced_commands.c | 4 ++--
533c21
 lib/fencing/st_client.c          | 6 +++---
533c21
 2 files changed, 5 insertions(+), 5 deletions(-)
533c21
533c21
diff --git a/daemons/fenced/fenced_commands.c b/daemons/fenced/fenced_commands.c
533c21
index d5d04ae69..f55a32649 100644
533c21
--- a/daemons/fenced/fenced_commands.c
533c21
+++ b/daemons/fenced/fenced_commands.c
533c21
@@ -477,7 +477,7 @@ stonith_device_execute(stonith_device_t * device)
533c21
                     "because unable to load CIB secrets: %s",
533c21
                      device->id, pcmk_rc_str(exec_rc));
533c21
             report_internal_result(cmd, CRM_EX_ERROR, PCMK_EXEC_NO_SECRETS,
533c21
-                                   NULL);
533c21
+                                   "Failed to get CIB secrets");
533c21
             goto done;
533c21
         }
533c21
     }
533c21
@@ -641,7 +641,7 @@ free_device(gpointer data)
533c21
 
533c21
         crm_warn("Removal of device '%s' purged operation '%s'", device->id, cmd->action);
533c21
         report_internal_result(cmd, CRM_EX_ERROR, PCMK_EXEC_NO_FENCE_DEVICE,
533c21
-                               NULL);
533c21
+                               "Device was removed before action could be executed");
533c21
     }
533c21
     g_list_free(device->pending_ops);
533c21
 
533c21
diff --git a/lib/fencing/st_client.c b/lib/fencing/st_client.c
533c21
index 59dcab9a3..3d4127eff 100644
533c21
--- a/lib/fencing/st_client.c
533c21
+++ b/lib/fencing/st_client.c
533c21
@@ -207,7 +207,7 @@ static void
533c21
 set_result_from_svc_action(stonith_action_t *action, svc_action_t *svc_action)
533c21
 {
533c21
     pcmk__set_result(&(action->result), svc_action->rc, svc_action->status,
533c21
-                     NULL);
533c21
+                     services__exit_reason(svc_action));
533c21
     pcmk__set_result_output(&(action->result),
533c21
                             services__grab_stdout(svc_action),
533c21
                             services__grab_stderr(svc_action));
533c21
@@ -706,7 +706,7 @@ stonith_action_create(const char *agent,
533c21
     action->max_retries = FAILURE_MAX_RETRIES;
533c21
 
533c21
     pcmk__set_result(&(action->result), PCMK_OCF_UNKNOWN, PCMK_EXEC_UNKNOWN,
533c21
-                     NULL);
533c21
+                     "Initialization bug in fencing library");
533c21
 
533c21
     if (device_args) {
533c21
         char buffer[512];
533c21
@@ -849,7 +849,7 @@ internal_stonith_action_execute(stonith_action_t * action)
533c21
     if ((action->action == NULL) || (action->args == NULL)
533c21
         || (action->agent == NULL)) {
533c21
         pcmk__set_result(&(action->result), PCMK_OCF_UNKNOWN_ERROR,
533c21
-                         PCMK_EXEC_ERROR_FATAL, NULL);
533c21
+                         PCMK_EXEC_ERROR_FATAL, "Bug in fencing library");
533c21
         return -EINVAL;
533c21
     }
533c21
 
533c21
-- 
533c21
2.27.0
533c21
533c21
533c21
From ed08f600688af1d25412d2427502ba5d4a55c0d6 Mon Sep 17 00:00:00 2001
533c21
From: Ken Gaillot <kgaillot@redhat.com>
533c21
Date: Thu, 7 Oct 2021 12:06:10 -0500
533c21
Subject: [PATCH 06/12] Fix: fencer: handle dynamic target query failures
533c21
 better
533c21
533c21
Previously, the callbacks for list and status queries checked only the result's
533c21
exit status. However, the services library will use PCMK_OCF_UNKNOWN_ERROR (1)
533c21
as the exit status for internal failures, and that value signifies a recognized
533c21
node (not an error) for fence list actions.
533c21
533c21
Now, the callbacks check the execution status as well.
533c21
---
533c21
 daemons/fenced/fenced_commands.c | 46 +++++++++++++++++++++++++++-----
533c21
 1 file changed, 39 insertions(+), 7 deletions(-)
533c21
533c21
diff --git a/daemons/fenced/fenced_commands.c b/daemons/fenced/fenced_commands.c
533c21
index f55a32649..7b3fb25a1 100644
533c21
--- a/daemons/fenced/fenced_commands.c
533c21
+++ b/daemons/fenced/fenced_commands.c
533c21
@@ -1144,6 +1144,18 @@ status_search_cb(int pid, const pcmk__action_result_t *result, void *user_data)
533c21
 
533c21
     mainloop_set_trigger(dev->work);
533c21
 
533c21
+    if (result->execution_status != PCMK_EXEC_DONE) {
533c21
+        crm_warn("Assuming %s cannot fence %s "
533c21
+                 "because status could not be executed: %s%s%s%s",
533c21
+                 dev->id, search->host,
533c21
+                 pcmk_exec_status_str(result->execution_status),
533c21
+                 ((result->exit_reason == NULL)? "" : " ("),
533c21
+                 ((result->exit_reason == NULL)? "" : result->exit_reason),
533c21
+                 ((result->exit_reason == NULL)? "" : ")"));
533c21
+        search_devices_record_result(search, dev->id, FALSE);
533c21
+        return;
533c21
+    }
533c21
+
533c21
     switch (result->exit_status) {
533c21
         case fence_status_unknown:
533c21
             crm_trace("%s reported it cannot fence %s", dev->id, search->host);
533c21
@@ -1187,21 +1199,41 @@ dynamic_list_search_cb(int pid, const pcmk__action_result_t *result,
533c21
 
533c21
     mainloop_set_trigger(dev->work);
533c21
 
533c21
-    if (result->exit_status == CRM_EX_OK) {
533c21
+    if ((result->execution_status == PCMK_EXEC_DONE)
533c21
+        && (result->exit_status == CRM_EX_OK)) {
533c21
         crm_info("Refreshing target list for %s", dev->id);
533c21
         g_list_free_full(dev->targets, free);
533c21
         dev->targets = stonith__parse_targets(result->action_stdout);
533c21
         dev->targets_age = time(NULL);
533c21
 
533c21
     } else if (dev->targets != NULL) {
533c21
-        crm_info("Reusing most recent target list for %s "
533c21
-                 "because list returned error code %d",
533c21
-                 dev->id, result->exit_status);
533c21
+        if (result->execution_status == PCMK_EXEC_DONE) {
533c21
+            crm_info("Reusing most recent target list for %s "
533c21
+                     "because list returned error code %d",
533c21
+                     dev->id, result->exit_status);
533c21
+        } else {
533c21
+            crm_info("Reusing most recent target list for %s "
533c21
+                     "because list could not be executed: %s%s%s%s",
533c21
+                     dev->id, pcmk_exec_status_str(result->execution_status),
533c21
+                     ((result->exit_reason == NULL)? "" : " ("),
533c21
+                     ((result->exit_reason == NULL)? "" : result->exit_reason),
533c21
+                     ((result->exit_reason == NULL)? "" : ")"));
533c21
+        }
533c21
 
533c21
     } else { // We have never successfully executed list
533c21
-        crm_warn("Assuming %s cannot fence %s "
533c21
-                 "because list returned error code %d",
533c21
-                 dev->id, search->host, result->exit_status);
533c21
+        if (result->execution_status == PCMK_EXEC_DONE) {
533c21
+            crm_warn("Assuming %s cannot fence %s "
533c21
+                     "because list returned error code %d",
533c21
+                     dev->id, search->host, result->exit_status);
533c21
+        } else {
533c21
+            crm_warn("Assuming %s cannot fence %s "
533c21
+                     "because list could not be executed: %s%s%s%s",
533c21
+                     dev->id, search->host,
533c21
+                     pcmk_exec_status_str(result->execution_status),
533c21
+                     ((result->exit_reason == NULL)? "" : " ("),
533c21
+                     ((result->exit_reason == NULL)? "" : result->exit_reason),
533c21
+                     ((result->exit_reason == NULL)? "" : ")"));
533c21
+        }
533c21
 
533c21
         /* Fall back to pcmk_host_check="status" if the user didn't explicitly
533c21
          * specify "dynamic-list".
533c21
-- 
533c21
2.27.0
533c21
533c21
533c21
From 5a30238a3b8691a5fc20f53906c0efcc50193306 Mon Sep 17 00:00:00 2001
533c21
From: Ken Gaillot <kgaillot@redhat.com>
533c21
Date: Tue, 21 Sep 2021 15:57:50 -0500
533c21
Subject: [PATCH 07/12] Refactor: fencer: pass result object when sending an
533c21
 async reply
533c21
533c21
... via stonith_send_async_reply(), instead of sending the mapped legacy code
533c21
and action stdout separately. Also, drop the "stonith_" prefix since the
533c21
function is static.
533c21
533c21
This moves the mapping from the stonith_send_async_reply() callers to the
533c21
function itself, so we use the result object and standard codes as long as
533c21
possible, and map to a legacy code only where needed.
533c21
---
533c21
 daemons/fenced/fenced_commands.c | 62 +++++++++++++++++++-------------
533c21
 daemons/fenced/fenced_remote.c   |  2 +-
533c21
 2 files changed, 39 insertions(+), 25 deletions(-)
533c21
533c21
diff --git a/daemons/fenced/fenced_commands.c b/daemons/fenced/fenced_commands.c
533c21
index 7b3fb25a1..e5f8162ce 100644
533c21
--- a/daemons/fenced/fenced_commands.c
533c21
+++ b/daemons/fenced/fenced_commands.c
533c21
@@ -2376,12 +2376,28 @@ log_async_result(async_command_t *cmd, int rc, int pid, const char *next,
533c21
     }
533c21
 }
533c21
 
533c21
+/*!
533c21
+ * \internal
533c21
+ * \brief Reply to requester after asynchronous command completion
533c21
+ *
533c21
+ * \param[in] cmd      Command that completed
533c21
+ * \param[in] result   Result of command
533c21
+ * \param[in] pid      Process ID of command, if available
533c21
+ * \param[in] merged   If true, command was merged with another, not executed
533c21
+ */
533c21
 static void
533c21
-stonith_send_async_reply(async_command_t *cmd, const char *output, int rc,
533c21
-                         int pid, bool merged)
533c21
+send_async_reply(async_command_t *cmd, const pcmk__action_result_t *result,
533c21
+                 int pid, bool merged)
533c21
 {
533c21
     xmlNode *reply = NULL;
533c21
     gboolean bcast = FALSE;
533c21
+    const char *output = NULL;
533c21
+    int rc = pcmk_ok;
533c21
+
533c21
+    CRM_CHECK((cmd != NULL) && (result != NULL), return);
533c21
+
533c21
+    output = result->action_stdout;
533c21
+    rc = pcmk_rc2legacy(stonith__result2rc(result));
533c21
 
533c21
     reply = stonith_construct_async_reply(cmd, output, NULL, rc);
533c21
 
533c21
@@ -2513,9 +2529,7 @@ st_child_done(int pid, const pcmk__action_result_t *result, void *user_data)
533c21
         goto done;
533c21
     }
533c21
 
533c21
-    stonith_send_async_reply(cmd, result->action_stdout,
533c21
-                             pcmk_rc2legacy(stonith__result2rc(result)), pid,
533c21
-                             false);
533c21
+    send_async_reply(cmd, result, pid, false);
533c21
 
533c21
     if (result->exit_status != CRM_EX_OK) {
533c21
         goto done;
533c21
@@ -2563,9 +2577,7 @@ st_child_done(int pid, const pcmk__action_result_t *result, void *user_data)
533c21
 
533c21
         cmd_list = g_list_remove_link(cmd_list, gIter);
533c21
 
533c21
-        stonith_send_async_reply(cmd_other, result->action_stdout,
533c21
-                                 pcmk_rc2legacy(stonith__result2rc(result)),
533c21
-                                 pid, true);
533c21
+        send_async_reply(cmd_other, result, pid, true);
533c21
         cancel_stonith_command(cmd_other);
533c21
 
533c21
         free_async_command(cmd_other);
533c21
@@ -2604,26 +2616,28 @@ stonith_fence_get_devices_cb(GList * devices, void *user_data)
533c21
         /* Order based on priority */
533c21
         devices = g_list_sort(devices, sort_device_priority);
533c21
         device = g_hash_table_lookup(device_list, devices->data);
533c21
-
533c21
-        if (device) {
533c21
-            cmd->device_list = devices;
533c21
-            cmd->device_next = devices->next;
533c21
-            devices = NULL;     /* list owned by cmd now */
533c21
-        }
533c21
     }
533c21
 
533c21
-    /* we have a device, schedule it for fencing. */
533c21
-    if (device) {
533c21
-        schedule_stonith_command(cmd, device);
533c21
-        /* in progress */
533c21
-        return;
533c21
-    }
533c21
+    if (device == NULL) { // No device found
533c21
+        pcmk__action_result_t result = {
533c21
+            // Ensure we don't pass garbage to free()
533c21
+            .exit_reason = NULL,
533c21
+            .action_stdout = NULL,
533c21
+            .action_stderr = NULL
533c21
+        };
533c21
 
533c21
-    /* no device found! */
533c21
-    stonith_send_async_reply(cmd, NULL, -ENODEV, 0, false);
533c21
+        pcmk__set_result(&result, CRM_EX_ERROR, PCMK_EXEC_NO_FENCE_DEVICE,
533c21
+                         "No fence device configured for target");
533c21
+        send_async_reply(cmd, &result, 0, false);
533c21
+        pcmk__reset_result(&result);
533c21
+        free_async_command(cmd);
533c21
+        g_list_free_full(devices, free);
533c21
 
533c21
-    free_async_command(cmd);
533c21
-    g_list_free_full(devices, free);
533c21
+    } else { // Device found, schedule it for fencing
533c21
+        cmd->device_list = devices;
533c21
+        cmd->device_next = devices->next;
533c21
+        schedule_stonith_command(cmd, device);
533c21
+    }
533c21
 }
533c21
 
533c21
 static int
533c21
diff --git a/daemons/fenced/fenced_remote.c b/daemons/fenced/fenced_remote.c
533c21
index ffaf60018..b09d2865e 100644
533c21
--- a/daemons/fenced/fenced_remote.c
533c21
+++ b/daemons/fenced/fenced_remote.c
533c21
@@ -996,7 +996,7 @@ stonith_manual_ack(xmlNode * msg, remote_fencing_op_t * op)
533c21
 
533c21
     remote_op_done(op, msg, pcmk_ok, FALSE);
533c21
 
533c21
-    /* Replies are sent via done_cb->stonith_send_async_reply()->do_local_reply() */
533c21
+    // Replies are sent via done_cb -> send_async_reply() -> do_local_reply()
533c21
     return -EINPROGRESS;
533c21
 }
533c21
 
533c21
-- 
533c21
2.27.0
533c21
533c21
533c21
From c67b6bfbe0baa1253058417ddfb9bc4cf0844e27 Mon Sep 17 00:00:00 2001
533c21
From: Ken Gaillot <kgaillot@redhat.com>
533c21
Date: Thu, 7 Oct 2021 17:25:38 -0500
533c21
Subject: [PATCH 08/12] Refactor: fencer: pass result object when building
533c21
 async reply
533c21
533c21
... via stonith_construct_async_reply(), instead of passing a mapped legacy rc
533c21
and action output separately, which will be helpful when we add the exit reason
533c21
to the reply. Also, drop the "stonith_" prefix since the function is static, and
533c21
drop an unused argument.
533c21
---
533c21
 daemons/fenced/fenced_commands.c | 33 +++++++++++++++-----------------
533c21
 1 file changed, 15 insertions(+), 18 deletions(-)
533c21
533c21
diff --git a/daemons/fenced/fenced_commands.c b/daemons/fenced/fenced_commands.c
533c21
index e5f8162ce..6bc12e6c4 100644
533c21
--- a/daemons/fenced/fenced_commands.c
533c21
+++ b/daemons/fenced/fenced_commands.c
533c21
@@ -112,8 +112,8 @@ typedef struct async_command_s {
533c21
     stonith_device_t *activating_on;
533c21
 } async_command_t;
533c21
 
533c21
-static xmlNode *stonith_construct_async_reply(async_command_t * cmd, const char *output,
533c21
-                                              xmlNode * data, int rc);
533c21
+static xmlNode *construct_async_reply(async_command_t *cmd,
533c21
+                                      const pcmk__action_result_t *result);
533c21
 
533c21
 static gboolean
533c21
 is_action_required(const char *action, stonith_device_t *device)
533c21
@@ -2399,7 +2399,7 @@ send_async_reply(async_command_t *cmd, const pcmk__action_result_t *result,
533c21
     output = result->action_stdout;
533c21
     rc = pcmk_rc2legacy(stonith__result2rc(result));
533c21
 
533c21
-    reply = stonith_construct_async_reply(cmd, output, NULL, rc);
533c21
+    reply = construct_async_reply(cmd, result);
533c21
 
533c21
     // Only replies for certain actions are broadcast
533c21
     if (pcmk__str_any_of(cmd->action, "metadata", "monitor", "list", "status",
533c21
@@ -2732,17 +2732,20 @@ stonith_construct_reply(xmlNode * request, const char *output, xmlNode * data, i
533c21
     return reply;
533c21
 }
533c21
 
533c21
+/*!
533c21
+ * \internal
533c21
+ * \brief Build an XML reply to an asynchronous fencing command
533c21
+ *
533c21
+ * \param[in] cmd     Fencing command that reply is for
533c21
+ * \param[in] result  Command result
533c21
+ */
533c21
 static xmlNode *
533c21
-stonith_construct_async_reply(async_command_t * cmd, const char *output, xmlNode * data, int rc)
533c21
+construct_async_reply(async_command_t *cmd, const pcmk__action_result_t *result)
533c21
 {
533c21
-    xmlNode *reply = NULL;
533c21
-
533c21
-    crm_trace("Creating a basic reply");
533c21
-    reply = create_xml_node(NULL, T_STONITH_REPLY);
533c21
+    xmlNode *reply = create_xml_node(NULL, T_STONITH_REPLY);
533c21
 
533c21
     crm_xml_add(reply, "st_origin", __func__);
533c21
     crm_xml_add(reply, F_TYPE, T_STONITH_NG);
533c21
-
533c21
     crm_xml_add(reply, F_STONITH_OPERATION, cmd->op);
533c21
     crm_xml_add(reply, F_STONITH_DEVICE, cmd->device);
533c21
     crm_xml_add(reply, F_STONITH_REMOTE_OP_ID, cmd->remote_op_id);
533c21
@@ -2753,15 +2756,9 @@ stonith_construct_async_reply(async_command_t * cmd, const char *output, xmlNode
533c21
     crm_xml_add(reply, F_STONITH_ORIGIN, cmd->origin);
533c21
     crm_xml_add_int(reply, F_STONITH_CALLID, cmd->id);
533c21
     crm_xml_add_int(reply, F_STONITH_CALLOPTS, cmd->options);
533c21
-
533c21
-    crm_xml_add_int(reply, F_STONITH_RC, rc);
533c21
-
533c21
-    crm_xml_add(reply, "st_output", output);
533c21
-
533c21
-    if (data != NULL) {
533c21
-        crm_info("Attaching reply output");
533c21
-        add_message_xml(reply, F_STONITH_CALLDATA, data);
533c21
-    }
533c21
+    crm_xml_add_int(reply, F_STONITH_RC,
533c21
+                    pcmk_rc2legacy(stonith__result2rc(result)));
533c21
+    crm_xml_add(reply, "st_output", result->action_stdout);
533c21
     return reply;
533c21
 }
533c21
 
533c21
-- 
533c21
2.27.0
533c21
533c21
533c21
From 2686caeb3b74f687ddd86a4e483250ca8096ba7c Mon Sep 17 00:00:00 2001
533c21
From: Ken Gaillot <kgaillot@redhat.com>
533c21
Date: Tue, 19 Oct 2021 18:27:31 -0500
533c21
Subject: [PATCH 09/12] Log: fencer: improve messages for asynchronous results
533c21
533c21
Now that we have the full result object, pass it to log_async_result().
533c21
Instead of logging a mapped legacy rc, log the execution status or exit status
533c21
as appropriate, along with the exit reason.
533c21
---
533c21
 daemons/fenced/fenced_commands.c | 43 +++++++++++++++++---------------
533c21
 1 file changed, 23 insertions(+), 20 deletions(-)
533c21
533c21
diff --git a/daemons/fenced/fenced_commands.c b/daemons/fenced/fenced_commands.c
533c21
index 6bc12e6c4..9d06c68dc 100644
533c21
--- a/daemons/fenced/fenced_commands.c
533c21
+++ b/daemons/fenced/fenced_commands.c
533c21
@@ -2305,15 +2305,14 @@ stonith_query(xmlNode * msg, const char *remote_peer, const char *client_id, int
533c21
  * \brief Log the result of an asynchronous command
533c21
  *
533c21
  * \param[in] cmd        Command the result is for
533c21
- * \param[in] rc         Legacy return code corresponding to result
533c21
+ * \param[in] result     Result of command
533c21
  * \param[in] pid        Process ID of command, if available
533c21
  * \param[in] next       Alternate device that will be tried if command failed
533c21
- * \param[in] output     Command output, if any
533c21
  * \param[in] op_merged  Whether this command was merged with an earlier one
533c21
  */
533c21
 static void
533c21
-log_async_result(async_command_t *cmd, int rc, int pid, const char *next,
533c21
-                 const char *output, gboolean op_merged)
533c21
+log_async_result(async_command_t *cmd, const pcmk__action_result_t *result,
533c21
+                 int pid, const char *next, bool op_merged)
533c21
 {
533c21
     int log_level = LOG_ERR;
533c21
     int output_log_level = LOG_NEVER;
533c21
@@ -2321,17 +2320,18 @@ log_async_result(async_command_t *cmd, int rc, int pid, const char *next,
533c21
 
533c21
     GString *msg = g_string_sized_new(80); // Reasonable starting size
533c21
 
533c21
-    // Choose log levels appropriately
533c21
-    if (rc == 0) { // Success
533c21
+    // Choose log levels appropriately if we have a result
533c21
+    if ((result->execution_status == PCMK_EXEC_DONE)
533c21
+        && (result->exit_status == CRM_EX_OK))  { // Success
533c21
         log_level = (cmd->victim == NULL)? LOG_DEBUG : LOG_NOTICE;
533c21
-        if ((output != NULL)
533c21
+        if ((result->action_stdout != NULL)
533c21
             && !pcmk__str_eq(cmd->action, "metadata", pcmk__str_casei)) {
533c21
             output_log_level = LOG_DEBUG;
533c21
         }
533c21
         next = NULL;
533c21
     } else { // Failure
533c21
         log_level = (cmd->victim == NULL)? LOG_NOTICE : LOG_ERR;
533c21
-        if ((output != NULL)
533c21
+        if ((result->action_stdout != NULL)
533c21
             && !pcmk__str_eq(cmd->action, "metadata", pcmk__str_casei)) {
533c21
             output_log_level = LOG_WARNING;
533c21
         }
533c21
@@ -2347,10 +2347,18 @@ log_async_result(async_command_t *cmd, int rc, int pid, const char *next,
533c21
     }
533c21
     g_string_append_printf(msg, "using %s ", cmd->device);
533c21
 
533c21
-    // Add result
533c21
-    g_string_append_printf(msg, "returned %d (%s)", rc, pcmk_strerror(rc));
533c21
+    // Add exit status or execution status as appropriate
533c21
+    if (result->execution_status == PCMK_EXEC_DONE) {
533c21
+        g_string_append_printf(msg, "returned %d", result->exit_status);
533c21
+    } else {
533c21
+        g_string_append_printf(msg, "could not be executed: %s",
533c21
+                               pcmk_exec_status_str(result->execution_status));
533c21
+    }
533c21
 
533c21
-    // Add next device if appropriate
533c21
+    // Add exit reason and next device if appropriate
533c21
+    if (result->exit_reason != NULL) {
533c21
+        g_string_append_printf(msg, " (%s)", result->exit_reason);
533c21
+    }
533c21
     if (next != NULL) {
533c21
         g_string_append_printf(msg, ", retrying with %s", next);
533c21
     }
533c21
@@ -2371,7 +2379,7 @@ log_async_result(async_command_t *cmd, int rc, int pid, const char *next,
533c21
     if (output_log_level != LOG_NEVER) {
533c21
         char *prefix = crm_strdup_printf("%s[%d]", cmd->device, pid);
533c21
 
533c21
-        crm_log_output(output_log_level, prefix, output);
533c21
+        crm_log_output(output_log_level, prefix, result->action_stdout);
533c21
         free(prefix);
533c21
     }
533c21
 }
533c21
@@ -2391,14 +2399,9 @@ send_async_reply(async_command_t *cmd, const pcmk__action_result_t *result,
533c21
 {
533c21
     xmlNode *reply = NULL;
533c21
     gboolean bcast = FALSE;
533c21
-    const char *output = NULL;
533c21
-    int rc = pcmk_ok;
533c21
 
533c21
     CRM_CHECK((cmd != NULL) && (result != NULL), return);
533c21
 
533c21
-    output = result->action_stdout;
533c21
-    rc = pcmk_rc2legacy(stonith__result2rc(result));
533c21
-
533c21
     reply = construct_async_reply(cmd, result);
533c21
 
533c21
     // Only replies for certain actions are broadcast
533c21
@@ -2412,7 +2415,7 @@ send_async_reply(async_command_t *cmd, const pcmk__action_result_t *result,
533c21
         bcast = TRUE;
533c21
     }
533c21
 
533c21
-    log_async_result(cmd, rc, pid, NULL, output, merged);
533c21
+    log_async_result(cmd, result, pid, NULL, merged);
533c21
     crm_log_xml_trace(reply, "Reply");
533c21
 
533c21
     if (merged) {
533c21
@@ -2436,6 +2439,7 @@ send_async_reply(async_command_t *cmd, const pcmk__action_result_t *result,
533c21
     if (stand_alone) {
533c21
         /* Do notification with a clean data object */
533c21
         xmlNode *notify_data = create_xml_node(NULL, T_STONITH_NOTIFY_FENCE);
533c21
+        int rc = pcmk_rc2legacy(stonith__result2rc(result));
533c21
 
533c21
         crm_xml_add_int(notify_data, F_STONITH_RC, rc);
533c21
         crm_xml_add(notify_data, F_STONITH_TARGET, cmd->victim);
533c21
@@ -2521,8 +2525,7 @@ st_child_done(int pid, const pcmk__action_result_t *result, void *user_data)
533c21
 
533c21
     /* this operation requires more fencing, hooray! */
533c21
     if (next_device) {
533c21
-        log_async_result(cmd, pcmk_rc2legacy(stonith__result2rc(result)), pid,
533c21
-                         next_device->id, result->action_stdout, FALSE);
533c21
+        log_async_result(cmd, result, pid, next_device->id, false);
533c21
         schedule_stonith_command(cmd, next_device);
533c21
         /* Prevent cmd from being freed */
533c21
         cmd = NULL;
533c21
-- 
533c21
2.27.0
533c21
533c21
533c21
From 9f9dea518da50f629589d505ea0f330a47111d76 Mon Sep 17 00:00:00 2001
533c21
From: Ken Gaillot <kgaillot@redhat.com>
533c21
Date: Thu, 28 Oct 2021 13:29:31 -0500
533c21
Subject: [PATCH 10/12] Test: cts-fencing: update expected log messages
533c21
533c21
... which now log the original exit status rather than a mapped legacy rc
533c21
---
533c21
 cts/cts-fencing.in | 28 ++++++++++++++--------------
533c21
 1 file changed, 14 insertions(+), 14 deletions(-)
533c21
533c21
diff --git a/cts/cts-fencing.in b/cts/cts-fencing.in
533c21
index babfb6351..5cd9f7b8f 100644
533c21
--- a/cts/cts-fencing.in
533c21
+++ b/cts/cts-fencing.in
533c21
@@ -886,7 +886,7 @@ class Tests(object):
533c21
             test.add_cmd("stonith_admin", "--output-as=xml -F node3 -t 20")
533c21
 
533c21
             test.add_stonith_log_pattern("Total timeout set to 40")
533c21
-            test.add_stonith_log_pattern("targeting node3 using false returned -201")
533c21
+            test.add_stonith_log_pattern("targeting node3 using false returned 1")
533c21
             test.add_stonith_log_pattern("targeting node3 using true returned 0")
533c21
 
533c21
         # test what happens when the first fencing level fails.
533c21
@@ -920,8 +920,8 @@ class Tests(object):
533c21
             test.add_cmd("stonith_admin", "--output-as=xml -F node3 -t 3")
533c21
 
533c21
             test.add_stonith_log_pattern("Total timeout set to 18")
533c21
-            test.add_stonith_log_pattern("targeting node3 using false1 returned -201")
533c21
-            test.add_stonith_log_pattern("targeting node3 using false2 returned -201")
533c21
+            test.add_stonith_log_pattern("targeting node3 using false1 returned 1")
533c21
+            test.add_stonith_log_pattern("targeting node3 using false2 returned 1")
533c21
             test.add_stonith_log_pattern("targeting node3 using true3 returned 0")
533c21
             test.add_stonith_log_pattern("targeting node3 using true4 returned 0")
533c21
 
533c21
@@ -987,7 +987,7 @@ class Tests(object):
533c21
             test.add_cmd("stonith_admin", "--output-as=xml -F node3 -t 20")
533c21
 
533c21
             test.add_stonith_log_pattern("Total timeout set to 8")
533c21
-            test.add_stonith_log_pattern("targeting node3 using false1 returned -201")
533c21
+            test.add_stonith_log_pattern("targeting node3 using false1 returned 1")
533c21
             test.add_stonith_neg_log_pattern("targeting node3 using false2 returned ")
533c21
             test.add_stonith_log_pattern("targeting node3 using true3 returned 0")
533c21
             test.add_stonith_log_pattern("targeting node3 using true4 returned 0")
533c21
@@ -1147,7 +1147,7 @@ class Tests(object):
533c21
                          "--output-as=xml -R true1 -a fence_dummy_no_reboot -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"")
533c21
             test.add_cmd("stonith_admin", "--output-as=xml -B node1 -t 5 -V")
533c21
             test.add_stonith_log_pattern("does not support reboot")
533c21
-            test.add_stonith_log_pattern("using true1 returned 0 (OK)")
533c21
+            test.add_stonith_log_pattern("using true1 returned 0")
533c21
 
533c21
         # make sure reboot is used when reboot action is advertised
533c21
         for test_type in test_types:
533c21
@@ -1158,7 +1158,7 @@ class Tests(object):
533c21
                          "--output-as=xml -R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"")
533c21
             test.add_cmd("stonith_admin", "--output-as=xml -B node1 -t 5 -V")
533c21
             test.add_stonith_neg_log_pattern("does not advertise support for 'reboot', performing 'off'")
533c21
-            test.add_stonith_log_pattern("using true1 returned 0 (OK)")
533c21
+            test.add_stonith_log_pattern("using true1 returned 0")
533c21
 
533c21
         # make sure requested fencing delay is applied only for the first device in the first level
533c21
         # make sure static delay from pcmk_delay_base is added
533c21
@@ -1240,8 +1240,8 @@ class Tests(object):
533c21
                      '--output-as=xml -R true2 -a fence_dummy_auto_unfence -o "mode=pass" -o "pcmk_host_list=%s"' % (our_uname))
533c21
         test.add_cmd("stonith_admin", "--output-as=xml -U %s -t 3" % (our_uname))
533c21
         # both devices should be executed
533c21
-        test.add_stonith_log_pattern("using true1 returned 0 (OK)")
533c21
-        test.add_stonith_log_pattern("using true2 returned 0 (OK)")
533c21
+        test.add_stonith_log_pattern("using true1 returned 0")
533c21
+        test.add_stonith_log_pattern("using true2 returned 0")
533c21
 
533c21
         ### verify unfencing using automatic unfencing fails if any of the required agents fail
533c21
         test = self.new_test("cpg_unfence_required_2",
533c21
@@ -1264,8 +1264,8 @@ class Tests(object):
533c21
         test.add_cmd("stonith_admin", "--output-as=xml -r %s -i 1 -v true1" % (our_uname))
533c21
         test.add_cmd("stonith_admin", "--output-as=xml -r %s -i 2 -v true2" % (our_uname))
533c21
         test.add_cmd("stonith_admin", "--output-as=xml -U %s -t 3" % (our_uname))
533c21
-        test.add_stonith_log_pattern("using true1 returned 0 (OK)")
533c21
-        test.add_stonith_log_pattern("using true2 returned 0 (OK)")
533c21
+        test.add_stonith_log_pattern("using true1 returned 0")
533c21
+        test.add_stonith_log_pattern("using true2 returned 0")
533c21
 
533c21
         ### verify unfencing using automatic devices with topology
533c21
         test = self.new_test("cpg_unfence_required_4",
533c21
@@ -1296,10 +1296,10 @@ class Tests(object):
533c21
         test.add_cmd("stonith_admin", "--output-as=xml -r %s -i 3 -v false4" % (our_uname))
533c21
         test.add_cmd("stonith_admin", "--output-as=xml -r %s -i 4 -v true4" % (our_uname))
533c21
         test.add_cmd("stonith_admin", "--output-as=xml -U %s -t 3" % (our_uname))
533c21
-        test.add_stonith_log_pattern("using true1 returned 0 (OK)")
533c21
-        test.add_stonith_log_pattern("using true2 returned 0 (OK)")
533c21
-        test.add_stonith_log_pattern("using true3 returned 0 (OK)")
533c21
-        test.add_stonith_log_pattern("using true4 returned 0 (OK)")
533c21
+        test.add_stonith_log_pattern("using true1 returned 0")
533c21
+        test.add_stonith_log_pattern("using true2 returned 0")
533c21
+        test.add_stonith_log_pattern("using true3 returned 0")
533c21
+        test.add_stonith_log_pattern("using true4 returned 0")
533c21
 
533c21
     def build_unfence_on_target_tests(self):
533c21
         """ Register tests that verify unfencing that runs on the target """
533c21
-- 
533c21
2.27.0
533c21
533c21
533c21
From be72166ed9ccb53c218529783660503df95da719 Mon Sep 17 00:00:00 2001
533c21
From: Ken Gaillot <kgaillot@redhat.com>
533c21
Date: Thu, 16 Sep 2021 16:50:23 -0500
533c21
Subject: [PATCH 11/12] Log: libcrmservice: downgrade failed action messages
533c21
533c21
Previously, we would often get duplicate log messages for failed actions,
533c21
from the service library and again from its callers.
533c21
533c21
Now that the service library tracks and provides exit reasons, callers can log
533c21
sufficient detail with better context, so downgrade the library's messages to
533c21
info level or lower. Similarly, avoid duplicate logs of process output.
533c21
533c21
Certain messages (such as out-of-memory) remain at higher severity.
533c21
---
533c21
 daemons/controld/controld_execd.c | 15 +++---
533c21
 lib/fencing/st_client.c           | 11 ++---
533c21
 lib/services/services.c           | 14 +++---
533c21
 lib/services/services_linux.c     | 80 ++++++++++++++++---------------
533c21
 lib/services/systemd.c            | 20 ++++----
533c21
 lib/services/upstart.c            | 19 ++++----
533c21
 6 files changed, 80 insertions(+), 79 deletions(-)
533c21
533c21
diff --git a/daemons/controld/controld_execd.c b/daemons/controld/controld_execd.c
533c21
index bded6e6b6..3ddff6e13 100644
533c21
--- a/daemons/controld/controld_execd.c
533c21
+++ b/daemons/controld/controld_execd.c
533c21
@@ -2684,16 +2684,15 @@ log_executor_event(lrmd_event_data_t *op, const char *op_key,
533c21
     do_crm_log(log_level, "%s", str->str);
533c21
     g_string_free(str, TRUE);
533c21
 
533c21
-    if (op->output != NULL) {
533c21
-        char *prefix = crm_strdup_printf("%s-" PCMK__OP_FMT ":%d", node_name,
533c21
+    /* The services library has already logged the output at info or debug
533c21
+     * level, so just raise to notice if it looks like a failure.
533c21
+     */
533c21
+    if ((op->output != NULL) && (op->rc != PCMK_OCF_OK)) {
533c21
+        char *prefix = crm_strdup_printf(PCMK__OP_FMT "@%s output",
533c21
                                          op->rsc_id, op->op_type,
533c21
-                                         op->interval_ms, op->call_id);
533c21
+                                         op->interval_ms, node_name);
533c21
 
533c21
-        if (op->rc) {
533c21
-            crm_log_output(LOG_NOTICE, prefix, op->output);
533c21
-        } else {
533c21
-            crm_log_output(LOG_DEBUG, prefix, op->output);
533c21
-        }
533c21
+        crm_log_output(LOG_NOTICE, prefix, op->output);
533c21
         free(prefix);
533c21
     }
533c21
 }
533c21
diff --git a/lib/fencing/st_client.c b/lib/fencing/st_client.c
533c21
index 3d4127eff..2fbff7f24 100644
533c21
--- a/lib/fencing/st_client.c
533c21
+++ b/lib/fencing/st_client.c
533c21
@@ -276,14 +276,9 @@ stonith__watchdog_fencing_enabled_for_node(const char *node)
533c21
 static void
533c21
 log_action(stonith_action_t *action, pid_t pid)
533c21
 {
533c21
-    if (action->result.action_stdout != NULL) {
533c21
-        /* Logging the whole string confuses syslog when the string is xml */
533c21
-        char *prefix = crm_strdup_printf("%s[%d] stdout:", action->agent, pid);
533c21
-
533c21
-        crm_log_output(LOG_TRACE, prefix, action->result.action_stdout);
533c21
-        free(prefix);
533c21
-    }
533c21
-
533c21
+    /* The services library has already logged the output at info or debug
533c21
+     * level, so just raise to warning for stderr.
533c21
+     */
533c21
     if (action->result.action_stderr != NULL) {
533c21
         /* Logging the whole string confuses syslog when the string is xml */
533c21
         char *prefix = crm_strdup_printf("%s[%d] stderr:", action->agent, pid);
533c21
diff --git a/lib/services/services.c b/lib/services/services.c
533c21
index 86a0a213c..cf8bbc70e 100644
533c21
--- a/lib/services/services.c
533c21
+++ b/lib/services/services.c
533c21
@@ -319,13 +319,13 @@ services__create_resource_action(const char *name, const char *standard,
533c21
         rc = services__nagios_prepare(op);
533c21
 #endif
533c21
     } else {
533c21
-        crm_err("Unknown resource standard: %s", op->standard);
533c21
+        crm_info("Unknown resource standard: %s", op->standard);
533c21
         rc = ENOENT;
533c21
     }
533c21
 
533c21
     if (rc != pcmk_rc_ok) {
533c21
-        crm_err("Cannot prepare %s operation for %s: %s",
533c21
-                action, name, strerror(rc));
533c21
+        crm_info("Cannot prepare %s operation for %s: %s",
533c21
+                 action, name, strerror(rc));
533c21
         services__handle_exec_error(op, rc);
533c21
     }
533c21
     return op;
533c21
@@ -967,14 +967,14 @@ execute_metadata_action(svc_action_t *op)
533c21
     const char *class = op->standard;
533c21
 
533c21
     if (op->agent == NULL) {
533c21
-        crm_err("meta-data requested without specifying agent");
533c21
+        crm_info("Meta-data requested without specifying agent");
533c21
         services__set_result(op, services__generic_error(op),
533c21
                              PCMK_EXEC_ERROR_FATAL, "Agent not specified");
533c21
         return EINVAL;
533c21
     }
533c21
 
533c21
     if (class == NULL) {
533c21
-        crm_err("meta-data requested for agent %s without specifying class",
533c21
+        crm_info("Meta-data requested for agent %s without specifying class",
533c21
                 op->agent);
533c21
         services__set_result(op, services__generic_error(op),
533c21
                              PCMK_EXEC_ERROR_FATAL,
533c21
@@ -986,8 +986,8 @@ execute_metadata_action(svc_action_t *op)
533c21
         class = resources_find_service_class(op->agent);
533c21
     }
533c21
     if (class == NULL) {
533c21
-        crm_err("meta-data requested for %s, but could not determine class",
533c21
-                op->agent);
533c21
+        crm_info("Meta-data requested for %s, but could not determine class",
533c21
+                 op->agent);
533c21
         services__set_result(op, services__generic_error(op),
533c21
                              PCMK_EXEC_ERROR_HARD,
533c21
                              "Agent standard could not be determined");
533c21
diff --git a/lib/services/services_linux.c b/lib/services/services_linux.c
533c21
index b2ff27a0d..9a4c6cf80 100644
533c21
--- a/lib/services/services_linux.c
533c21
+++ b/lib/services/services_linux.c
533c21
@@ -64,8 +64,8 @@ sigchld_setup(struct sigchld_data_s *data)
533c21
 
533c21
     // Block SIGCHLD (saving previous set of blocked signals to restore later)
533c21
     if (sigprocmask(SIG_BLOCK, &(data->mask), &(data->old_mask)) < 0) {
533c21
-        crm_err("Wait for child process completion failed: %s "
533c21
-                CRM_XS " source=sigprocmask", pcmk_strerror(errno));
533c21
+        crm_info("Wait for child process completion failed: %s "
533c21
+                 CRM_XS " source=sigprocmask", pcmk_strerror(errno));
533c21
         return false;
533c21
     }
533c21
     return true;
533c21
@@ -81,8 +81,8 @@ sigchld_open(struct sigchld_data_s *data)
533c21
 
533c21
     fd = signalfd(-1, &(data->mask), SFD_NONBLOCK);
533c21
     if (fd < 0) {
533c21
-        crm_err("Wait for child process completion failed: %s "
533c21
-                CRM_XS " source=signalfd", pcmk_strerror(errno));
533c21
+        crm_info("Wait for child process completion failed: %s "
533c21
+                 CRM_XS " source=signalfd", pcmk_strerror(errno));
533c21
     }
533c21
     return fd;
533c21
 }
533c21
@@ -108,8 +108,8 @@ sigchld_received(int fd)
533c21
     }
533c21
     s = read(fd, &fdsi, sizeof(struct signalfd_siginfo));
533c21
     if (s != sizeof(struct signalfd_siginfo)) {
533c21
-        crm_err("Wait for child process completion failed: %s "
533c21
-                CRM_XS " source=read", pcmk_strerror(errno));
533c21
+        crm_info("Wait for child process completion failed: %s "
533c21
+                 CRM_XS " source=read", pcmk_strerror(errno));
533c21
 
533c21
     } else if (fdsi.ssi_signo == SIGCHLD) {
533c21
         return true;
533c21
@@ -149,8 +149,8 @@ sigchld_handler()
533c21
     if ((last_sigchld_data != NULL)
533c21
         && (last_sigchld_data->pipe_fd[1] >= 0)
533c21
         && (write(last_sigchld_data->pipe_fd[1], "", 1) == -1)) {
533c21
-        crm_err("Wait for child process completion failed: %s "
533c21
-                CRM_XS " source=write", pcmk_strerror(errno));
533c21
+        crm_info("Wait for child process completion failed: %s "
533c21
+                 CRM_XS " source=write", pcmk_strerror(errno));
533c21
     }
533c21
 }
533c21
 
533c21
@@ -162,19 +162,19 @@ sigchld_setup(struct sigchld_data_s *data)
533c21
     data->pipe_fd[0] = data->pipe_fd[1] = -1;
533c21
 
533c21
     if (pipe(data->pipe_fd) == -1) {
533c21
-        crm_err("Wait for child process completion failed: %s "
533c21
-                CRM_XS " source=pipe", pcmk_strerror(errno));
533c21
+        crm_info("Wait for child process completion failed: %s "
533c21
+                 CRM_XS " source=pipe", pcmk_strerror(errno));
533c21
         return false;
533c21
     }
533c21
 
533c21
     rc = pcmk__set_nonblocking(data->pipe_fd[0]);
533c21
     if (rc != pcmk_rc_ok) {
533c21
-        crm_warn("Could not set pipe input non-blocking: %s " CRM_XS " rc=%d",
533c21
+        crm_info("Could not set pipe input non-blocking: %s " CRM_XS " rc=%d",
533c21
                  pcmk_rc_str(rc), rc);
533c21
     }
533c21
     rc = pcmk__set_nonblocking(data->pipe_fd[1]);
533c21
     if (rc != pcmk_rc_ok) {
533c21
-        crm_warn("Could not set pipe output non-blocking: %s " CRM_XS " rc=%d",
533c21
+        crm_info("Could not set pipe output non-blocking: %s " CRM_XS " rc=%d",
533c21
                  pcmk_rc_str(rc), rc);
533c21
     }
533c21
 
533c21
@@ -183,8 +183,8 @@ sigchld_setup(struct sigchld_data_s *data)
533c21
     data->sa.sa_flags = 0;
533c21
     sigemptyset(&(data->sa.sa_mask));
533c21
     if (sigaction(SIGCHLD, &(data->sa), &(data->old_sa)) < 0) {
533c21
-        crm_err("Wait for child process completion failed: %s "
533c21
-                CRM_XS " source=sigaction", pcmk_strerror(errno));
533c21
+        crm_info("Wait for child process completion failed: %s "
533c21
+                 CRM_XS " source=sigaction", pcmk_strerror(errno));
533c21
     }
533c21
 
533c21
     // Remember data for use in signal handler
533c21
@@ -585,7 +585,11 @@ log_op_output(svc_action_t *op)
533c21
 {
533c21
     char *prefix = crm_strdup_printf("%s[%d] error output", op->id, op->pid);
533c21
 
533c21
-    crm_log_output(LOG_NOTICE, prefix, op->stderr_data);
533c21
+    /* The library caller has better context to know how important the output
533c21
+     * is, so log it at info and debug severity here. They can log it again at
533c21
+     * higher severity if appropriate.
533c21
+     */
533c21
+    crm_log_output(LOG_INFO, prefix, op->stderr_data);
533c21
     strcpy(prefix + strlen(prefix) - strlen("error output"), "output");
533c21
     crm_log_output(LOG_DEBUG, prefix, op->stdout_data);
533c21
     free(prefix);
533c21
@@ -673,7 +677,7 @@ async_action_complete(mainloop_child_t *p, pid_t pid, int core, int signo,
533c21
         parse_exit_reason_from_stderr(op);
533c21
 
533c21
     } else if (mainloop_child_timeout(p)) {
533c21
-        crm_warn("%s[%d] timed out after %dms", op->id, op->pid, op->timeout);
533c21
+        crm_info("%s[%d] timed out after %dms", op->id, op->pid, op->timeout);
533c21
         services__set_result(op, services__generic_error(op), PCMK_EXEC_TIMEOUT,
533c21
                              "Process did not exit within specified timeout");
533c21
 
533c21
@@ -686,7 +690,7 @@ async_action_complete(mainloop_child_t *p, pid_t pid, int core, int signo,
533c21
         services__set_result(op, PCMK_OCF_OK, PCMK_EXEC_CANCELLED, NULL);
533c21
 
533c21
     } else {
533c21
-        crm_warn("%s[%d] terminated with signal %d (%s)",
533c21
+        crm_info("%s[%d] terminated with signal %d (%s)",
533c21
                  op->id, op->pid, signo, strsignal(signo));
533c21
         services__set_result(op, PCMK_OCF_UNKNOWN_ERROR, PCMK_EXEC_ERROR,
533c21
                              "Process interrupted by signal");
533c21
@@ -908,12 +912,12 @@ action_launch_child(svc_action_t *op)
533c21
         sp.sched_priority = 0;
533c21
 
533c21
         if (sched_setscheduler(0, SCHED_OTHER, &sp) == -1) {
533c21
-            crm_warn("Could not reset scheduling policy for %s", op->id);
533c21
+            crm_info("Could not reset scheduling policy for %s", op->id);
533c21
         }
533c21
     }
533c21
 #endif
533c21
     if (setpriority(PRIO_PROCESS, 0, 0) == -1) {
533c21
-        crm_warn("Could not reset process priority for %s", op->id);
533c21
+        crm_info("Could not reset process priority for %s", op->id);
533c21
     }
533c21
 
533c21
     /* Man: The call setpgrp() is equivalent to setpgid(0,0)
533c21
@@ -941,7 +945,7 @@ action_launch_child(svc_action_t *op)
533c21
         } else {
533c21
             crm_err("Considering %s unconfigured "
533c21
                     "because unable to load CIB secrets: %s",
533c21
-                     op->rsc, pcmk_rc_str(rc));
533c21
+                    op->rsc, pcmk_rc_str(rc));
533c21
             exit_child(op, services__configuration_error(op, false),
533c21
                        "Unable to load CIB secrets");
533c21
         }
533c21
@@ -1043,7 +1047,7 @@ wait_for_sync_result(svc_action_t *op, struct sigchld_data_s *data)
533c21
 
533c21
                 } else if (wait_rc < 0) {
533c21
                     wait_reason = pcmk_rc_str(errno);
533c21
-                    crm_warn("Wait for completion of %s[%d] failed: %s "
533c21
+                    crm_info("Wait for completion of %s[%d] failed: %s "
533c21
                              CRM_XS " source=waitpid",
533c21
                              op->id, op->pid, wait_reason);
533c21
                     wait_rc = 0; // Act as if process is still running
533c21
@@ -1057,8 +1061,8 @@ wait_for_sync_result(svc_action_t *op, struct sigchld_data_s *data)
533c21
 
533c21
         } else if ((poll_rc < 0) && (errno != EINTR)) {
533c21
             wait_reason = pcmk_rc_str(errno);
533c21
-            crm_err("Wait for completion of %s[%d] failed: %s "
533c21
-                    CRM_XS " source=poll", op->id, op->pid, wait_reason);
533c21
+            crm_info("Wait for completion of %s[%d] failed: %s "
533c21
+                     CRM_XS " source=poll", op->id, op->pid, wait_reason);
533c21
             break;
533c21
         }
533c21
 
533c21
@@ -1078,7 +1082,7 @@ wait_for_sync_result(svc_action_t *op, struct sigchld_data_s *data)
533c21
             services__set_result(op, services__generic_error(op),
533c21
                                  PCMK_EXEC_TIMEOUT,
533c21
                                  "Process did not exit within specified timeout");
533c21
-            crm_warn("%s[%d] timed out after %dms",
533c21
+            crm_info("%s[%d] timed out after %dms",
533c21
                      op->id, op->pid, op->timeout);
533c21
 
533c21
         } else {
533c21
@@ -1110,8 +1114,8 @@ wait_for_sync_result(svc_action_t *op, struct sigchld_data_s *data)
533c21
 
533c21
         services__set_result(op, services__generic_error(op), PCMK_EXEC_ERROR,
533c21
                              "Process interrupted by signal");
533c21
-        crm_err("%s[%d] terminated with signal %d (%s)",
533c21
-                op->id, op->pid, signo, strsignal(signo));
533c21
+        crm_info("%s[%d] terminated with signal %d (%s)",
533c21
+                 op->id, op->pid, signo, strsignal(signo));
533c21
 
533c21
 #ifdef WCOREDUMP
533c21
         if (WCOREDUMP(status)) {
533c21
@@ -1155,7 +1159,7 @@ services__execute_file(svc_action_t *op)
533c21
     // Catch common failure conditions early
533c21
     if (stat(op->opaque->exec, &st) != 0) {
533c21
         rc = errno;
533c21
-        crm_warn("Cannot execute '%s': %s " CRM_XS " stat rc=%d",
533c21
+        crm_info("Cannot execute '%s': %s " CRM_XS " stat rc=%d",
533c21
                  op->opaque->exec, pcmk_strerror(rc), rc);
533c21
         services__handle_exec_error(op, rc);
533c21
         goto done;
533c21
@@ -1163,8 +1167,8 @@ services__execute_file(svc_action_t *op)
533c21
 
533c21
     if (pipe(stdout_fd) < 0) {
533c21
         rc = errno;
533c21
-        crm_err("Cannot execute '%s': %s " CRM_XS " pipe(stdout) rc=%d",
533c21
-                op->opaque->exec, pcmk_strerror(rc), rc);
533c21
+        crm_info("Cannot execute '%s': %s " CRM_XS " pipe(stdout) rc=%d",
533c21
+                 op->opaque->exec, pcmk_strerror(rc), rc);
533c21
         services__handle_exec_error(op, rc);
533c21
         goto done;
533c21
     }
533c21
@@ -1174,8 +1178,8 @@ services__execute_file(svc_action_t *op)
533c21
 
533c21
         close_pipe(stdout_fd);
533c21
 
533c21
-        crm_err("Cannot execute '%s': %s " CRM_XS " pipe(stderr) rc=%d",
533c21
-                op->opaque->exec, pcmk_strerror(rc), rc);
533c21
+        crm_info("Cannot execute '%s': %s " CRM_XS " pipe(stderr) rc=%d",
533c21
+                 op->opaque->exec, pcmk_strerror(rc), rc);
533c21
         services__handle_exec_error(op, rc);
533c21
         goto done;
533c21
     }
533c21
@@ -1187,8 +1191,8 @@ services__execute_file(svc_action_t *op)
533c21
             close_pipe(stdout_fd);
533c21
             close_pipe(stderr_fd);
533c21
 
533c21
-            crm_err("Cannot execute '%s': %s " CRM_XS " pipe(stdin) rc=%d",
533c21
-                    op->opaque->exec, pcmk_strerror(rc), rc);
533c21
+            crm_info("Cannot execute '%s': %s " CRM_XS " pipe(stdin) rc=%d",
533c21
+                     op->opaque->exec, pcmk_strerror(rc), rc);
533c21
             services__handle_exec_error(op, rc);
533c21
             goto done;
533c21
         }
533c21
@@ -1212,8 +1216,8 @@ services__execute_file(svc_action_t *op)
533c21
             close_pipe(stdout_fd);
533c21
             close_pipe(stderr_fd);
533c21
 
533c21
-            crm_err("Cannot execute '%s': %s " CRM_XS " fork rc=%d",
533c21
-                    op->opaque->exec, pcmk_strerror(rc), rc);
533c21
+            crm_info("Cannot execute '%s': %s " CRM_XS " fork rc=%d",
533c21
+                     op->opaque->exec, pcmk_strerror(rc), rc);
533c21
             services__handle_exec_error(op, rc);
533c21
             if (op->synchronous) {
533c21
                 sigchld_cleanup(&data);
533c21
@@ -1271,7 +1275,7 @@ services__execute_file(svc_action_t *op)
533c21
     op->opaque->stdout_fd = stdout_fd[0];
533c21
     rc = pcmk__set_nonblocking(op->opaque->stdout_fd);
533c21
     if (rc != pcmk_rc_ok) {
533c21
-        crm_warn("Could not set '%s' output non-blocking: %s "
533c21
+        crm_info("Could not set '%s' output non-blocking: %s "
533c21
                  CRM_XS " rc=%d",
533c21
                  op->opaque->exec, pcmk_rc_str(rc), rc);
533c21
     }
533c21
@@ -1279,7 +1283,7 @@ services__execute_file(svc_action_t *op)
533c21
     op->opaque->stderr_fd = stderr_fd[0];
533c21
     rc = pcmk__set_nonblocking(op->opaque->stderr_fd);
533c21
     if (rc != pcmk_rc_ok) {
533c21
-        crm_warn("Could not set '%s' error output non-blocking: %s "
533c21
+        crm_info("Could not set '%s' error output non-blocking: %s "
533c21
                  CRM_XS " rc=%d",
533c21
                  op->opaque->exec, pcmk_rc_str(rc), rc);
533c21
     }
533c21
@@ -1290,7 +1294,7 @@ services__execute_file(svc_action_t *op)
533c21
         // as long as no other standard uses stdin_fd assume stonith
533c21
         rc = pcmk__set_nonblocking(op->opaque->stdin_fd);
533c21
         if (rc != pcmk_rc_ok) {
533c21
-            crm_warn("Could not set '%s' input non-blocking: %s "
533c21
+            crm_info("Could not set '%s' input non-blocking: %s "
533c21
                     CRM_XS " fd=%d,rc=%d", op->opaque->exec,
533c21
                     pcmk_rc_str(rc), op->opaque->stdin_fd, rc);
533c21
         }
533c21
diff --git a/lib/services/systemd.c b/lib/services/systemd.c
533c21
index 6f5bef960..8e9fff484 100644
533c21
--- a/lib/services/systemd.c
533c21
+++ b/lib/services/systemd.c
533c21
@@ -232,7 +232,8 @@ systemd_daemon_reload_complete(DBusPendingCall *pending, void *user_data)
533c21
     }
533c21
 
533c21
     if (pcmk_dbus_find_error(pending, reply, &error)) {
533c21
-        crm_err("Could not issue systemd reload %d: %s", reload_count, error.message);
533c21
+        crm_warn("Could not issue systemd reload %d: %s",
533c21
+                 reload_count, error.message);
533c21
         dbus_error_free(&error);
533c21
 
533c21
     } else {
533c21
@@ -291,8 +292,8 @@ set_result_from_method_error(svc_action_t *op, const DBusError *error)
533c21
                              PCMK_EXEC_NOT_INSTALLED, "systemd unit not found");
533c21
     }
533c21
 
533c21
-    crm_err("DBus request for %s of systemd unit %s for resource %s failed: %s",
533c21
-            op->action, op->agent, crm_str(op->rsc), error->message);
533c21
+    crm_info("DBus request for %s of systemd unit %s for resource %s failed: %s",
533c21
+             op->action, op->agent, crm_str(op->rsc), error->message);
533c21
 }
533c21
 
533c21
 /*!
533c21
@@ -325,11 +326,11 @@ execute_after_loadunit(DBusMessage *reply, svc_action_t *op)
533c21
         if (op != NULL) {
533c21
             services__set_result(op, PCMK_OCF_UNKNOWN_ERROR, PCMK_EXEC_ERROR,
533c21
                                  "systemd DBus method had unexpected reply");
533c21
-            crm_err("Could not load systemd unit %s for %s: "
533c21
-                    "DBus reply has unexpected type", op->agent, op->id);
533c21
+            crm_info("Could not load systemd unit %s for %s: "
533c21
+                     "DBus reply has unexpected type", op->agent, op->id);
533c21
         } else {
533c21
-            crm_err("Could not load systemd unit: "
533c21
-                    "DBus reply has unexpected type");
533c21
+            crm_info("Could not load systemd unit: "
533c21
+                     "DBus reply has unexpected type");
533c21
         }
533c21
 
533c21
     } else {
533c21
@@ -688,7 +689,7 @@ process_unit_method_reply(DBusMessage *reply, svc_action_t *op)
533c21
 
533c21
     } else if (!pcmk_dbus_type_check(reply, NULL, DBUS_TYPE_OBJECT_PATH,
533c21
                                      __func__, __LINE__)) {
533c21
-        crm_warn("DBus request for %s of %s succeeded but "
533c21
+        crm_info("DBus request for %s of %s succeeded but "
533c21
                  "return type was unexpected", op->action, crm_str(op->rsc));
533c21
         services__set_result(op, PCMK_OCF_OK, PCMK_EXEC_DONE,
533c21
                              "systemd DBus method had unexpected reply");
533c21
@@ -981,7 +982,8 @@ systemd_timeout_callback(gpointer p)
533c21
     svc_action_t * op = p;
533c21
 
533c21
     op->opaque->timerid = 0;
533c21
-    crm_warn("%s operation on systemd unit %s named '%s' timed out", op->action, op->agent, op->rsc);
533c21
+    crm_info("%s action for systemd unit %s named '%s' timed out",
533c21
+             op->action, op->agent, op->rsc);
533c21
     services__set_result(op, PCMK_OCF_UNKNOWN_ERROR, PCMK_EXEC_TIMEOUT,
533c21
                          "Systemd action did not complete within specified timeout");
533c21
     services__finalize_async_op(op);
533c21
diff --git a/lib/services/upstart.c b/lib/services/upstart.c
533c21
index 2fdc229ad..2ece803e1 100644
533c21
--- a/lib/services/upstart.c
533c21
+++ b/lib/services/upstart.c
533c21
@@ -308,21 +308,21 @@ get_first_instance(const gchar * job, int timeout)
533c21
     dbus_message_unref(msg);
533c21
 
533c21
     if (dbus_error_is_set(&error)) {
533c21
-        crm_err("Call to %s failed: %s", method, error.message);
533c21
+        crm_info("Call to %s failed: %s", method, error.message);
533c21
         dbus_error_free(&error);
533c21
         goto done;
533c21
 
533c21
     } else if(reply == NULL) {
533c21
-        crm_err("Call to %s failed: no reply", method);
533c21
+        crm_info("Call to %s failed: no reply", method);
533c21
         goto done;
533c21
 
533c21
     } else if (!dbus_message_iter_init(reply, &args)) {
533c21
-        crm_err("Call to %s failed: Message has no arguments", method);
533c21
+        crm_info("Call to %s failed: Message has no arguments", method);
533c21
         goto done;
533c21
     }
533c21
 
533c21
     if(!pcmk_dbus_type_check(reply, &args, DBUS_TYPE_ARRAY, __func__, __LINE__)) {
533c21
-        crm_err("Call to %s failed: Message has invalid arguments", method);
533c21
+        crm_info("Call to %s failed: Message has invalid arguments", method);
533c21
         goto done;
533c21
     }
533c21
 
533c21
@@ -432,8 +432,8 @@ set_result_from_method_error(svc_action_t *op, const DBusError *error)
533c21
         return;
533c21
     }
533c21
 
533c21
-    crm_err("DBus request for %s of Upstart job %s for resource %s failed: %s",
533c21
-            op->action, op->agent, crm_str(op->rsc), error->message);
533c21
+    crm_info("DBus request for %s of Upstart job %s for resource %s failed: %s",
533c21
+             op->action, op->agent, crm_str(op->rsc), error->message);
533c21
 }
533c21
 
533c21
 /*!
533c21
@@ -468,7 +468,7 @@ job_method_complete(DBusPendingCall *pending, void *user_data)
533c21
 
533c21
     } else if (!pcmk_dbus_type_check(reply, NULL, DBUS_TYPE_OBJECT_PATH,
533c21
                                      __func__, __LINE__)) {
533c21
-        crm_warn("DBus request for %s of %s succeeded but "
533c21
+        crm_info("DBus request for %s of %s succeeded but "
533c21
                  "return type was unexpected", op->action, crm_str(op->rsc));
533c21
         services__set_result(op, PCMK_OCF_OK, PCMK_EXEC_DONE, NULL);
533c21
 
533c21
@@ -667,7 +667,8 @@ services__execute_upstart(svc_action_t *op)
533c21
 
533c21
     } else if (!pcmk_dbus_type_check(reply, NULL, DBUS_TYPE_OBJECT_PATH,
533c21
                                      __func__, __LINE__)) {
533c21
-        crm_warn("Call to %s passed but return type was unexpected", op->action);
533c21
+        crm_info("Call to %s passed but return type was unexpected",
533c21
+                 op->action);
533c21
         services__set_result(op, PCMK_OCF_OK, PCMK_EXEC_DONE, NULL);
533c21
 
533c21
     } else {
533c21
@@ -675,7 +676,7 @@ services__execute_upstart(svc_action_t *op)
533c21
 
533c21
         dbus_message_get_args(reply, NULL, DBUS_TYPE_OBJECT_PATH, &path,
533c21
                               DBUS_TYPE_INVALID);
533c21
-        crm_info("Call to %s passed: %s", op->action, path);
533c21
+        crm_debug("Call to %s passed: %s", op->action, path);
533c21
         services__set_result(op, PCMK_OCF_OK, PCMK_EXEC_DONE, NULL);
533c21
     }
533c21
 
533c21
-- 
533c21
2.27.0
533c21
533c21
533c21
From 39f6861c72eb9dd76d2cf3da287fe7485615631b Mon Sep 17 00:00:00 2001
533c21
From: Ken Gaillot <kgaillot@redhat.com>
533c21
Date: Mon, 8 Nov 2021 09:43:38 -0600
533c21
Subject: [PATCH 12/12] Low: fencing: avoid use-after-free with new result
533c21
 object
533c21
533c21
itnroduced by 153c9b552 (not released)
533c21
---
533c21
 lib/fencing/st_rhcs.c | 6 ++++--
533c21
 1 file changed, 4 insertions(+), 2 deletions(-)
533c21
533c21
diff --git a/lib/fencing/st_rhcs.c b/lib/fencing/st_rhcs.c
533c21
index 23e694975..6c8cbedc7 100644
533c21
--- a/lib/fencing/st_rhcs.c
533c21
+++ b/lib/fencing/st_rhcs.c
533c21
@@ -143,15 +143,17 @@ stonith__rhcs_get_metadata(const char *agent, int timeout, xmlNode **metadata)
533c21
     if (result->execution_status != PCMK_EXEC_DONE) {
533c21
         crm_warn("Could not execute metadata action for %s: %s",
533c21
                  agent, pcmk_exec_status_str(result->execution_status));
533c21
+        rc = pcmk_rc2legacy(stonith__result2rc(result));
533c21
         stonith__destroy_action(action);
533c21
-        return pcmk_rc2legacy(stonith__result2rc(result));
533c21
+        return rc;
533c21
     }
533c21
 
533c21
     if (result->exit_status != CRM_EX_OK) {
533c21
         crm_warn("Metadata action for %s returned error code %d",
533c21
                  agent, result->exit_status);
533c21
+        rc = pcmk_rc2legacy(stonith__result2rc(result));
533c21
         stonith__destroy_action(action);
533c21
-        return pcmk_rc2legacy(stonith__result2rc(result));
533c21
+        return rc;
533c21
     }
533c21
 
533c21
     if (result->action_stdout == NULL) {
533c21
-- 
533c21
2.27.0
533c21