Blame SOURCES/018-failure-messages.patch

533c21
From 08c3420f2c857e7b27cd960f355d787af534da7d Mon Sep 17 00:00:00 2001
533c21
From: Ken Gaillot <kgaillot@redhat.com>
533c21
Date: Tue, 18 Jan 2022 16:04:49 -0600
533c21
Subject: [PATCH 01/12] Log: libcrmcommon: improve description for "not
533c21
 connected" status
533c21
533c21
PCMK_EXEC_NOT_CONNECTED was originally added to represent "No executor
533c21
connection", but it can also now mean no fencer connection, so change it to
533c21
"Internal communication failure" which is probably less mysterious to end users
533c21
anyway (especially since it should be accompanied by a more descriptive exit
533c21
reason).
533c21
---
533c21
 include/crm/common/results.h | 2 +-
533c21
 1 file changed, 1 insertion(+), 1 deletion(-)
533c21
533c21
diff --git a/include/crm/common/results.h b/include/crm/common/results.h
533c21
index 873faf5c43..3d322a7ce6 100644
533c21
--- a/include/crm/common/results.h
533c21
+++ b/include/crm/common/results.h
533c21
@@ -349,7 +349,7 @@ pcmk_exec_status_str(enum pcmk_exec_status status)
533c21
         case PCMK_EXEC_ERROR_HARD:      return "Hard error";
533c21
         case PCMK_EXEC_ERROR_FATAL:     return "Fatal error";
533c21
         case PCMK_EXEC_NOT_INSTALLED:   return "Not installed";
533c21
-        case PCMK_EXEC_NOT_CONNECTED:   return "No executor connection";
533c21
+        case PCMK_EXEC_NOT_CONNECTED:   return "Internal communication failure";
533c21
         case PCMK_EXEC_INVALID:         return "Cannot execute now";
533c21
         case PCMK_EXEC_NO_FENCE_DEVICE: return "No fence device";
533c21
         case PCMK_EXEC_NO_SECRETS:      return "CIB secrets unavailable";
533c21
-- 
533c21
2.27.0
533c21
533c21
533c21
From 7c345cf8cf0cb054f5634206880df035bfef7311 Mon Sep 17 00:00:00 2001
533c21
From: Ken Gaillot <kgaillot@redhat.com>
533c21
Date: Mon, 20 Dec 2021 15:12:36 -0600
533c21
Subject: [PATCH 02/12] Refactor: libcrmcommon: drop unnecessary system error
533c21
 redefinitions
533c21
533c21
portability.h defines some system error codes that might not be present on
533c21
non-Linux systems.
533c21
533c21
This was a bad idea, since there's no way to ensure the defined values don't
533c21
conflict with existing system codes. However, we use a number of them, so it's
533c21
probably best to keep them, at least until we can make a backward compatibility
533c21
break.
533c21
533c21
However, we don't use EUNATCH, ENOSR, or ENOSTR, so we can delete those.
533c21
---
533c21
 include/portability.h | 12 ------------
533c21
 lib/common/results.c  |  9 ++++++---
533c21
 2 files changed, 6 insertions(+), 15 deletions(-)
533c21
533c21
diff --git a/include/portability.h b/include/portability.h
533c21
index 9a60c583a7..ee065a376d 100644
533c21
--- a/include/portability.h
533c21
+++ b/include/portability.h
533c21
@@ -131,10 +131,6 @@ typedef union
533c21
 #    define EREMOTEIO 193
533c21
 #  endif
533c21
 
533c21
-#  ifndef EUNATCH
533c21
-#    define EUNATCH   194
533c21
-#  endif
533c21
-
533c21
 #  ifndef ENOKEY
533c21
 #    define ENOKEY    195
533c21
 #  endif
533c21
@@ -147,14 +143,6 @@ typedef union
533c21
 #    define ETIME     197
533c21
 #  endif
533c21
 
533c21
-#  ifndef ENOSR
533c21
-#    define ENOSR     198
533c21
-#  endif
533c21
-
533c21
-#  ifndef ENOSTR
533c21
-#    define ENOSTR    199
533c21
-#  endif
533c21
-
533c21
 #  ifndef EKEYREJECTED
533c21
 #    define EKEYREJECTED 200
533c21
 #  endif
533c21
diff --git a/lib/common/results.c b/lib/common/results.c
533c21
index 6d120694cd..96cd4e5659 100644
533c21
--- a/lib/common/results.c
533c21
+++ b/lib/common/results.c
533c21
@@ -118,9 +118,6 @@ pcmk_strerror(int rc)
533c21
         case EREMOTEIO:
533c21
             return "Remote I/O error";
533c21
             /* coverity[dead_error_condition] False positive on non-Linux */
533c21
-        case EUNATCH:
533c21
-            return "Protocol driver not attached";
533c21
-            /* coverity[dead_error_condition] False positive on non-Linux */
533c21
         case ENOKEY:
533c21
             return "Required key not available";
533c21
     }
533c21
@@ -342,8 +339,12 @@ pcmk_rc_name(int rc)
533c21
         case ENOMSG:            return "ENOMSG";
533c21
         case ENOPROTOOPT:       return "ENOPROTOOPT";
533c21
         case ENOSPC:            return "ENOSPC";
533c21
+#ifdef ENOSR
533c21
         case ENOSR:             return "ENOSR";
533c21
+#endif
533c21
+#ifdef ENOSTR
533c21
         case ENOSTR:            return "ENOSTR";
533c21
+#endif
533c21
         case ENOSYS:            return "ENOSYS";
533c21
         case ENOTBLK:           return "ENOTBLK";
533c21
         case ENOTCONN:          return "ENOTCONN";
533c21
@@ -376,7 +377,9 @@ pcmk_rc_name(int rc)
533c21
         case ETIME:             return "ETIME";
533c21
         case ETIMEDOUT:         return "ETIMEDOUT";
533c21
         case ETXTBSY:           return "ETXTBSY";
533c21
+#ifdef EUNATCH
533c21
         case EUNATCH:           return "EUNATCH";
533c21
+#endif
533c21
         case EUSERS:            return "EUSERS";
533c21
         /* case EWOULDBLOCK:    return "EWOULDBLOCK"; */
533c21
         case EXDEV:             return "EXDEV";
533c21
-- 
533c21
2.27.0
533c21
533c21
533c21
From eac8d1ca51eac3f437e18584f7e013d976ecee2c Mon Sep 17 00:00:00 2001
533c21
From: Ken Gaillot <kgaillot@redhat.com>
533c21
Date: Mon, 20 Dec 2021 15:33:12 -0600
533c21
Subject: [PATCH 03/12] Log: libcrmcommon: improve handling of portability.h
533c21
 error codes
533c21
533c21
portability.h defines some system error codes that might not be present on
533c21
non-Linux systems.
533c21
533c21
Define a constant for each one (for example, PCMK__ECOMM for ECOMM) when
533c21
the system doesn't have the value, so we can detect that when relevant.
533c21
533c21
Also, make sure pcmk_rc_name() and pcmk_rc_str() handle all of these values.
533c21
---
533c21
 include/portability.h |  8 ++++++++
533c21
 lib/common/results.c  | 32 ++++++++++++++++++++++++++++++--
533c21
 2 files changed, 38 insertions(+), 2 deletions(-)
533c21
533c21
diff --git a/include/portability.h b/include/portability.h
533c21
index ee065a376d..5d5fbf21cb 100644
533c21
--- a/include/portability.h
533c21
+++ b/include/portability.h
533c21
@@ -116,34 +116,42 @@ typedef union
533c21
 #  include <errno.h>
533c21
 
533c21
 #  ifndef ENOTUNIQ
533c21
+#    define PCMK__ENOTUNIQ
533c21
 #    define ENOTUNIQ  190
533c21
 #  endif
533c21
 
533c21
 #  ifndef ECOMM
533c21
+#    define PCMK__ECOMM
533c21
 #    define ECOMM     191
533c21
 #  endif
533c21
 
533c21
 #  ifndef ELIBACC
533c21
+#    define PCMK__ELIBACC
533c21
 #    define ELIBACC   192
533c21
 #  endif
533c21
 
533c21
 #  ifndef EREMOTEIO
533c21
+#    define PCMK__EREMOTEIO
533c21
 #    define EREMOTEIO 193
533c21
 #  endif
533c21
 
533c21
 #  ifndef ENOKEY
533c21
+#    define PCMK__ENOKEY
533c21
 #    define ENOKEY    195
533c21
 #  endif
533c21
 
533c21
 #  ifndef ENODATA
533c21
+#    define PCMK__ENODATA
533c21
 #    define ENODATA   196
533c21
 #  endif
533c21
 
533c21
 #  ifndef ETIME
533c21
+#    define PCMK__ETIME
533c21
 #    define ETIME     197
533c21
 #  endif
533c21
 
533c21
 #  ifndef EKEYREJECTED
533c21
+#    define PCMK__EKEYREJECTED
533c21
 #    define EKEYREJECTED 200
533c21
 #  endif
533c21
 
533c21
diff --git a/lib/common/results.c b/lib/common/results.c
533c21
index 96cd4e5659..bcf289d0d6 100644
533c21
--- a/lib/common/results.c
533c21
+++ b/lib/common/results.c
533c21
@@ -395,9 +395,9 @@ pcmk_rc_name(int rc)
533c21
 #ifdef EISNAM // Not available on OS X, Illumos, Solaris
533c21
         case EISNAM:            return "EISNAM";
533c21
         case EKEYEXPIRED:       return "EKEYEXPIRED";
533c21
-        case EKEYREJECTED:      return "EKEYREJECTED";
533c21
         case EKEYREVOKED:       return "EKEYREVOKED";
533c21
 #endif
533c21
+        case EKEYREJECTED:      return "EKEYREJECTED";
533c21
         case EL2HLT:            return "EL2HLT";
533c21
         case EL2NSYNC:          return "EL2NSYNC";
533c21
         case EL3HLT:            return "EL3HLT";
533c21
@@ -443,7 +443,35 @@ pcmk_rc_str(int rc)
533c21
     if (rc < 0) {
533c21
         return "Unknown error";
533c21
     }
533c21
-    return strerror(rc);
533c21
+
533c21
+    // Handle values that could be defined by system or by portability.h
533c21
+    switch (rc) {
533c21
+#ifdef PCMK__ENOTUNIQ
533c21
+        case ENOTUNIQ:      return "Name not unique on network";
533c21
+#endif
533c21
+#ifdef PCMK__ECOMM
533c21
+        case ECOMM:         return "Communication error on send";
533c21
+#endif
533c21
+#ifdef PCMK__ELIBACC
533c21
+        case ELIBACC:       return "Can not access a needed shared library";
533c21
+#endif
533c21
+#ifdef PCMK__EREMOTEIO
533c21
+        case EREMOTEIO:     return "Remote I/O error";
533c21
+#endif
533c21
+#ifdef PCMK__ENOKEY
533c21
+        case ENOKEY:        return "Required key not available";
533c21
+#endif
533c21
+#ifdef PCMK__ENODATA
533c21
+        case ENODATA:       return "No data available";
533c21
+#endif
533c21
+#ifdef PCMK__ETIME
533c21
+        case ETIME:         return "Timer expired";
533c21
+#endif
533c21
+#ifdef PCMK__EKEYREJECTED
533c21
+        case EKEYREJECTED:  return "Key was rejected by service";
533c21
+#endif
533c21
+        default:            return strerror(rc);
533c21
+    }
533c21
 }
533c21
 
533c21
 // This returns negative values for errors
533c21
-- 
533c21
2.27.0
533c21
533c21
533c21
From 32a38ac6374f85c43e7f4051f5e519822cc481e6 Mon Sep 17 00:00:00 2001
533c21
From: Ken Gaillot <kgaillot@redhat.com>
533c21
Date: Mon, 20 Dec 2021 15:39:19 -0600
533c21
Subject: [PATCH 04/12] Log: libcrmcommon: redefine pcmk_strerror() in terms of
533c21
 pcmk_rc_str()
533c21
533c21
... to reduce code duplication. This causes minor differences in the string for
533c21
a few values.
533c21
---
533c21
 lib/common/results.c | 67 +-------------------------------------------
533c21
 1 file changed, 1 insertion(+), 66 deletions(-)
533c21
533c21
diff --git a/lib/common/results.c b/lib/common/results.c
533c21
index bcf289d0d6..b2c6e8d553 100644
533c21
--- a/lib/common/results.c
533c21
+++ b/lib/common/results.c
533c21
@@ -57,72 +57,7 @@ pcmk_errorname(int rc)
533c21
 const char *
533c21
 pcmk_strerror(int rc)
533c21
 {
533c21
-    if (rc == 0) {
533c21
-        return "OK";
533c21
-    }
533c21
-
533c21
-    rc = abs(rc);
533c21
-
533c21
-    // Of course rc > 0 ... unless someone passed INT_MIN as rc
533c21
-    if ((rc > 0) && (rc < PCMK_ERROR_OFFSET)) {
533c21
-        return strerror(rc);
533c21
-    }
533c21
-
533c21
-    switch (rc) {
533c21
-        case pcmk_err_generic:
533c21
-            return "Generic Pacemaker error";
533c21
-        case pcmk_err_no_quorum:
533c21
-            return "Operation requires quorum";
533c21
-        case pcmk_err_schema_validation:
533c21
-            return "Update does not conform to the configured schema";
533c21
-        case pcmk_err_transform_failed:
533c21
-            return "Schema transform failed";
533c21
-        case pcmk_err_old_data:
533c21
-            return "Update was older than existing configuration";
533c21
-        case pcmk_err_diff_failed:
533c21
-            return "Application of an update diff failed";
533c21
-        case pcmk_err_diff_resync:
533c21
-            return "Application of an update diff failed, requesting a full refresh";
533c21
-        case pcmk_err_cib_modified:
533c21
-            return "The on-disk configuration was manually modified";
533c21
-        case pcmk_err_cib_backup:
533c21
-            return "Could not archive the previous configuration";
533c21
-        case pcmk_err_cib_save:
533c21
-            return "Could not save the new configuration to disk";
533c21
-        case pcmk_err_cib_corrupt:
533c21
-            return "Could not parse on-disk configuration";
533c21
-        case pcmk_err_multiple:
533c21
-            return "Resource active on multiple nodes";
533c21
-        case pcmk_err_node_unknown:
533c21
-            return "Node not found";
533c21
-        case pcmk_err_already:
533c21
-            return "Situation already as requested";
533c21
-        case pcmk_err_bad_nvpair:
533c21
-            return "Bad name/value pair given";
533c21
-        case pcmk_err_schema_unchanged:
533c21
-            return "Schema is already the latest available";
533c21
-        case pcmk_err_unknown_format:
533c21
-            return "Unknown output format";
533c21
-
533c21
-            /* The following cases will only be hit on systems for which they are non-standard */
533c21
-            /* coverity[dead_error_condition] False positive on non-Linux */
533c21
-        case ENOTUNIQ:
533c21
-            return "Name not unique on network";
533c21
-            /* coverity[dead_error_condition] False positive on non-Linux */
533c21
-        case ECOMM:
533c21
-            return "Communication error on send";
533c21
-            /* coverity[dead_error_condition] False positive on non-Linux */
533c21
-        case ELIBACC:
533c21
-            return "Can not access a needed shared library";
533c21
-            /* coverity[dead_error_condition] False positive on non-Linux */
533c21
-        case EREMOTEIO:
533c21
-            return "Remote I/O error";
533c21
-            /* coverity[dead_error_condition] False positive on non-Linux */
533c21
-        case ENOKEY:
533c21
-            return "Required key not available";
533c21
-    }
533c21
-    crm_err("Unknown error code: %d", rc);
533c21
-    return "Unknown error";
533c21
+    return pcmk_rc_str(pcmk_legacy2rc(rc));
533c21
 }
533c21
 
533c21
 // Standard Pacemaker API return codes
533c21
-- 
533c21
2.27.0
533c21
533c21
533c21
From 7c331d7e2275ffebbfd5e2f6432a6137a66ee5db Mon Sep 17 00:00:00 2001
533c21
From: Ken Gaillot <kgaillot@redhat.com>
533c21
Date: Mon, 20 Dec 2021 15:41:24 -0600
533c21
Subject: [PATCH 05/12] Log: libcrmcommon: don't say "Unknown error"
533c21
533c21
... which is unhelpful and annoying to users
533c21
---
533c21
 lib/common/results.c | 4 ++--
533c21
 1 file changed, 2 insertions(+), 2 deletions(-)
533c21
533c21
diff --git a/lib/common/results.c b/lib/common/results.c
533c21
index b2c6e8d553..5ffac76549 100644
533c21
--- a/lib/common/results.c
533c21
+++ b/lib/common/results.c
533c21
@@ -376,7 +376,7 @@ pcmk_rc_str(int rc)
533c21
         return pcmk__rcs[pcmk_rc_error - rc].desc;
533c21
     }
533c21
     if (rc < 0) {
533c21
-        return "Unknown error";
533c21
+        return "Error";
533c21
     }
533c21
 
533c21
     // Handle values that could be defined by system or by portability.h
533c21
@@ -768,7 +768,7 @@ bz2_strerror(int rc)
533c21
         case BZ_OUTBUFF_FULL:
533c21
             return "output data will not fit into the buffer provided";
533c21
     }
533c21
-    return "Unknown error";
533c21
+    return "Data compression error";
533c21
 }
533c21
 
533c21
 crm_exit_t
533c21
-- 
533c21
2.27.0
533c21
533c21
533c21
From 26883b4edda7d81bfcb79bd7b33bb3210beff110 Mon Sep 17 00:00:00 2001
533c21
From: Ken Gaillot <kgaillot@redhat.com>
533c21
Date: Mon, 20 Dec 2021 16:01:39 -0600
533c21
Subject: [PATCH 06/12] Log: fencing: don't warn if cluster has no watchdog
533c21
 device
533c21
533c21
---
533c21
 lib/fencing/st_client.c | 7 ++++++-
533c21
 1 file changed, 6 insertions(+), 1 deletion(-)
533c21
533c21
diff --git a/lib/fencing/st_client.c b/lib/fencing/st_client.c
533c21
index b1de912b2a..a0f3119f3b 100644
533c21
--- a/lib/fencing/st_client.c
533c21
+++ b/lib/fencing/st_client.c
533c21
@@ -187,7 +187,12 @@ stonith__watchdog_fencing_enabled_for_node_api(stonith_t *st, const char *node)
533c21
                  * we drop in here - so as not to make remote nodes
533c21
                  * panic on that answer
533c21
                  */
533c21
-                crm_warn("watchdog-fencing-query failed");
533c21
+                if (rc == -ENODEV) {
533c21
+                    crm_notice("Cluster does not have watchdog fencing device");
533c21
+                } else {
533c21
+                    crm_warn("Could not check for watchdog fencing device: %s",
533c21
+                             pcmk_strerror(rc));
533c21
+                }
533c21
             } else if (list[0] == '\0') {
533c21
                 rv = TRUE;
533c21
             } else {
533c21
-- 
533c21
2.27.0
533c21
533c21
533c21
From 72b3c42232deaca64ffba9582598c59331203761 Mon Sep 17 00:00:00 2001
533c21
From: Ken Gaillot <kgaillot@redhat.com>
533c21
Date: Mon, 20 Dec 2021 16:22:49 -0600
533c21
Subject: [PATCH 07/12] Test: libcrmcommon: update pcmk_rc_str() unit test for
533c21
 recent change
533c21
533c21
---
533c21
 lib/common/tests/results/pcmk__results_test.c | 2 +-
533c21
 1 file changed, 1 insertion(+), 1 deletion(-)
533c21
533c21
diff --git a/lib/common/tests/results/pcmk__results_test.c b/lib/common/tests/results/pcmk__results_test.c
533c21
index 57a520c501..e08d4b6261 100644
533c21
--- a/lib/common/tests/results/pcmk__results_test.c
533c21
+++ b/lib/common/tests/results/pcmk__results_test.c
533c21
@@ -30,7 +30,7 @@ static void
533c21
 test_for_pcmk_rc_str(void **state) {
533c21
     assert_string_equal(pcmk_rc_str(pcmk_rc_error-1), "Unknown output format");
533c21
     assert_string_equal(pcmk_rc_str(pcmk_rc_ok), "OK");
533c21
-    assert_string_equal(pcmk_rc_str(-1), "Unknown error");
533c21
+    assert_string_equal(pcmk_rc_str(-1), "Error");
533c21
 }
533c21
 
533c21
 static void
533c21
-- 
533c21
2.27.0
533c21
533c21
533c21
From c1ad3d6640f695321a83183c95fae2f105adc429 Mon Sep 17 00:00:00 2001
533c21
From: Ken Gaillot <kgaillot@redhat.com>
533c21
Date: Tue, 21 Dec 2021 10:20:38 -0600
533c21
Subject: [PATCH 08/12] Test: cts-lab: update expected patterns for recent
533c21
 changes
533c21
533c21
---
533c21
 cts/lab/CTStests.py | 2 +-
533c21
 1 file changed, 1 insertion(+), 1 deletion(-)
533c21
533c21
diff --git a/cts/lab/CTStests.py b/cts/lab/CTStests.py
533c21
index 62c832eb45..f4be998cfb 100644
533c21
--- a/cts/lab/CTStests.py
533c21
+++ b/cts/lab/CTStests.py
533c21
@@ -3055,7 +3055,7 @@ class RemoteStonithd(RemoteDriver):
533c21
             r"pacemaker-controld.*:\s+error.*: Operation remote-.*_monitor",
533c21
             r"pacemaker-controld.*:\s+error.*: Result of monitor operation for remote-.*",
533c21
             r"schedulerd.*:\s+Recover remote-.*\s*\(.*\)",
533c21
-            r"error: Result of monitor operation for .* on remote-.*: No executor connection",
533c21
+            r"error: Result of monitor operation for .* on remote-.*: Internal communication failure",
533c21
         ]
533c21
 
533c21
         ignore_pats.extend(RemoteDriver.errorstoignore(self))
533c21
-- 
533c21
2.27.0
533c21
533c21
533c21
From f272e2f526633c707e894b39c7c7bce3c14de898 Mon Sep 17 00:00:00 2001
533c21
From: Ken Gaillot <kgaillot@redhat.com>
533c21
Date: Tue, 21 Dec 2021 15:40:49 -0600
533c21
Subject: [PATCH 09/12] Log: controller,libpacemaker: make history XML creation
533c21
 less chatty
533c21
533c21
Other messages with the same info will already be logged at higher severity
533c21
---
533c21
 daemons/controld/controld_execd.c      |  3 +--
533c21
 daemons/controld/controld_te_actions.c |  7 ++-----
533c21
 include/pcmki/pcmki_sched_utils.h      |  3 +--
533c21
 lib/pacemaker/pcmk_sched_transition.c  |  3 +--
533c21
 lib/pacemaker/pcmk_sched_utils.c       | 12 +++++-------
533c21
 5 files changed, 10 insertions(+), 18 deletions(-)
533c21
533c21
diff --git a/daemons/controld/controld_execd.c b/daemons/controld/controld_execd.c
533c21
index 15784e7687..52157fa5d4 100644
533c21
--- a/daemons/controld/controld_execd.c
533c21
+++ b/daemons/controld/controld_execd.c
533c21
@@ -693,9 +693,8 @@ build_operation_update(xmlNode * parent, lrmd_rsc_info_t * rsc, lrmd_event_data_
533c21
         caller_version = CRM_FEATURE_SET;
533c21
     }
533c21
 
533c21
-    crm_trace("Building %s operation update with originator version: %s", op->rsc_id, caller_version);
533c21
     xml_op = pcmk__create_history_xml(parent, op, caller_version, target_rc,
533c21
-                                      fsa_our_uname, src, LOG_DEBUG);
533c21
+                                      fsa_our_uname, src);
533c21
     if (xml_op == NULL) {
533c21
         return TRUE;
533c21
     }
533c21
diff --git a/daemons/controld/controld_te_actions.c b/daemons/controld/controld_te_actions.c
533c21
index 63b7c72359..b0bcb8b2e4 100644
533c21
--- a/daemons/controld/controld_te_actions.c
533c21
+++ b/daemons/controld/controld_te_actions.c
533c21
@@ -181,7 +181,6 @@ controld_record_action_timeout(crm_action_t *action)
533c21
     lrmd_event_data_t *op = NULL;
533c21
     xmlNode *state = NULL;
533c21
     xmlNode *rsc = NULL;
533c21
-    xmlNode *xml_op = NULL;
533c21
     xmlNode *action_rsc = NULL;
533c21
 
533c21
     int rc = pcmk_ok;
533c21
@@ -245,12 +244,10 @@ controld_record_action_timeout(crm_action_t *action)
533c21
     op->user_data = pcmk__transition_key(transition_graph->id, action->id,
533c21
                                          target_rc, te_uuid);
533c21
 
533c21
-    xml_op = pcmk__create_history_xml(rsc, op, CRM_FEATURE_SET, target_rc,
533c21
-                                      target, __func__, LOG_INFO);
533c21
+    pcmk__create_history_xml(rsc, op, CRM_FEATURE_SET, target_rc, target,
533c21
+                             __func__);
533c21
     lrmd_free_event(op);
533c21
 
533c21
-    crm_log_xml_trace(xml_op, "Action timeout");
533c21
-
533c21
     rc = fsa_cib_conn->cmds->update(fsa_cib_conn, XML_CIB_TAG_STATUS, state, call_options);
533c21
     fsa_register_cib_callback(rc, FALSE, NULL, cib_action_updated);
533c21
     free_xml(state);
533c21
diff --git a/include/pcmki/pcmki_sched_utils.h b/include/pcmki/pcmki_sched_utils.h
533c21
index 68d60fc7db..144424a609 100644
533c21
--- a/include/pcmki/pcmki_sched_utils.h
533c21
+++ b/include/pcmki/pcmki_sched_utils.h
533c21
@@ -52,8 +52,7 @@ extern void process_utilization(pe_resource_t * rsc, pe_node_t ** prefer, pe_wor
533c21
 
533c21
 xmlNode *pcmk__create_history_xml(xmlNode *parent, lrmd_event_data_t *event,
533c21
                                  const char *caller_version, int target_rc,
533c21
-                                 const char *node, const char *origin,
533c21
-                                 int level);
533c21
+                                 const char *node, const char *origin);
533c21
 
533c21
 #  define LOAD_STOPPED "load_stopped"
533c21
 
533c21
diff --git a/lib/pacemaker/pcmk_sched_transition.c b/lib/pacemaker/pcmk_sched_transition.c
533c21
index 678c3f5dd2..1aa90a5a0b 100644
533c21
--- a/lib/pacemaker/pcmk_sched_transition.c
533c21
+++ b/lib/pacemaker/pcmk_sched_transition.c
533c21
@@ -201,8 +201,7 @@ inject_op(xmlNode * cib_resource, lrmd_event_data_t * op, int target_rc)
533c21
 inject_op(xmlNode * cib_resource, lrmd_event_data_t * op, int target_rc)
533c21
 {
533c21
     return pcmk__create_history_xml(cib_resource, op, CRM_FEATURE_SET,
533c21
-                                    target_rc, NULL, crm_system_name,
533c21
-                                    LOG_TRACE);
533c21
+                                    target_rc, NULL, crm_system_name);
533c21
 }
533c21
 
533c21
 static xmlNode *
533c21
diff --git a/lib/pacemaker/pcmk_sched_utils.c b/lib/pacemaker/pcmk_sched_utils.c
533c21
index f8200b0efc..4f63d3374d 100644
533c21
--- a/lib/pacemaker/pcmk_sched_utils.c
533c21
+++ b/lib/pacemaker/pcmk_sched_utils.c
533c21
@@ -892,14 +892,13 @@ add_op_digest_to_xml(lrmd_event_data_t *op, xmlNode *update)
533c21
  * \param[in]     target_rc       Expected result of operation
533c21
  * \param[in]     node            Name of node on which operation was performed
533c21
  * \param[in]     origin          Arbitrary description of update source
533c21
- * \param[in]     level           A log message will be logged at this level
533c21
  *
533c21
  * \return Newly created XML node for history update
533c21
  */
533c21
 xmlNode *
533c21
 pcmk__create_history_xml(xmlNode *parent, lrmd_event_data_t *op,
533c21
                          const char *caller_version, int target_rc,
533c21
-                         const char *node, const char *origin, int level)
533c21
+                         const char *node, const char *origin)
533c21
 {
533c21
     char *key = NULL;
533c21
     char *magic = NULL;
533c21
@@ -912,11 +911,10 @@ pcmk__create_history_xml(xmlNode *parent, lrmd_event_data_t *op,
533c21
     const char *task = NULL;
533c21
 
533c21
     CRM_CHECK(op != NULL, return NULL);
533c21
-    do_crm_log(level, "%s: Updating resource %s after %s op %s (interval=%u)",
533c21
-               origin, op->rsc_id, op->op_type,
533c21
-               pcmk_exec_status_str(op->op_status), op->interval_ms);
533c21
-
533c21
-    crm_trace("DC version: %s", caller_version);
533c21
+    crm_trace("Creating history XML for %s-interval %s action for %s on %s "
533c21
+              "(DC version: %s, origin: %s)",
533c21
+              pcmk__readable_interval(op->interval_ms), op->op_type, op->rsc_id,
533c21
+              ((node == NULL)? "no node" : node), caller_version, origin);
533c21
 
533c21
     task = op->op_type;
533c21
 
533c21
-- 
533c21
2.27.0
533c21
533c21
533c21
From 06b1da9e5345e0d1571042c11646fd7157961279 Mon Sep 17 00:00:00 2001
533c21
From: Ken Gaillot <kgaillot@redhat.com>
533c21
Date: Tue, 21 Dec 2021 17:09:44 -0600
533c21
Subject: [PATCH 10/12] Feature: controller: improve exit reason for internal
533c21
 timeouts
533c21
533c21
Functionize the part of controld_record_action_timeout() that creates a fake
533c21
executor event, into a new function synthesize_timeout_event(), and have it set
533c21
a more detailed exit reason describing what timed out.
533c21
---
533c21
 daemons/controld/controld_te_actions.c | 61 ++++++++++++++++++++------
533c21
 1 file changed, 48 insertions(+), 13 deletions(-)
533c21
533c21
diff --git a/daemons/controld/controld_te_actions.c b/daemons/controld/controld_te_actions.c
533c21
index b0bcb8b2e4..de2fbb82bf 100644
533c21
--- a/daemons/controld/controld_te_actions.c
533c21
+++ b/daemons/controld/controld_te_actions.c
533c21
@@ -175,6 +175,53 @@ te_crm_command(crm_graph_t * graph, crm_action_t * action)
533c21
     return TRUE;
533c21
 }
533c21
 
533c21
+/*!
533c21
+ * \internal
533c21
+ * \brief Synthesize an executor event for a resource action timeout
533c21
+ *
533c21
+ * \param[in] action     Resource action that timed out
533c21
+ * \param[in] target_rc  Expected result of action that timed out
533c21
+ *
533c21
+ * Synthesize an executor event for a resource action timeout. (If the executor
533c21
+ * gets a timeout while waiting for a resource action to complete, that will be
533c21
+ * reported via the usual callback. This timeout means we didn't hear from the
533c21
+ * executor itself or the controller that relayed the action to the executor.)
533c21
+ *
533c21
+ * \return Newly created executor event for result of \p action
533c21
+ * \note The caller is responsible for freeing the return value using
533c21
+ *       lrmd_free_event().
533c21
+ */
533c21
+static lrmd_event_data_t *
533c21
+synthesize_timeout_event(crm_action_t *action, int target_rc)
533c21
+{
533c21
+    lrmd_event_data_t *op = NULL;
533c21
+    const char *target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
533c21
+    const char *reason = NULL;
533c21
+    char *dynamic_reason = NULL;
533c21
+
533c21
+    if (pcmk__str_eq(target, get_local_node_name(), pcmk__str_casei)) {
533c21
+        reason = "Local executor did not return result in time";
533c21
+    } else {
533c21
+        const char *router_node = NULL;
533c21
+
533c21
+        router_node = crm_element_value(action->xml, XML_LRM_ATTR_ROUTER_NODE);
533c21
+        if (router_node == NULL) {
533c21
+            router_node = target;
533c21
+        }
533c21
+        dynamic_reason = crm_strdup_printf("Controller on %s did not return "
533c21
+                                           "result in time", router_node);
533c21
+        reason = dynamic_reason;
533c21
+    }
533c21
+
533c21
+    op = pcmk__event_from_graph_action(NULL, action, PCMK_EXEC_TIMEOUT,
533c21
+                                       PCMK_OCF_UNKNOWN_ERROR, reason);
533c21
+    op->call_id = -1;
533c21
+    op->user_data = pcmk__transition_key(transition_graph->id, action->id,
533c21
+                                         target_rc, te_uuid);
533c21
+    free(dynamic_reason);
533c21
+    return op;
533c21
+}
533c21
+
533c21
 void
533c21
 controld_record_action_timeout(crm_action_t *action)
533c21
 {
533c21
@@ -231,19 +278,7 @@ controld_record_action_timeout(crm_action_t *action)
533c21
     crm_copy_xml_element(action_rsc, rsc, XML_AGENT_ATTR_CLASS);
533c21
     crm_copy_xml_element(action_rsc, rsc, XML_AGENT_ATTR_PROVIDER);
533c21
 
533c21
-    /* If the executor gets a timeout while waiting for the action to complete,
533c21
-     * that will be reported via the usual callback. This timeout means that we
533c21
-     * didn't hear from the executor or the controller that relayed the action
533c21
-     * to the executor.
533c21
-     */
533c21
-    op = pcmk__event_from_graph_action(NULL, action, PCMK_EXEC_TIMEOUT,
533c21
-                                       PCMK_OCF_UNKNOWN_ERROR,
533c21
-                                       "Cluster communication timeout "
533c21
-                                       "(no response from executor)");
533c21
-    op->call_id = -1;
533c21
-    op->user_data = pcmk__transition_key(transition_graph->id, action->id,
533c21
-                                         target_rc, te_uuid);
533c21
-
533c21
+    op = synthesize_timeout_event(action, target_rc);
533c21
     pcmk__create_history_xml(rsc, op, CRM_FEATURE_SET, target_rc, target,
533c21
                              __func__);
533c21
     lrmd_free_event(op);
533c21
-- 
533c21
2.27.0
533c21
533c21
533c21
From be620d206faefab967d4c8567d6554d10c9e72ba Mon Sep 17 00:00:00 2001
533c21
From: Ken Gaillot <kgaillot@redhat.com>
533c21
Date: Wed, 22 Dec 2021 16:35:06 -0600
533c21
Subject: [PATCH 11/12] Feature: fencing: improve exit reason for fencing
533c21
 timeouts
533c21
533c21
Troubleshooting timeouts is one of the more difficult aspects of cluster
533c21
maintenance. We want to give as much of a hint as possible, but for fencing in
533c21
particular it is difficult because an operation might involve multiple retries
533c21
of multiple devices.
533c21
533c21
Barring another major project to track exactly which devices, retries, etc.,
533c21
were used in a given operation, these changes in wording are probably the best
533c21
we can do.
533c21
---
533c21
 daemons/fenced/fenced_remote.c | 8 +++++---
533c21
 lib/fencing/st_client.c        | 2 +-
533c21
 2 files changed, 6 insertions(+), 4 deletions(-)
533c21
533c21
diff --git a/daemons/fenced/fenced_remote.c b/daemons/fenced/fenced_remote.c
533c21
index 1e237150c5..6eebb7381e 100644
533c21
--- a/daemons/fenced/fenced_remote.c
533c21
+++ b/daemons/fenced/fenced_remote.c
533c21
@@ -1,5 +1,5 @@
533c21
 /*
533c21
- * Copyright 2009-2021 the Pacemaker project contributors
533c21
+ * Copyright 2009-2022 the Pacemaker project contributors
533c21
  *
533c21
  * The version control history for this file may have further details.
533c21
  *
533c21
@@ -715,8 +715,10 @@ remote_op_timeout(gpointer userdata)
533c21
                   CRM_XS " id=%.8s",
533c21
                   op->action, op->target, op->client_name, op->id);
533c21
     } else {
533c21
-        finalize_timed_out_op(userdata, "Fencing could not be completed "
533c21
-                                        "within overall timeout");
533c21
+        finalize_timed_out_op(userdata, "Fencing did not complete within a "
533c21
+                                        "total timeout based on the "
533c21
+                                        "configured timeout and retries for "
533c21
+                                        "any devices attempted");
533c21
     }
533c21
     return G_SOURCE_REMOVE;
533c21
 }
533c21
diff --git a/lib/fencing/st_client.c b/lib/fencing/st_client.c
533c21
index a0f3119f3b..718739b321 100644
533c21
--- a/lib/fencing/st_client.c
533c21
+++ b/lib/fencing/st_client.c
533c21
@@ -906,7 +906,7 @@ invoke_registered_callbacks(stonith_t *stonith, xmlNode *msg, int call_id)
533c21
     if (msg == NULL) {
533c21
         // Fencer didn't reply in time
533c21
         pcmk__set_result(&result, CRM_EX_ERROR, PCMK_EXEC_TIMEOUT,
533c21
-                         "Timeout waiting for reply from fencer");
533c21
+                         "Fencer accepted request but did not reply in time");
533c21
         CRM_LOG_ASSERT(call_id > 0);
533c21
 
533c21
     } else {
533c21
-- 
533c21
2.27.0
533c21
533c21
533c21
From 0fe8ede2f8e838e335fe42846bdf147111ce9955 Mon Sep 17 00:00:00 2001
533c21
From: Ken Gaillot <kgaillot@redhat.com>
533c21
Date: Wed, 22 Dec 2021 17:09:09 -0600
533c21
Subject: [PATCH 12/12] Feature: libcrmservice: improve exit reason for
533c21
 timeouts
533c21
533c21
The services library doesn't have enough information about an action to say
533c21
(for example) what configuration parameters might be relevant, but we can at
533c21
least distinguish what kind of agent timed out.
533c21
---
533c21
 lib/services/services_linux.c | 12 +++++++++++-
533c21
 lib/services/systemd.c        |  2 +-
533c21
 2 files changed, 12 insertions(+), 2 deletions(-)
533c21
533c21
diff --git a/lib/services/services_linux.c b/lib/services/services_linux.c
533c21
index f15eee860e..d6aafcfe46 100644
533c21
--- a/lib/services/services_linux.c
533c21
+++ b/lib/services/services_linux.c
533c21
@@ -677,9 +677,19 @@ async_action_complete(mainloop_child_t *p, pid_t pid, int core, int signo,
533c21
         parse_exit_reason_from_stderr(op);
533c21
 
533c21
     } else if (mainloop_child_timeout(p)) {
533c21
+        const char *reason = NULL;
533c21
+
533c21
+        if (op->rsc != NULL) {
533c21
+            reason = "Resource agent did not complete in time";
533c21
+        } else if (pcmk__str_eq(op->standard, PCMK_RESOURCE_CLASS_STONITH,
533c21
+                                pcmk__str_none)) {
533c21
+            reason = "Fence agent did not complete in time";
533c21
+        } else {
533c21
+            reason = "Process did not complete in time";
533c21
+        }
533c21
         crm_info("%s[%d] timed out after %dms", op->id, op->pid, op->timeout);
533c21
         services__set_result(op, services__generic_error(op), PCMK_EXEC_TIMEOUT,
533c21
-                             "Process did not exit within specified timeout");
533c21
+                             reason);
533c21
 
533c21
     } else if (op->cancel) {
533c21
         /* If an in-flight recurring operation was killed because it was
533c21
diff --git a/lib/services/systemd.c b/lib/services/systemd.c
533c21
index 27a3b376db..d87b287424 100644
533c21
--- a/lib/services/systemd.c
533c21
+++ b/lib/services/systemd.c
533c21
@@ -995,7 +995,7 @@ systemd_timeout_callback(gpointer p)
533c21
     crm_info("%s action for systemd unit %s named '%s' timed out",
533c21
              op->action, op->agent, op->rsc);
533c21
     services__set_result(op, PCMK_OCF_UNKNOWN_ERROR, PCMK_EXEC_TIMEOUT,
533c21
-                         "Systemd action did not complete within specified timeout");
533c21
+                         "Systemd unit action did not complete in time");
533c21
     services__finalize_async_op(op);
533c21
     return FALSE;
533c21
 }
533c21
-- 
533c21
2.27.0
533c21