Blame SOURCES/013-rolling-upgrade-monitor.patch

61c387
From d6e5e62f7c084fc0aabfae7e6391b4b59f63252f Mon Sep 17 00:00:00 2001
61c387
From: Ken Gaillot <kgaillot@redhat.com>
61c387
Date: Tue, 13 Sep 2022 14:40:24 -0500
61c387
Subject: [PATCH 01/24] Low: fencing: use a default timeout with metadata and
61c387
 validate
61c387
61c387
If the caller did not specify a timeout, use a default in
61c387
stonith_api_operations_t:metadata() and validate(). (Timeout is currently
61c387
ignored past that point, so this has no effect yet.)
61c387
61c387
Also, rename timeout argument for clarity.
61c387
---
61c387
 lib/fencing/st_client.c | 23 ++++++++++++++++-------
61c387
 1 file changed, 16 insertions(+), 7 deletions(-)
61c387
61c387
diff --git a/lib/fencing/st_client.c b/lib/fencing/st_client.c
61c387
index 2b0d308..28791ff 100644
61c387
--- a/lib/fencing/st_client.c
61c387
+++ b/lib/fencing/st_client.c
61c387
@@ -504,7 +504,8 @@ stonith_api_device_list(stonith_t * stonith, int call_options, const char *names
61c387
 
61c387
 static int
61c387
 stonith_api_device_metadata(stonith_t * stonith, int call_options, const char *agent,
61c387
-                            const char *namespace, char **output, int timeout)
61c387
+                            const char *namespace, char **output,
61c387
+                            int timeout_sec)
61c387
 {
61c387
     /* By executing meta-data directly, we can get it from stonith_admin when
61c387
      * the cluster is not running, which is important for higher-level tools.
61c387
@@ -512,16 +513,20 @@ stonith_api_device_metadata(stonith_t * stonith, int call_options, const char *a
61c387
 
61c387
     enum stonith_namespace ns = stonith_get_namespace(agent, namespace);
61c387
 
61c387
+    if (timeout_sec <= 0) {
61c387
+        timeout_sec = CRMD_METADATA_CALL_TIMEOUT;
61c387
+    }
61c387
+
61c387
     crm_trace("Looking up metadata for %s agent %s",
61c387
               stonith_namespace2text(ns), agent);
61c387
 
61c387
     switch (ns) {
61c387
         case st_namespace_rhcs:
61c387
-            return stonith__rhcs_metadata(agent, timeout, output);
61c387
+            return stonith__rhcs_metadata(agent, timeout_sec, output);
61c387
 
61c387
 #if HAVE_STONITH_STONITH_H
61c387
         case st_namespace_lha:
61c387
-            return stonith__lha_metadata(agent, timeout, output);
61c387
+            return stonith__lha_metadata(agent, timeout_sec, output);
61c387
 #endif
61c387
 
61c387
         default:
61c387
@@ -1684,8 +1689,8 @@ stonith_api_delete(stonith_t * stonith)
61c387
 static int
61c387
 stonith_api_validate(stonith_t *st, int call_options, const char *rsc_id,
61c387
                      const char *namespace_s, const char *agent,
61c387
-                     stonith_key_value_t *params, int timeout, char **output,
61c387
-                     char **error_output)
61c387
+                     stonith_key_value_t *params, int timeout_sec,
61c387
+                     char **output, char **error_output)
61c387
 {
61c387
     /* Validation should be done directly via the agent, so we can get it from
61c387
      * stonith_admin when the cluster is not running, which is important for
61c387
@@ -1731,17 +1736,21 @@ stonith_api_validate(stonith_t *st, int call_options, const char *rsc_id,
61c387
         *error_output = NULL;
61c387
     }
61c387
 
61c387
+    if (timeout_sec <= 0) {
61c387
+        timeout_sec = CRMD_METADATA_CALL_TIMEOUT; // Questionable
61c387
+    }
61c387
+
61c387
     switch (stonith_get_namespace(agent, namespace_s)) {
61c387
         case st_namespace_rhcs:
61c387
             rc = stonith__rhcs_validate(st, call_options, target, agent,
61c387
-                                        params_table, host_arg, timeout,
61c387
+                                        params_table, host_arg, timeout_sec,
61c387
                                         output, error_output);
61c387
             break;
61c387
 
61c387
 #if HAVE_STONITH_STONITH_H
61c387
         case st_namespace_lha:
61c387
             rc = stonith__lha_validate(st, call_options, target, agent,
61c387
-                                       params_table, timeout, output,
61c387
+                                       params_table, timeout_sec, output,
61c387
                                        error_output);
61c387
             break;
61c387
 #endif
61c387
-- 
61c387
2.31.1
61c387
61c387
From 7404fc1238253b80eb97fd81af123c3f5aa1fde8 Mon Sep 17 00:00:00 2001
61c387
From: Ken Gaillot <kgaillot@redhat.com>
61c387
Date: Tue, 13 Sep 2022 14:00:00 -0500
61c387
Subject: [PATCH 02/24] Doc: fencer: improve
61c387
 stonith_api_operations_t:metadata() description
61c387
61c387
---
61c387
 include/crm/stonith-ng.h | 15 +++++++++++----
61c387
 lib/fencing/st_client.c  |  7 ++++---
61c387
 2 files changed, 15 insertions(+), 7 deletions(-)
61c387
61c387
diff --git a/include/crm/stonith-ng.h b/include/crm/stonith-ng.h
61c387
index 4fe52ef..a41d411 100644
61c387
--- a/include/crm/stonith-ng.h
61c387
+++ b/include/crm/stonith-ng.h
61c387
@@ -206,14 +206,21 @@ typedef struct stonith_api_operations_s
61c387
         stonith_t *st, int options, const char *node, int level, stonith_key_value_t *device_list);
61c387
 
61c387
     /*!
61c387
-     * \brief Get the metadata documentation for a resource.
61c387
+     * \brief Retrieve a fence agent's metadata
61c387
      *
61c387
-     * \note Value is returned in output.  Output must be freed when set.
61c387
+     * \param[in,out] stonith       Fencer connection
61c387
+     * \param[in]     call_options  Group of enum stonith_call_options
61c387
+     *                              (currently ignored)
61c387
+     * \param[in]     agent         Fence agent to query
61c387
+     * \param[in]     namespace     Namespace of fence agent to query (optional)
61c387
+     * \param[out]    output        Where to store metadata
61c387
+     * \param[in]     timeout_sec   Error if not complete within this time
61c387
      *
61c387
      * \return Legacy Pacemaker return code
61c387
+     * \note The caller is responsible for freeing *output using free().
61c387
      */
61c387
-    int (*metadata)(stonith_t *st, int options,
61c387
-            const char *device, const char *provider, char **output, int timeout);
61c387
+    int (*metadata)(stonith_t *stonith, int call_options, const char *agent,
61c387
+                    const char *namespace, char **output, int timeout_sec);
61c387
 
61c387
     /*!
61c387
      * \brief Retrieve a list of installed stonith agents
61c387
diff --git a/lib/fencing/st_client.c b/lib/fencing/st_client.c
61c387
index 28791ff..6c252bc 100644
61c387
--- a/lib/fencing/st_client.c
61c387
+++ b/lib/fencing/st_client.c
61c387
@@ -502,10 +502,11 @@ stonith_api_device_list(stonith_t * stonith, int call_options, const char *names
61c387
     return count;
61c387
 }
61c387
 
61c387
+// See stonith_api_operations_t:metadata() documentation
61c387
 static int
61c387
-stonith_api_device_metadata(stonith_t * stonith, int call_options, const char *agent,
61c387
-                            const char *namespace, char **output,
61c387
-                            int timeout_sec)
61c387
+stonith_api_device_metadata(stonith_t *stonith, int call_options,
61c387
+                            const char *agent, const char *namespace,
61c387
+                            char **output, int timeout_sec)
61c387
 {
61c387
     /* By executing meta-data directly, we can get it from stonith_admin when
61c387
      * the cluster is not running, which is important for higher-level tools.
61c387
-- 
61c387
2.31.1
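
For reference, a minimal sketch (not part of the patch series) of a direct caller of the metadata() method documented above, assuming only the public <crm/stonith-ng.h> API; the agent name is illustrative. Passing 0 for timeout_sec selects the library default added in patch 01:

    /* Sketch only: fetch and print a fence agent's metadata. */
    #include <stdio.h>
    #include <stdlib.h>
    #include <crm/stonith-ng.h>

    static void
    print_agent_metadata(const char *agent)
    {
        stonith_t *st = stonith_api_new();
        char *output = NULL;

        if (st == NULL) {
            return;
        }
        /* call_options is currently ignored; timeout_sec=0 requests the
         * default (CRMD_METADATA_CALL_TIMEOUT)
         */
        if (st->cmds->metadata(st, 0, agent, NULL, &output, 0) == pcmk_ok) {
            printf("%s\n", output);
        }
        free(output);               // caller must free *output
        stonith_api_delete(st);
    }

No fencer connection is needed here, since the library executes the agent's meta-data action directly (which is why stonith_admin can report metadata while the cluster is down).
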
61c387
61c387
From 7b5d1610231252e209a57ef6d82a23e3667812a0 Mon Sep 17 00:00:00 2001
61c387
From: Ken Gaillot <kgaillot@redhat.com>
61c387
Date: Tue, 13 Sep 2022 14:16:54 -0500
61c387
Subject: [PATCH 03/24] Doc: fencing: add doxygen block for
61c387
 stonith__action_create()
61c387
61c387
... and rename a couple arguments for clarity
61c387
---
61c387
 include/crm/fencing/internal.h |  4 ++--
61c387
 lib/fencing/st_actions.c       | 33 ++++++++++++++++++++++++---------
61c387
 2 files changed, 26 insertions(+), 11 deletions(-)
61c387
61c387
diff --git a/include/crm/fencing/internal.h b/include/crm/fencing/internal.h
61c387
index d2b49f8..e2ca85e 100644
61c387
--- a/include/crm/fencing/internal.h
61c387
+++ b/include/crm/fencing/internal.h
61c387
@@ -50,10 +50,10 @@ struct stonith_action_s;
61c387
 typedef struct stonith_action_s stonith_action_t;
61c387
 
61c387
 stonith_action_t *stonith_action_create(const char *agent,
61c387
-                                        const char *_action,
61c387
+                                        const char *action_name,
61c387
                                         const char *victim,
61c387
                                         uint32_t victim_nodeid,
61c387
-                                        int timeout,
61c387
+                                        int timeout_sec,
61c387
                                         GHashTable * device_args,
61c387
                                         GHashTable * port_map,
61c387
                                         const char * host_arg);
61c387
diff --git a/lib/fencing/st_actions.c b/lib/fencing/st_actions.c
61c387
index b3429f6..d16fa33 100644
61c387
--- a/lib/fencing/st_actions.c
61c387
+++ b/lib/fencing/st_actions.c
61c387
@@ -232,27 +232,42 @@ stonith__action_result(stonith_action_t *action)
61c387
 }
61c387
 
61c387
 #define FAILURE_MAX_RETRIES 2
61c387
+
61c387
+/*!
61c387
+ * \internal
61c387
+ * \brief Create a new fencing action to be executed
61c387
+ *
61c387
+ * \param[in] agent          Fence agent to use
61c387
+ * \param[in] action_name    Fencing action to be executed
61c387
+ * \param[in] victim         Name of target of fencing action (if known)
61c387
+ * \param[in] victim_nodeid  Node ID of target of fencing action (if known)
61c387
+ * \param[in] timeout_sec    Timeout to be used when executing action
61c387
+ * \param[in] device_args    Parameters to pass to fence agent
61c387
+ * \param[in] port_map       Mapping of target names to device ports
61c387
+ * \param[in] host_arg       Agent parameter used to pass target name
61c387
+ *
61c387
+ * \return Newly created fencing action (asserts on error, never NULL)
61c387
+ */
61c387
 stonith_action_t *
61c387
 stonith_action_create(const char *agent,
61c387
-                      const char *_action,
61c387
+                      const char *action_name,
61c387
                       const char *victim,
61c387
                       uint32_t victim_nodeid,
61c387
-                      int timeout, GHashTable * device_args,
61c387
+                      int timeout_sec, GHashTable * device_args,
61c387
                       GHashTable * port_map, const char *host_arg)
61c387
 {
61c387
-    stonith_action_t *action;
61c387
+    stonith_action_t *action = calloc(1, sizeof(stonith_action_t));
61c387
 
61c387
-    action = calloc(1, sizeof(stonith_action_t));
61c387
     CRM_ASSERT(action != NULL);
61c387
 
61c387
-    action->args = make_args(agent, _action, victim, victim_nodeid,
61c387
+    action->args = make_args(agent, action_name, victim, victim_nodeid,
61c387
                              device_args, port_map, host_arg);
61c387
     crm_debug("Preparing '%s' action for %s using agent %s",
61c387
-              _action, (victim? victim : "no target"), agent);
61c387
+              action_name, (victim? victim : "no target"), agent);
61c387
     action->agent = strdup(agent);
61c387
-    action->action = strdup(_action);
61c387
+    action->action = strdup(action_name);
61c387
     pcmk__str_update(&action->victim, victim);
61c387
-    action->timeout = action->remaining_timeout = timeout;
61c387
+    action->timeout = action->remaining_timeout = timeout_sec;
61c387
     action->max_retries = FAILURE_MAX_RETRIES;
61c387
 
61c387
     pcmk__set_result(&(action->result), PCMK_OCF_UNKNOWN, PCMK_EXEC_UNKNOWN,
61c387
@@ -262,7 +277,7 @@ stonith_action_create(const char *agent,
61c387
         char buffer[512];
61c387
         const char *value = NULL;
61c387
 
61c387
-        snprintf(buffer, sizeof(buffer), "pcmk_%s_retries", _action);
61c387
+        snprintf(buffer, sizeof(buffer), "pcmk_%s_retries", action_name);
61c387
         value = g_hash_table_lookup(device_args, buffer);
61c387
 
61c387
         if (value) {
61c387
-- 
61c387
2.31.1
61c387
61c387
From a82339a81c451566b9835e61bca7e1bf84b97c46 Mon Sep 17 00:00:00 2001
61c387
From: Ken Gaillot <kgaillot@redhat.com>
61c387
Date: Tue, 13 Sep 2022 14:20:24 -0500
61c387
Subject: [PATCH 04/24] Low: fencing: use requested timeout with RHCS metadata
61c387
 actions
61c387
61c387
... instead of hardcoded 5 seconds, and rename timeout argument for clarity
61c387
---
61c387
 lib/fencing/st_rhcs.c | 35 ++++++++++++++++-------------------
61c387
 1 file changed, 16 insertions(+), 19 deletions(-)
61c387
61c387
diff --git a/lib/fencing/st_rhcs.c b/lib/fencing/st_rhcs.c
61c387
index dfccff2..1b3cd57 100644
61c387
--- a/lib/fencing/st_rhcs.c
61c387
+++ b/lib/fencing/st_rhcs.c
61c387
@@ -112,25 +112,24 @@ stonith_rhcs_parameter_not_required(xmlNode *metadata, const char *parameter)
61c387
 }
61c387
 
61c387
 /*!
61c387
- * \brief Execute RHCS-compatible agent's meta-data action
61c387
+ * \brief Execute RHCS-compatible agent's metadata action
61c387
  *
61c387
- * \param[in]  agent    Agent to execute
61c387
- * \param[in]  timeout  Action timeout
61c387
- * \param[out] metadata Where to store output xmlNode (or NULL to ignore)
61c387
- *
61c387
- * \todo timeout is currently ignored; shouldn't we use it?
61c387
+ * \param[in]  agent        Agent to execute
61c387
+ * \param[in]  timeout_sec  Action timeout
61c387
+ * \param[out] metadata     Where to store output xmlNode (or NULL to ignore)
61c387
  */
61c387
 static int
61c387
-stonith__rhcs_get_metadata(const char *agent, int timeout, xmlNode **metadata)
61c387
+stonith__rhcs_get_metadata(const char *agent, int timeout_sec,
61c387
+                           xmlNode **metadata)
61c387
 {
61c387
     xmlNode *xml = NULL;
61c387
     xmlNode *actions = NULL;
61c387
     xmlXPathObject *xpathObj = NULL;
61c387
-    pcmk__action_result_t *result = NULL;
61c387
-    stonith_action_t *action = stonith_action_create(agent, "metadata", NULL, 0,
61c387
-                                                     5, NULL, NULL, NULL);
61c387
+    stonith_action_t *action = stonith_action_create(agent, "metadata", NULL,
61c387
+                                                     0, timeout_sec, NULL,
61c387
+                                                     NULL, NULL);
61c387
     int rc = stonith__execute(action);
61c387
-    result = stonith__action_result(action);
61c387
+    pcmk__action_result_t *result = stonith__action_result(action);
61c387
 
61c387
     if (result == NULL) {
61c387
         if (rc < 0) {
61c387
@@ -208,21 +207,19 @@ stonith__rhcs_get_metadata(const char *agent, int timeout, xmlNode **metadata)
61c387
 }
61c387
 
61c387
 /*!
61c387
- * \brief Execute RHCS-compatible agent's meta-data action
61c387
- *
61c387
- * \param[in]  agent    Agent to execute
61c387
- * \param[in]  timeout  Action timeout
61c387
- * \param[out] output   Where to store action output (or NULL to ignore)
61c387
+ * \brief Retrieve metadata for RHCS-compatible fence agent
61c387
  *
61c387
- * \todo timeout is currently ignored; shouldn't we use it?
61c387
+ * \param[in]  agent        Agent to execute
61c387
+ * \param[in]  timeout_sec  Action timeout
61c387
+ * \param[out] output       Where to store action output (or NULL to ignore)
61c387
  */
61c387
 int
61c387
-stonith__rhcs_metadata(const char *agent, int timeout, char **output)
61c387
+stonith__rhcs_metadata(const char *agent, int timeout_sec, char **output)
61c387
 {
61c387
     char *buffer = NULL;
61c387
     xmlNode *xml = NULL;
61c387
 
61c387
-    int rc = stonith__rhcs_get_metadata(agent, timeout, &xml);
61c387
+    int rc = stonith__rhcs_get_metadata(agent, timeout_sec, &xml);
61c387
 
61c387
     if (rc != pcmk_ok) {
61c387
         free_xml(xml);
61c387
-- 
61c387
2.31.1
61c387
61c387
From 73d12e7bcab4d12b1d60ac3431fb713a36c5b6d3 Mon Sep 17 00:00:00 2001
61c387
From: Ken Gaillot <kgaillot@redhat.com>
61c387
Date: Tue, 13 Sep 2022 14:32:44 -0500
61c387
Subject: [PATCH 05/24] Refactor: fencing: make stonith_action_t:async bool
61c387
61c387
---
61c387
 lib/fencing/st_actions.c | 5 +++--
61c387
 1 file changed, 3 insertions(+), 2 deletions(-)
61c387
61c387
diff --git a/lib/fencing/st_actions.c b/lib/fencing/st_actions.c
61c387
index d16fa33..abd0d5a 100644
61c387
--- a/lib/fencing/st_actions.c
61c387
+++ b/lib/fencing/st_actions.c
61c387
@@ -9,6 +9,7 @@
61c387
 
61c387
 #include <crm_internal.h>
61c387
 
61c387
+#include <stdbool.h>
61c387
 #include <stdlib.h>
61c387
 #include <stdio.h>
61c387
 #include <string.h>
61c387
@@ -32,7 +33,7 @@ struct stonith_action_s {
61c387
     char *victim;
61c387
     GHashTable *args;
61c387
     int timeout;
61c387
-    int async;
61c387
+    bool async;
61c387
     void *userdata;
61c387
     void (*done_cb) (int pid, const pcmk__action_result_t *result,
61c387
                      void *user_data);
61c387
@@ -671,7 +672,7 @@ stonith_action_execute_async(stonith_action_t * action,
61c387
     action->userdata = userdata;
61c387
     action->done_cb = done;
61c387
     action->fork_cb = fork_cb;
61c387
-    action->async = 1;
61c387
+    action->async = true;
61c387
 
61c387
     return internal_stonith_action_execute(action);
61c387
 }
61c387
-- 
61c387
2.31.1
61c387
61c387
From 537955ab3c19a1bfacc43fb739e38f036c5788fb Mon Sep 17 00:00:00 2001
61c387
From: Ken Gaillot <kgaillot@redhat.com>
61c387
Date: Tue, 13 Sep 2022 14:59:28 -0500
61c387
Subject: [PATCH 06/24] Refactor: fencing: rename
61c387
 stonith_action_execute_async()
61c387
61c387
... to stonith__execute_async(), since it's internal
61c387
---
61c387
 daemons/fenced/fenced_commands.c |  4 ++--
61c387
 include/crm/fencing/internal.h   | 12 +++++-------
61c387
 lib/fencing/st_actions.c         | 11 +++++------
61c387
 3 files changed, 12 insertions(+), 15 deletions(-)
61c387
61c387
diff --git a/daemons/fenced/fenced_commands.c b/daemons/fenced/fenced_commands.c
61c387
index 94aa6b8..41a1936 100644
61c387
--- a/daemons/fenced/fenced_commands.c
61c387
+++ b/daemons/fenced/fenced_commands.c
61c387
@@ -510,8 +510,8 @@ stonith_device_execute(stonith_device_t * device)
61c387
     /* for async exec, exec_rc is negative for early error exit
61c387
        otherwise handling of success/errors is done via callbacks */
61c387
     cmd->activating_on = device;
61c387
-    exec_rc = stonith_action_execute_async(action, (void *)cmd,
61c387
-                                           cmd->done_cb, fork_cb);
61c387
+    exec_rc = stonith__execute_async(action, (void *)cmd, cmd->done_cb,
61c387
+                                     fork_cb);
61c387
     if (exec_rc < 0) {
61c387
         cmd->activating_on = NULL;
61c387
         cmd->done_cb(0, stonith__action_result(action), cmd);
61c387
diff --git a/include/crm/fencing/internal.h b/include/crm/fencing/internal.h
61c387
index e2ca85e..1797d9a 100644
61c387
--- a/include/crm/fencing/internal.h
61c387
+++ b/include/crm/fencing/internal.h
61c387
@@ -64,13 +64,11 @@ void stonith__xe_set_result(xmlNode *xml, const pcmk__action_result_t *result);
61c387
 void stonith__xe_get_result(xmlNode *xml, pcmk__action_result_t *result);
61c387
 xmlNode *stonith__find_xe_with_result(xmlNode *xml);
61c387
 
61c387
-int
61c387
-stonith_action_execute_async(stonith_action_t * action,
61c387
-                             void *userdata,
61c387
-                             void (*done) (int pid,
61c387
-                                           const pcmk__action_result_t *result,
61c387
-                                           void *user_data),
61c387
-                             void (*fork_cb) (int pid, void *user_data));
61c387
+int stonith__execute_async(stonith_action_t *action, void *userdata,
61c387
+                           void (*done) (int pid,
61c387
+                                         const pcmk__action_result_t *result,
61c387
+                                         void *user_data),
61c387
+                           void (*fork_cb) (int pid, void *user_data));
61c387
 
61c387
 xmlNode *create_level_registration_xml(const char *node, const char *pattern,
61c387
                                        const char *attr, const char *value,
61c387
diff --git a/lib/fencing/st_actions.c b/lib/fencing/st_actions.c
61c387
index abd0d5a..c4e32bd 100644
61c387
--- a/lib/fencing/st_actions.c
61c387
+++ b/lib/fencing/st_actions.c
61c387
@@ -658,12 +658,11 @@ internal_stonith_action_execute(stonith_action_t * action)
61c387
  * \return pcmk_ok if ownership of action has been taken, -errno otherwise
61c387
  */
61c387
 int
61c387
-stonith_action_execute_async(stonith_action_t * action,
61c387
-                             void *userdata,
61c387
-                             void (*done) (int pid,
61c387
-                                           const pcmk__action_result_t *result,
61c387
-                                           void *user_data),
61c387
-                             void (*fork_cb) (int pid, void *user_data))
61c387
+stonith__execute_async(stonith_action_t * action, void *userdata,
61c387
+                       void (*done) (int pid,
61c387
+                                     const pcmk__action_result_t *result,
61c387
+                                     void *user_data),
61c387
+                       void (*fork_cb) (int pid, void *user_data))
61c387
 {
61c387
     if (!action) {
61c387
         return -EINVAL;
61c387
-- 
61c387
2.31.1
61c387
61c387
From b56edde80bfe4cf900a5eb021986c8f6189d3307 Mon Sep 17 00:00:00 2001
61c387
From: Ken Gaillot <kgaillot@redhat.com>
61c387
Date: Tue, 13 Sep 2022 16:43:57 -0500
61c387
Subject: [PATCH 07/24] Refactor: fencing: add internal API for getting
61c387
 metadata async
61c387
61c387
Nothing uses it yet
61c387
---
61c387
 include/crm/fencing/internal.h |  6 +++
61c387
 lib/fencing/st_client.c        | 80 ++++++++++++++++++++++++++++++++++
61c387
 2 files changed, 86 insertions(+)
61c387
61c387
diff --git a/include/crm/fencing/internal.h b/include/crm/fencing/internal.h
61c387
index 1797d9a..513d1c4 100644
61c387
--- a/include/crm/fencing/internal.h
61c387
+++ b/include/crm/fencing/internal.h
61c387
@@ -70,6 +70,12 @@ int stonith__execute_async(stonith_action_t *action, void *userdata,
61c387
                                          void *user_data),
61c387
                            void (*fork_cb) (int pid, void *user_data));
61c387
 
61c387
+int stonith__metadata_async(const char *agent, int timeout_sec,
61c387
+                            void (*callback)(int pid,
61c387
+                                             const pcmk__action_result_t *result,
61c387
+                                             void *user_data),
61c387
+                            void *user_data);
61c387
+
61c387
 xmlNode *create_level_registration_xml(const char *node, const char *pattern,
61c387
                                        const char *attr, const char *value,
61c387
                                        int level,
61c387
diff --git a/lib/fencing/st_client.c b/lib/fencing/st_client.c
61c387
index 6c252bc..91075bd 100644
61c387
--- a/lib/fencing/st_client.c
61c387
+++ b/lib/fencing/st_client.c
61c387
@@ -2386,6 +2386,86 @@ stonith__device_parameter_flags(uint32_t *device_flags, const char *device_name,
61c387
     freeXpathObject(xpath);
61c387
 }
61c387
 
61c387
+/*!
61c387
+ * \internal
61c387
+ * \brief Retrieve fence agent meta-data asynchronously
61c387
+ *
61c387
+ * \param[in] agent        Agent to execute
61c387
+ * \param[in] timeout_sec  Error if not complete within this time
61c387
+ * \param[in] callback     Function to call with result (this will always be
61c387
+ *                         called, whether by this function directly or later
61c387
+ *                         via the main loop, and on success the metadata will
61c387
+ *                         be in its result argument's action_stdout)
61c387
+ * \param[in] user_data    User data to pass to callback
61c387
+ *
61c387
+ * \return Standard Pacemaker return code
61c387
+ * \note The caller must use a main loop. This function is not a
61c387
+ *       stonith_api_operations_t method because it does not need a stonith_t
61c387
+ *       object and does not go through the fencer, but executes the agent
61c387
+ *       directly.
61c387
+ */
61c387
+int
61c387
+stonith__metadata_async(const char *agent, int timeout_sec,
61c387
+                        void (*callback)(int pid,
61c387
+                                         const pcmk__action_result_t *result,
61c387
+                                         void *user_data),
61c387
+                        void *user_data)
61c387
+{
61c387
+    switch (stonith_get_namespace(agent, NULL)) {
61c387
+        case st_namespace_rhcs:
61c387
+            {
61c387
+                stonith_action_t *action = NULL;
61c387
+                int rc = pcmk_ok;
61c387
+
61c387
+                action = stonith_action_create(agent, "metadata", NULL, 0,
61c387
+                                               timeout_sec, NULL, NULL, NULL);
61c387
+
61c387
+                rc = stonith__execute_async(action, user_data, callback, NULL);
61c387
+                if (rc != pcmk_ok) {
61c387
+                    callback(0, stonith__action_result(action), user_data);
61c387
+                    stonith__destroy_action(action);
61c387
+                }
61c387
+                return pcmk_legacy2rc(rc);
61c387
+            }
61c387
+
61c387
+#if HAVE_STONITH_STONITH_H
61c387
+        case st_namespace_lha:
61c387
+            // LHA metadata is simply synthesized, so simulate async
61c387
+            {
61c387
+                pcmk__action_result_t result = {
61c387
+                    .exit_status = CRM_EX_OK,
61c387
+                    .execution_status = PCMK_EXEC_DONE,
61c387
+                    .exit_reason = NULL,
61c387
+                    .action_stdout = NULL,
61c387
+                    .action_stderr = NULL,
61c387
+                };
61c387
+
61c387
+                stonith__lha_metadata(agent, timeout_sec,
61c387
+                                      &result.action_stdout);
61c387
+                callback(0, &result, user_data);
61c387
+                pcmk__reset_result(&result);
61c387
+                return pcmk_rc_ok;
61c387
+            }
61c387
+#endif
61c387
+
61c387
+        default:
61c387
+            {
61c387
+                pcmk__action_result_t result = {
61c387
+                    .exit_status = CRM_EX_ERROR,
61c387
+                    .execution_status = PCMK_EXEC_ERROR_HARD,
61c387
+                    .exit_reason = crm_strdup_printf("No such agent '%s'",
61c387
+                                                     agent),
61c387
+                    .action_stdout = NULL,
61c387
+                    .action_stderr = NULL,
61c387
+                };
61c387
+
61c387
+                callback(0, &result, user_data);
61c387
+                pcmk__reset_result(&result);
61c387
+                return ENOENT;
61c387
+            }
61c387
+    }
61c387
+}
61c387
+
61c387
 /*!
61c387
  * \internal
61c387
  * \brief Return the exit status from an async action callback
61c387
-- 
61c387
2.31.1
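
For reference, a minimal sketch (not part of the patch series) of how a caller might drive the new stonith__metadata_async() entry point, assuming a GLib main loop and the internal headers touched by this patch; the agent name and 30-second timeout are illustrative. Per the doxygen block above, the callback is always invoked, either synchronously on failure or later via the main loop, with the metadata in the result's action_stdout on success:

    /* Sketch only: retrieve fence agent metadata asynchronously. */
    #include <stdio.h>
    #include <glib.h>
    #include <crm_internal.h>
    #include <crm/fencing/internal.h>

    static GMainLoop *mainloop = NULL;
    static gboolean finished = FALSE;

    static void
    metadata_done(int pid, const pcmk__action_result_t *result, void *user_data)
    {
        if ((result->execution_status == PCMK_EXEC_DONE)
            && (result->exit_status == CRM_EX_OK)) {
            printf("%s\n", result->action_stdout);  // agent metadata XML
        } else {
            fprintf(stderr, "Metadata action failed: %s\n",
                    ((result->exit_reason != NULL)? result->exit_reason : "unknown"));
        }
        finished = TRUE;
        if (mainloop != NULL) {
            g_main_loop_quit(mainloop);
        }
    }

    int
    main(void)
    {
        mainloop = g_main_loop_new(NULL, FALSE);
        stonith__metadata_async("fence_dummy", 30, metadata_done, NULL);
        if (!finished) {    // callback may already have run synchronously
            g_main_loop_run(mainloop);
        }
        g_main_loop_free(mainloop);
        return 0;
    }
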
61c387
61c387
From f12cdd204982548b4d5fed16c0a460d4a5ced217 Mon Sep 17 00:00:00 2001
61c387
From: Ken Gaillot <kgaillot@redhat.com>
61c387
Date: Tue, 13 Sep 2022 17:33:10 -0500
61c387
Subject: [PATCH 08/24] Refactor: liblrmd: add internal API for getting
61c387
 metadata async
61c387
61c387
Nothing uses it yet
61c387
---
61c387
 include/crm/lrmd_internal.h |  10 +++-
61c387
 lib/lrmd/lrmd_client.c      | 115 ++++++++++++++++++++++++++++++++++++
61c387
 2 files changed, 123 insertions(+), 2 deletions(-)
61c387
61c387
diff --git a/include/crm/lrmd_internal.h b/include/crm/lrmd_internal.h
61c387
index 284c4d6..5cb00d5 100644
61c387
--- a/include/crm/lrmd_internal.h
61c387
+++ b/include/crm/lrmd_internal.h
61c387
@@ -1,5 +1,5 @@
61c387
 /*
61c387
- * Copyright 2015-2021 the Pacemaker project contributors
61c387
+ * Copyright 2015-2022 the Pacemaker project contributors
61c387
  *
61c387
  * The version control history for this file may have further details.
61c387
  *
61c387
@@ -17,7 +17,7 @@
61c387
 #include <crm/common/mainloop.h>        // mainloop_io_t, ipc_client_callbacks
61c387
 #include <crm/common/output_internal.h> // pcmk__output_t
61c387
 #include <crm/common/remote_internal.h> // pcmk__remote_t
61c387
-#include <crm/lrmd.h>                   // lrmd_t, lrmd_event_data_t
61c387
+#include <crm/lrmd.h>           // lrmd_t, lrmd_event_data_t, lrmd_rsc_info_t
61c387
 
61c387
 int lrmd__new(lrmd_t **api, const char *nodename, const char *server, int port);
61c387
 
61c387
@@ -35,6 +35,12 @@ int lrmd_send_resource_alert(lrmd_t *lrmd, GList *alert_list,
61c387
 int lrmd__remote_send_xml(pcmk__remote_t *session, xmlNode *msg, uint32_t id,
61c387
                           const char *msg_type);
61c387
 
61c387
+int lrmd__metadata_async(lrmd_rsc_info_t *rsc,
61c387
+                         void (*callback)(int pid,
61c387
+                                          const pcmk__action_result_t *result,
61c387
+                                          void *user_data),
61c387
+                         void *user_data);
61c387
+
61c387
 void lrmd__set_result(lrmd_event_data_t *event, enum ocf_exitcode rc,
61c387
                       int op_status, const char *exit_reason);
61c387
 
61c387
diff --git a/lib/lrmd/lrmd_client.c b/lib/lrmd/lrmd_client.c
61c387
index 82afd6c..4b16bf0 100644
61c387
--- a/lib/lrmd/lrmd_client.c
61c387
+++ b/lib/lrmd/lrmd_client.c
61c387
@@ -2343,6 +2343,121 @@ lrmd_api_delete(lrmd_t * lrmd)
61c387
     free(lrmd);
61c387
 }
61c387
 
61c387
+struct metadata_cb {
61c387
+     void (*callback)(int pid, const pcmk__action_result_t *result,
61c387
+                      void *user_data);
61c387
+     void *user_data;
61c387
+};
61c387
+
61c387
+/*!
61c387
+ * \internal
61c387
+ * \brief Process asynchronous metadata completion
61c387
+ *
61c387
+ * \param[in] action  Metadata action that completed
61c387
+ */
61c387
+static void
61c387
+metadata_complete(svc_action_t *action)
61c387
+{
61c387
+    struct metadata_cb *metadata_cb = (struct metadata_cb *) action->cb_data;
61c387
+    pcmk__action_result_t result = PCMK__UNKNOWN_RESULT;
61c387
+
61c387
+    pcmk__set_result(&result, action->rc, action->status,
61c387
+                     services__exit_reason(action));
61c387
+    pcmk__set_result_output(&result, action->stdout_data, action->stderr_data);
61c387
+
61c387
+    metadata_cb->callback(0, &result, metadata_cb->user_data);
61c387
+    result.action_stdout = NULL; // Prevent free, because action owns it
61c387
+    result.action_stderr = NULL; // Prevent free, because action owns it
61c387
+    pcmk__reset_result(&result);
61c387
+    free(metadata_cb);
61c387
+}
61c387
+
61c387
+/*!
61c387
+ * \internal
61c387
+ * \brief Retrieve agent metadata asynchronously
61c387
+ *
61c387
+ * \param[in] rsc        Resource agent specification
61c387
+ * \param[in] callback   Function to call with result (this will always be
61c387
+ *                       called, whether by this function directly or later via
61c387
+ *                       the main loop, and on success the metadata will be in
61c387
+ *                       its result argument's action_stdout)
61c387
+ * \param[in] user_data  User data to pass to callback
61c387
+ *
61c387
+ * \return Standard Pacemaker return code
61c387
+ * \note This function is not a lrmd_api_operations_t method because it does not
61c387
+ *       need an lrmd_t object and does not go through the executor, but
61c387
+ *       executes the agent directly.
61c387
+ */
61c387
+int
61c387
+lrmd__metadata_async(lrmd_rsc_info_t *rsc,
61c387
+                     void (*callback)(int pid,
61c387
+                                      const pcmk__action_result_t *result,
61c387
+                                      void *user_data),
61c387
+                     void *user_data)
61c387
+{
61c387
+    svc_action_t *action = NULL;
61c387
+    struct metadata_cb *metadata_cb = NULL;
61c387
+    pcmk__action_result_t result = PCMK__UNKNOWN_RESULT;
61c387
+
61c387
+    CRM_CHECK(callback != NULL, return EINVAL);
61c387
+
61c387
+    if ((rsc == NULL) || (rsc->standard == NULL) || (rsc->type == NULL)) {
61c387
+        pcmk__set_result(&result, PCMK_OCF_NOT_CONFIGURED, PCMK_EXEC_ERROR,
61c387
+                         "Invalid resource specification");
61c387
+        callback(0, &result, user_data);
61c387
+        pcmk__reset_result(&result);
61c387
+        return EINVAL;
61c387
+    }
61c387
+
61c387
+    if (strcmp(rsc->standard, PCMK_RESOURCE_CLASS_STONITH) == 0) {
61c387
+        return stonith__metadata_async(rsc->type,
61c387
+                                       CRMD_METADATA_CALL_TIMEOUT / 1000,
61c387
+                                       callback, user_data);
61c387
+    }
61c387
+
61c387
+    action = services__create_resource_action(rsc->type, rsc->standard,
61c387
+                                              rsc->provider, rsc->type,
61c387
+                                              CRMD_ACTION_METADATA, 0,
61c387
+                                              CRMD_METADATA_CALL_TIMEOUT, NULL,
61c387
+                                              0);
61c387
+    if (action == NULL) {
61c387
+        pcmk__set_result(&result, PCMK_OCF_UNKNOWN_ERROR, PCMK_EXEC_ERROR,
61c387
+                         "Out of memory");
61c387
+        callback(0, &result, user_data);
61c387
+        pcmk__reset_result(&result);
61c387
+        return ENOMEM;
61c387
+    }
61c387
+    if (action->rc != PCMK_OCF_UNKNOWN) {
61c387
+        pcmk__set_result(&result, action->rc, action->status,
61c387
+                         services__exit_reason(action));
61c387
+        callback(0, &result, user_data);
61c387
+        pcmk__reset_result(&result);
61c387
+        services_action_free(action);
61c387
+        return EINVAL;
61c387
+    }
61c387
+
61c387
+    action->cb_data = calloc(1, sizeof(struct metadata_cb));
61c387
+    if (action->cb_data == NULL) {
61c387
+        services_action_free(action);
61c387
+        pcmk__set_result(&result, PCMK_OCF_UNKNOWN_ERROR, PCMK_EXEC_ERROR,
61c387
+                         "Out of memory");
61c387
+        callback(0, &result, user_data);
61c387
+        pcmk__reset_result(&result);
61c387
+        return ENOMEM;
61c387
+    }
61c387
+
61c387
+    metadata_cb = (struct metadata_cb *) action->cb_data;
61c387
+    metadata_cb->callback = callback;
61c387
+    metadata_cb->user_data = user_data;
61c387
+    if (!services_action_async(action, metadata_complete)) {
61c387
+        services_action_free(action);
61c387
+        return pcmk_rc_error; // @TODO Derive from action->rc and ->status
61c387
+    }
61c387
+
61c387
+    // The services library has taken responsibility for action
61c387
+    return pcmk_rc_ok;
61c387
+}
61c387
+
61c387
 /*!
61c387
  * \internal
61c387
  * \brief Set the result of an executor event
61c387
-- 
61c387
2.31.1
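
Similarly, a sketch (not part of the patch series) of a caller of the new lrmd__metadata_async() helper, reusing the metadata_done() callback from the previous sketch; the resource class, provider, and agent name are illustrative. As the hunk shows, stonith-class resources are forwarded to stonith__metadata_async(), while other agents run their meta-data action through the services library:

    /* Sketch only: request metadata for an OCF resource agent. */
    #include <stdio.h>
    #include <crm/lrmd.h>
    #include <crm/lrmd_internal.h>

    static void
    fetch_dummy_metadata(void)
    {
        lrmd_rsc_info_t rsc = {
            .standard = "ocf",          // resource class
            .provider = "heartbeat",    // agent provider
            .type = "Dummy",            // agent name
        };

        /* metadata_done() is always invoked, possibly before this call
         * returns if the action could not even be set up
         */
        if (lrmd__metadata_async(&rsc, metadata_done, NULL) != pcmk_rc_ok) {
            fprintf(stderr, "Could not initiate metadata action\n");
        }
    }
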
61c387
61c387
From 48f3165bda7eb59b6ade45ecbafc1ad396564c67 Mon Sep 17 00:00:00 2001
61c387
From: Ken Gaillot <kgaillot@redhat.com>
61c387
Date: Wed, 14 Sep 2022 16:34:37 -0500
61c387
Subject: [PATCH 09/24] Low: controller: ignore CRM_OP_LRM_REFRESH
61c387
61c387
This was only sent by crm_resource --refresh in versions 1.1.9 and earlier.
61c387
Since the local crm_resource is the same version as the controller, and
61c387
Pacemaker Remote was introduced in 1.1.9, this means that only remote nodes
61c387
running 1.1.9 can possibly send it.
61c387
61c387
It didn't really do anything useful anyway, so just ignore it.
61c387
---
61c387
 daemons/controld/controld_execd.c    | 33 +++++-----------------------
61c387
 daemons/controld/controld_messages.c |  2 +-
61c387
 include/crm/crm.h                    |  2 +-
61c387
 lib/pacemaker/pcmk_graph_producer.c  |  3 +--
61c387
 lib/pengine/common.c                 |  2 --
61c387
 5 files changed, 9 insertions(+), 33 deletions(-)
61c387
61c387
diff --git a/daemons/controld/controld_execd.c b/daemons/controld/controld_execd.c
61c387
index fa411a6..719fab0 100644
61c387
--- a/daemons/controld/controld_execd.c
61c387
+++ b/daemons/controld/controld_execd.c
61c387
@@ -1553,32 +1553,6 @@ fail_lrm_resource(xmlNode *xml, lrm_state_t *lrm_state, const char *user_name,
61c387
     lrmd_free_event(op);
61c387
 }
61c387
 
61c387
-static void
61c387
-handle_refresh_op(lrm_state_t *lrm_state, const char *user_name,
61c387
-                  const char *from_host, const char *from_sys)
61c387
-{
61c387
-    int rc = pcmk_ok;
61c387
-    xmlNode *fragment = do_lrm_query_internal(lrm_state, node_update_all);
61c387
-
61c387
-    fsa_cib_update(XML_CIB_TAG_STATUS, fragment, cib_quorum_override, rc, user_name);
61c387
-    crm_info("Forced a local resource history refresh: call=%d", rc);
61c387
-
61c387
-    if (!pcmk__str_eq(CRM_SYSTEM_CRMD, from_sys, pcmk__str_casei)) {
61c387
-        xmlNode *reply = create_request(CRM_OP_INVOKE_LRM, fragment, from_host,
61c387
-                                        from_sys, CRM_SYSTEM_LRMD,
61c387
-                                        fsa_our_uuid);
61c387
-
61c387
-        crm_debug("ACK'ing refresh from %s (%s)", from_sys, from_host);
61c387
-
61c387
-        if (relay_message(reply, TRUE) == FALSE) {
61c387
-            crm_log_xml_err(reply, "Unable to route reply");
61c387
-        }
61c387
-        free_xml(reply);
61c387
-    }
61c387
-
61c387
-    free_xml(fragment);
61c387
-}
61c387
-
61c387
 static void
61c387
 handle_query_op(xmlNode *msg, lrm_state_t *lrm_state)
61c387
 {
61c387
@@ -1787,7 +1761,12 @@ do_lrm_invoke(long long action,
61c387
     }
61c387
 
61c387
     if (pcmk__str_eq(crm_op, CRM_OP_LRM_REFRESH, pcmk__str_casei)) {
61c387
-        handle_refresh_op(lrm_state, user_name, from_host, from_sys);
61c387
+        /* @COMPAT This can only be sent by crm_resource --refresh on a
61c387
+         * Pacemaker Remote node running Pacemaker 1.1.9, which is extremely
61c387
+         * unlikely. It previously would cause the controller to re-write its
61c387
+         * resource history to the CIB. Just ignore it.
61c387
+         */
61c387
+        crm_notice("Ignoring refresh request from Pacemaker Remote 1.1.9 node");
61c387
 
61c387
     } else if (pcmk__str_eq(crm_op, CRM_OP_LRM_QUERY, pcmk__str_casei)) {
61c387
         handle_query_op(input->msg, lrm_state);
61c387
diff --git a/daemons/controld/controld_messages.c b/daemons/controld/controld_messages.c
61c387
index 31d3524..957fc20 100644
61c387
--- a/daemons/controld/controld_messages.c
61c387
+++ b/daemons/controld/controld_messages.c
61c387
@@ -1061,7 +1061,7 @@ handle_request(xmlNode *stored_msg, enum crmd_fsa_cause cause)
61c387
         return handle_lrm_delete(stored_msg);
61c387
 
61c387
     } else if ((strcmp(op, CRM_OP_LRM_FAIL) == 0)
61c387
-               || (strcmp(op, CRM_OP_LRM_REFRESH) == 0)
61c387
+               || (strcmp(op, CRM_OP_LRM_REFRESH) == 0) // @COMPAT
61c387
                || (strcmp(op, CRM_OP_REPROBE) == 0)) {
61c387
 
61c387
         crm_xml_add(stored_msg, F_CRM_SYS_TO, CRM_SYSTEM_LRMD);
61c387
diff --git a/include/crm/crm.h b/include/crm/crm.h
61c387
index 5ec66d2..f2e536e 100644
61c387
--- a/include/crm/crm.h
61c387
+++ b/include/crm/crm.h
61c387
@@ -146,7 +146,7 @@ extern char *crm_system_name;
61c387
 #  define CRM_OP_REGISTER		"register"
61c387
 #  define CRM_OP_IPC_FWD		"ipc_fwd"
61c387
 #  define CRM_OP_INVOKE_LRM	"lrm_invoke"
61c387
-#  define CRM_OP_LRM_REFRESH	"lrm_refresh" /* Deprecated */
61c387
+#  define CRM_OP_LRM_REFRESH "lrm_refresh" //!< Deprecated since 1.1.10
61c387
 #  define CRM_OP_LRM_QUERY	"lrm_query"
61c387
 #  define CRM_OP_LRM_DELETE	"lrm_delete"
61c387
 #  define CRM_OP_LRM_FAIL		"lrm_fail"
61c387
diff --git a/lib/pacemaker/pcmk_graph_producer.c b/lib/pacemaker/pcmk_graph_producer.c
61c387
index 4c1b5a6..0077719 100644
61c387
--- a/lib/pacemaker/pcmk_graph_producer.c
61c387
+++ b/lib/pacemaker/pcmk_graph_producer.c
61c387
@@ -446,8 +446,7 @@ create_graph_action(xmlNode *parent, pe_action_t *action, bool skip_details,
61c387
 
61c387
     } else if (pcmk__str_any_of(action->task,
61c387
                                 CRM_OP_SHUTDOWN,
61c387
-                                CRM_OP_CLEAR_FAILCOUNT,
61c387
-                                CRM_OP_LRM_REFRESH, NULL)) {
61c387
+                                CRM_OP_CLEAR_FAILCOUNT, NULL)) {
61c387
         action_xml = create_xml_node(parent, XML_GRAPH_TAG_CRM_EVENT);
61c387
 
61c387
     } else if (pcmk__str_eq(action->task, CRM_OP_LRM_DELETE, pcmk__str_none)) {
61c387
diff --git a/lib/pengine/common.c b/lib/pengine/common.c
61c387
index 93ba3fe..7db9d0e 100644
61c387
--- a/lib/pengine/common.c
61c387
+++ b/lib/pengine/common.c
61c387
@@ -384,8 +384,6 @@ text2task(const char *task)
61c387
         return no_action;
61c387
     } else if (pcmk__str_eq(task, CRMD_ACTION_STATUS, pcmk__str_casei)) {
61c387
         return no_action;
61c387
-    } else if (pcmk__str_eq(task, CRM_OP_LRM_REFRESH, pcmk__str_casei)) {
61c387
-        return no_action;
61c387
     } else if (pcmk__str_eq(task, CRMD_ACTION_MIGRATE, pcmk__str_casei)) {
61c387
         return no_action;
61c387
     } else if (pcmk__str_eq(task, CRMD_ACTION_MIGRATED, pcmk__str_casei)) {
61c387
-- 
61c387
2.31.1
61c387
61c387
From 366f943f30f2d3d67b74db2bafa0098874f1b67e Mon Sep 17 00:00:00 2001
61c387
From: Ken Gaillot <kgaillot@redhat.com>
61c387
Date: Wed, 14 Sep 2022 16:46:15 -0500
61c387
Subject: [PATCH 10/24] API: libcrmcommon: deprecate CRM_OP_LRM_QUERY
61c387
61c387
This has been unused since at least Pacemaker 1.0.0, and since we don't support
61c387
rolling upgrades from anything that old, and Pacemaker Remote didn't exist
61c387
then, we can just drop support for it entirely.
61c387
---
61c387
 daemons/controld/controld_execd.c | 17 -----------------
61c387
 include/crm/crm.h                 |  1 -
61c387
 include/crm/crm_compat.h          |  5 ++++-
61c387
 3 files changed, 4 insertions(+), 19 deletions(-)
61c387
61c387
diff --git a/daemons/controld/controld_execd.c b/daemons/controld/controld_execd.c
61c387
index 719fab0..54e6818 100644
61c387
--- a/daemons/controld/controld_execd.c
61c387
+++ b/daemons/controld/controld_execd.c
61c387
@@ -1553,20 +1553,6 @@ fail_lrm_resource(xmlNode *xml, lrm_state_t *lrm_state, const char *user_name,
61c387
     lrmd_free_event(op);
61c387
 }
61c387
 
61c387
-static void
61c387
-handle_query_op(xmlNode *msg, lrm_state_t *lrm_state)
61c387
-{
61c387
-    xmlNode *data = do_lrm_query_internal(lrm_state, node_update_all);
61c387
-    xmlNode *reply = create_reply(msg, data);
61c387
-
61c387
-    if (relay_message(reply, TRUE) == FALSE) {
61c387
-        crm_err("Unable to route reply");
61c387
-        crm_log_xml_err(reply, "reply");
61c387
-    }
61c387
-    free_xml(reply);
61c387
-    free_xml(data);
61c387
-}
61c387
-
61c387
 static void
61c387
 handle_reprobe_op(lrm_state_t *lrm_state, const char *from_sys,
61c387
                   const char *from_host, const char *user_name,
61c387
@@ -1768,9 +1754,6 @@ do_lrm_invoke(long long action,
61c387
          */
61c387
         crm_notice("Ignoring refresh request from Pacemaker Remote 1.1.9 node");
61c387
 
61c387
-    } else if (pcmk__str_eq(crm_op, CRM_OP_LRM_QUERY, pcmk__str_casei)) {
61c387
-        handle_query_op(input->msg, lrm_state);
61c387
-
61c387
     // @COMPAT DCs <1.1.14 in a rolling upgrade might schedule this op
61c387
     } else if (pcmk__str_eq(operation, CRM_OP_PROBED, pcmk__str_casei)) {
61c387
         update_attrd(lrm_state->node_name, CRM_OP_PROBED, XML_BOOLEAN_TRUE,
61c387
diff --git a/include/crm/crm.h b/include/crm/crm.h
61c387
index f2e536e..38915e3 100644
61c387
--- a/include/crm/crm.h
61c387
+++ b/include/crm/crm.h
61c387
@@ -147,7 +147,6 @@ extern char *crm_system_name;
61c387
 #  define CRM_OP_IPC_FWD		"ipc_fwd"
61c387
 #  define CRM_OP_INVOKE_LRM	"lrm_invoke"
61c387
 #  define CRM_OP_LRM_REFRESH "lrm_refresh" //!< Deprecated since 1.1.10
61c387
-#  define CRM_OP_LRM_QUERY	"lrm_query"
61c387
 #  define CRM_OP_LRM_DELETE	"lrm_delete"
61c387
 #  define CRM_OP_LRM_FAIL		"lrm_fail"
61c387
 #  define CRM_OP_PROBED		"probe_complete"
61c387
diff --git a/include/crm/crm_compat.h b/include/crm/crm_compat.h
61c387
index 3b35a5e..8a4b368 100644
61c387
--- a/include/crm/crm_compat.h
61c387
+++ b/include/crm/crm_compat.h
61c387
@@ -1,5 +1,5 @@
61c387
 /*
61c387
- * Copyright 2004-2021 the Pacemaker project contributors
61c387
+ * Copyright 2004-2022 the Pacemaker project contributors
61c387
  *
61c387
  * The version control history for this file may have further details.
61c387
  *
61c387
@@ -31,6 +31,9 @@ extern "C" {
61c387
 //! \deprecated This defined constant will be removed in a future release
61c387
 #define MAX_IPC_DELAY 120
61c387
 
61c387
+//! \deprecated This defined constant will be removed in a future release
61c387
+#define CRM_OP_LRM_QUERY "lrm_query"
61c387
+
61c387
 //!@{
61c387
 //! \deprecated This macro will be removed in a future release
61c387
 
61c387
-- 
61c387
2.31.1
61c387
61c387
From 7107aaceffb243990846e4a6cd5d2f0c82b3c0cf Mon Sep 17 00:00:00 2001
61c387
From: Ken Gaillot <kgaillot@redhat.com>
61c387
Date: Wed, 14 Sep 2022 16:49:48 -0500
61c387
Subject: [PATCH 11/24] Refactor: controller: drop do_lrm_query_internal()
61c387
61c387
Now that there's only one (short) caller, just move its contents there
61c387
---
61c387
 daemons/controld/controld_execd.c | 28 +++++++++++-----------------
61c387
 1 file changed, 11 insertions(+), 17 deletions(-)
61c387
61c387
diff --git a/daemons/controld/controld_execd.c b/daemons/controld/controld_execd.c
61c387
index 54e6818..99c9193 100644
61c387
--- a/daemons/controld/controld_execd.c
61c387
+++ b/daemons/controld/controld_execd.c
61c387
@@ -811,19 +811,26 @@ build_active_RAs(lrm_state_t * lrm_state, xmlNode * rsc_list)
61c387
     return FALSE;
61c387
 }
61c387
 
61c387
-static xmlNode *
61c387
-do_lrm_query_internal(lrm_state_t *lrm_state, int update_flags)
61c387
+xmlNode *
61c387
+controld_query_executor_state(const char *node_name)
61c387
 {
61c387
     xmlNode *xml_state = NULL;
61c387
     xmlNode *xml_data = NULL;
61c387
     xmlNode *rsc_list = NULL;
61c387
     crm_node_t *peer = NULL;
61c387
+    lrm_state_t *lrm_state = lrm_state_find(node_name);
61c387
+
61c387
+    if (!lrm_state) {
61c387
+        crm_err("Could not find executor state for node %s", node_name);
61c387
+        return NULL;
61c387
+    }
61c387
 
61c387
     peer = crm_get_peer_full(0, lrm_state->node_name, CRM_GET_PEER_ANY);
61c387
     CRM_CHECK(peer != NULL, return NULL);
61c387
 
61c387
-    xml_state = create_node_state_update(peer, update_flags, NULL,
61c387
-                                         __func__);
61c387
+    xml_state = create_node_state_update(peer,
61c387
+                                         node_update_cluster|node_update_peer,
61c387
+                                         NULL, __func__);
61c387
     if (xml_state == NULL) {
61c387
         return NULL;
61c387
     }
61c387
@@ -840,19 +847,6 @@ do_lrm_query_internal(lrm_state_t *lrm_state, int update_flags)
61c387
     return xml_state;
61c387
 }
61c387
 
61c387
-xmlNode *
61c387
-controld_query_executor_state(const char *node_name)
61c387
-{
61c387
-    lrm_state_t *lrm_state = lrm_state_find(node_name);
61c387
-
61c387
-    if (!lrm_state) {
61c387
-        crm_err("Could not find executor state for node %s", node_name);
61c387
-        return NULL;
61c387
-    }
61c387
-    return do_lrm_query_internal(lrm_state,
61c387
-                                 node_update_cluster|node_update_peer);
61c387
-}
61c387
-
61c387
 /*!
61c387
  * \internal
61c387
  * \brief Map standard Pacemaker return code to operation status and OCF code
61c387
-- 
61c387
2.31.1
61c387
61c387
From 9549e2bdd0781a6b09546da6df793d549d4db0cc Mon Sep 17 00:00:00 2001
61c387
From: Ken Gaillot <kgaillot@redhat.com>
61c387
Date: Wed, 14 Sep 2022 15:48:44 -0500
61c387
Subject: [PATCH 12/24] Doc: controller: drop pointless comment
61c387
61c387
It's (likely?) impossible for a live cluster to have been doing rolling
61c387
upgrades since 2006.
61c387
---
61c387
 daemons/controld/controld_execd.c | 12 +-----------
61c387
 1 file changed, 1 insertion(+), 11 deletions(-)
61c387
61c387
diff --git a/daemons/controld/controld_execd.c b/daemons/controld/controld_execd.c
61c387
index 99c9193..53b1156 100644
61c387
--- a/daemons/controld/controld_execd.c
61c387
+++ b/daemons/controld/controld_execd.c
61c387
@@ -678,18 +678,8 @@ build_operation_update(xmlNode * parent, lrmd_rsc_info_t * rsc, lrmd_event_data_
61c387
 
61c387
     target_rc = rsc_op_expected_rc(op);
61c387
 
61c387
-    /* there is a small risk in formerly mixed clusters that it will
61c387
-     * be sub-optimal.
61c387
-     *
61c387
-     * however with our upgrade policy, the update we send should
61c387
-     * still be completely supported anyway
61c387
-     */
61c387
     caller_version = g_hash_table_lookup(op->params, XML_ATTR_CRM_VERSION);
61c387
-    CRM_LOG_ASSERT(caller_version != NULL);
61c387
-
61c387
-    if(caller_version == NULL) {
61c387
-        caller_version = CRM_FEATURE_SET;
61c387
-    }
61c387
+    CRM_CHECK(caller_version != NULL, caller_version = CRM_FEATURE_SET);
61c387
 
61c387
     xml_op = pcmk__create_history_xml(parent, op, caller_version, target_rc,
61c387
                                       fsa_our_uname, src);
61c387
-- 
61c387
2.31.1
61c387
61c387
From 393b6f25be30e7e5d5d63c70dc479aa5e2cd14e4 Mon Sep 17 00:00:00 2001
61c387
From: Ken Gaillot <kgaillot@redhat.com>
61c387
Date: Thu, 15 Sep 2022 11:24:28 -0500
61c387
Subject: [PATCH 13/24] Refactor: controller: move where reload actions get
61c387
 remapped
61c387
61c387
... from do_lrm_invoke() to do_lrm_rsc_op(), which will make planned changes
61c387
easier
61c387
---
61c387
 daemons/controld/controld_execd.c | 38 ++++++++++++++++---------------
61c387
 1 file changed, 20 insertions(+), 18 deletions(-)
61c387
61c387
diff --git a/daemons/controld/controld_execd.c b/daemons/controld/controld_execd.c
61c387
index 53b1156..c9f0cc7 100644
61c387
--- a/daemons/controld/controld_execd.c
61c387
+++ b/daemons/controld/controld_execd.c
61c387
@@ -43,7 +43,8 @@ static gboolean stop_recurring_actions(gpointer key, gpointer value, gpointer us
61c387
 static lrmd_event_data_t *construct_op(lrm_state_t * lrm_state, xmlNode * rsc_op,
61c387
                                        const char *rsc_id, const char *operation);
61c387
 static void do_lrm_rsc_op(lrm_state_t *lrm_state, lrmd_rsc_info_t *rsc,
61c387
-                          const char *operation, xmlNode *msg);
61c387
+                          const char *operation, xmlNode *msg,
61c387
+                          struct ra_metadata_s *md);
61c387
 
61c387
 static gboolean lrm_state_verify_stopped(lrm_state_t * lrm_state, enum crmd_fsa_state cur_state,
61c387
                                          int log_level);
61c387
@@ -1808,26 +1809,12 @@ do_lrm_invoke(long long action,
61c387
             do_lrm_delete(input, lrm_state, rsc, from_sys, from_host,
61c387
                           crm_rsc_delete, user_name);
61c387
 
61c387
-        } else if (pcmk__str_any_of(operation, CRMD_ACTION_RELOAD,
61c387
-                                    CRMD_ACTION_RELOAD_AGENT, NULL)) {
61c387
-            /* Pre-2.1.0 DCs will schedule reload actions only, and 2.1.0+ DCs
61c387
-             * will schedule reload-agent actions only. In either case, we need
61c387
-             * to map that to whatever the resource agent actually supports.
61c387
-             * Default to the OCF 1.1 name.
61c387
-             */
61c387
+        } else {
61c387
             struct ra_metadata_s *md = NULL;
61c387
-            const char *reload_name = CRMD_ACTION_RELOAD_AGENT;
61c387
 
61c387
             md = controld_get_rsc_metadata(lrm_state, rsc,
61c387
                                            controld_metadata_from_cache);
61c387
-            if ((md != NULL)
61c387
-                && pcmk_is_set(md->ra_flags, ra_supports_legacy_reload)) {
61c387
-                reload_name = CRMD_ACTION_RELOAD;
61c387
-            }
61c387
-            do_lrm_rsc_op(lrm_state, rsc, reload_name, input->xml);
61c387
-
61c387
-        } else {
61c387
-            do_lrm_rsc_op(lrm_state, rsc, operation, input->xml);
61c387
+            do_lrm_rsc_op(lrm_state, rsc, operation, input->xml, md);
61c387
         }
61c387
 
61c387
         lrmd_free_rsc_info(rsc);
61c387
@@ -2176,7 +2163,7 @@ record_pending_op(const char *node_name, lrmd_rsc_info_t *rsc, lrmd_event_data_t
61c387
 
61c387
 static void
61c387
 do_lrm_rsc_op(lrm_state_t *lrm_state, lrmd_rsc_info_t *rsc,
61c387
-              const char *operation, xmlNode *msg)
61c387
+              const char *operation, xmlNode *msg, struct ra_metadata_s *md)
61c387
 {
61c387
     int rc;
61c387
     int call_id = 0;
61c387
@@ -2198,6 +2185,21 @@ do_lrm_rsc_op(lrm_state_t *lrm_state, lrmd_rsc_info_t *rsc,
61c387
         }
61c387
     }
61c387
 
61c387
+    if (pcmk__str_any_of(operation, CRMD_ACTION_RELOAD,
61c387
+                         CRMD_ACTION_RELOAD_AGENT, NULL)) {
61c387
+        /* Pre-2.1.0 DCs will schedule reload actions only, and 2.1.0+ DCs
61c387
+         * will schedule reload-agent actions only. In either case, we need
61c387
+         * to map that to whatever the resource agent actually supports.
61c387
+         * Default to the OCF 1.1 name.
61c387
+         */
61c387
+        if ((md != NULL)
61c387
+            && pcmk_is_set(md->ra_flags, ra_supports_legacy_reload)) {
61c387
+            operation = CRMD_ACTION_RELOAD;
61c387
+        } else {
61c387
+            operation = CRMD_ACTION_RELOAD_AGENT;
61c387
+        }
61c387
+    }
61c387
+
61c387
     op = construct_op(lrm_state, msg, rsc->id, operation);
61c387
     CRM_CHECK(op != NULL, return);
61c387
 
61c387
-- 
61c387
2.31.1
61c387
61c387
From b134383ef516a7c676a714b3b51594fcfaec9ede Mon Sep 17 00:00:00 2001
61c387
From: Ken Gaillot <kgaillot@redhat.com>
61c387
Date: Thu, 15 Sep 2022 15:12:06 -0500
61c387
Subject: [PATCH 14/24] Refactor: controller: drop operation argument to
61c387
 do_lrm_rsc_op()
61c387
61c387
It can be derived from the XML argument
61c387
---
61c387
 daemons/controld/controld_execd.c | 24 ++++++++++++------------
61c387
 1 file changed, 12 insertions(+), 12 deletions(-)
61c387
61c387
diff --git a/daemons/controld/controld_execd.c b/daemons/controld/controld_execd.c
61c387
index c9f0cc7..3590f98 100644
61c387
--- a/daemons/controld/controld_execd.c
61c387
+++ b/daemons/controld/controld_execd.c
61c387
@@ -43,8 +43,7 @@ static gboolean stop_recurring_actions(gpointer key, gpointer value, gpointer us
61c387
 static lrmd_event_data_t *construct_op(lrm_state_t * lrm_state, xmlNode * rsc_op,
61c387
                                        const char *rsc_id, const char *operation);
61c387
 static void do_lrm_rsc_op(lrm_state_t *lrm_state, lrmd_rsc_info_t *rsc,
61c387
-                          const char *operation, xmlNode *msg,
61c387
-                          struct ra_metadata_s *md);
61c387
+                          xmlNode *msg, struct ra_metadata_s *md);
61c387
 
61c387
 static gboolean lrm_state_verify_stopped(lrm_state_t * lrm_state, enum crmd_fsa_state cur_state,
61c387
                                          int log_level);
61c387
@@ -1814,7 +1813,7 @@ do_lrm_invoke(long long action,
61c387
 
61c387
             md = controld_get_rsc_metadata(lrm_state, rsc,
61c387
                                            controld_metadata_from_cache);
61c387
-            do_lrm_rsc_op(lrm_state, rsc, operation, input->xml, md);
61c387
+            do_lrm_rsc_op(lrm_state, rsc, input->xml, md);
61c387
         }
61c387
 
61c387
         lrmd_free_rsc_info(rsc);
61c387
@@ -2162,8 +2161,8 @@ record_pending_op(const char *node_name, lrmd_rsc_info_t *rsc, lrmd_event_data_t
61c387
 }
61c387
 
61c387
 static void
61c387
-do_lrm_rsc_op(lrm_state_t *lrm_state, lrmd_rsc_info_t *rsc,
61c387
-              const char *operation, xmlNode *msg, struct ra_metadata_s *md)
61c387
+do_lrm_rsc_op(lrm_state_t *lrm_state, lrmd_rsc_info_t *rsc, xmlNode *msg,
61c387
+              struct ra_metadata_s *md)
61c387
 {
61c387
     int rc;
61c387
     int call_id = 0;
61c387
@@ -2172,17 +2171,18 @@ do_lrm_rsc_op(lrm_state_t *lrm_state, lrmd_rsc_info_t *rsc,
61c387
     lrmd_key_value_t *params = NULL;
61c387
     fsa_data_t *msg_data = NULL;
61c387
     const char *transition = NULL;
61c387
+    const char *operation = NULL;
61c387
     gboolean stop_recurring = FALSE;
61c387
     const char *nack_reason = NULL;
61c387
 
61c387
-    CRM_CHECK(rsc != NULL, return);
61c387
-    CRM_CHECK(operation != NULL, return);
61c387
+    CRM_CHECK((rsc != NULL) && (msg != NULL), return);
61c387
 
61c387
-    if (msg != NULL) {
61c387
-        transition = crm_element_value(msg, XML_ATTR_TRANSITION_KEY);
61c387
-        if (transition == NULL) {
61c387
-            crm_log_xml_err(msg, "Missing transition number");
61c387
-        }
61c387
+    operation = crm_element_value(msg, XML_LRM_ATTR_TASK);
61c387
+    CRM_CHECK(!pcmk__str_empty(operation), return);
61c387
+
61c387
+    transition = crm_element_value(msg, XML_ATTR_TRANSITION_KEY);
61c387
+    if (pcmk__str_empty(transition)) {
61c387
+        crm_log_xml_err(msg, "Missing transition number");
61c387
     }
61c387
 
61c387
     if (pcmk__str_any_of(operation, CRMD_ACTION_RELOAD,
61c387
-- 
61c387
2.31.1
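
The refactor above derives the action name from the request XML instead of passing it as a separate argument. As a rough illustration of that pattern outside Pacemaker, the sketch below reads and validates an attribute with plain libxml2; the attribute name "operation" is assumed here as a stand-in for the XML_LRM_ATTR_TASK constant, and dispatch_rsc_op() is a hypothetical helper.

#include <stdio.h>
#include <libxml/tree.h>
#include <libxml/xmlmemory.h>

static int
dispatch_rsc_op(xmlNode *msg)
{
    xmlChar *operation = xmlGetProp(msg, (const xmlChar *) "operation");

    if ((operation == NULL) || (*operation == '\0')) {
        xmlFree(operation);             /* reject requests without a task */
        return -1;
    }
    printf("executing %s\n", (const char *) operation);
    xmlFree(operation);
    return 0;
}
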
61c387
61c387
From 563f6e8cebb2b64ba5426b87e5923cbc5bb1df11 Mon Sep 17 00:00:00 2001
61c387
From: Ken Gaillot <kgaillot@redhat.com>
61c387
Date: Thu, 15 Sep 2022 15:40:38 -0500
61c387
Subject: [PATCH 15/24] Low: controller: add failsafe for no executor
61c387
 connection
61c387
61c387
... in do_lrm_rsc_op(), to make planned changes easier
61c387
---
61c387
 daemons/controld/controld_execd.c | 11 +++++++++++
61c387
 1 file changed, 11 insertions(+)
61c387
61c387
diff --git a/daemons/controld/controld_execd.c b/daemons/controld/controld_execd.c
61c387
index 3590f98..274ca95 100644
61c387
--- a/daemons/controld/controld_execd.c
61c387
+++ b/daemons/controld/controld_execd.c
61c387
@@ -2185,6 +2185,17 @@ do_lrm_rsc_op(lrm_state_t *lrm_state, lrmd_rsc_info_t *rsc, xmlNode *msg,
61c387
         crm_log_xml_err(msg, "Missing transition number");
61c387
     }
61c387
 
61c387
+    if (lrm_state == NULL) {
61c387
+        // This shouldn't be possible, but provide a failsafe just in case
61c387
+        crm_err("Cannot execute %s of %s: No executor connection "
61c387
+                CRM_XS " transition_key=%s",
61c387
+                operation, rsc->id, (transition != NULL) ? transition : "");
61c387
+        synthesize_lrmd_failure(NULL, msg, PCMK_EXEC_INVALID,
61c387
+                                PCMK_OCF_UNKNOWN_ERROR,
61c387
+                                "No executor connection");
61c387
+        return;
61c387
+    }
61c387
+
61c387
     if (pcmk__str_any_of(operation, CRMD_ACTION_RELOAD,
61c387
                          CRMD_ACTION_RELOAD_AGENT, NULL)) {
61c387
         /* Pre-2.1.0 DCs will schedule reload actions only, and 2.1.0+ DCs
61c387
-- 
61c387
2.31.1
61c387
61c387
From 571a5766e025f2a1235be169180e6eb5a9b7b7ea Mon Sep 17 00:00:00 2001
61c387
From: Ken Gaillot <kgaillot@redhat.com>
61c387
Date: Thu, 15 Sep 2022 12:04:31 -0500
61c387
Subject: [PATCH 16/24] Log: controller: improve messages when metadata cache
61c387
 update fails
61c387
61c387
Previously, metadata_cache_update() or ra_param_from_xml() would log an error,
61c387
then controld_get_rsc_metadata() (but not the other caller,
61c387
process_lrm_event()) would log another warning with the agent info.
61c387
61c387
Combine these into a single message always logged by metadata_cache_update(),
61c387
which also has been renamed to controld_cache_metadata().
61c387
---
61c387
 daemons/controld/controld_execd.c    |  2 +-
61c387
 daemons/controld/controld_metadata.c | 27 ++++++++++++---------------
61c387
 daemons/controld/controld_metadata.h |  6 +++---
61c387
 3 files changed, 16 insertions(+), 19 deletions(-)
61c387
61c387
diff --git a/daemons/controld/controld_execd.c b/daemons/controld/controld_execd.c
61c387
index 274ca95..1ffcc64 100644
61c387
--- a/daemons/controld/controld_execd.c
61c387
+++ b/daemons/controld/controld_execd.c
61c387
@@ -2858,7 +2858,7 @@ process_lrm_event(lrm_state_t *lrm_state, lrmd_event_data_t *op,
61c387
         } else if (rsc && (op->rc == PCMK_OCF_OK)) {
61c387
             char *metadata = unescape_newlines(op->output);
61c387
 
61c387
-            metadata_cache_update(lrm_state->metadata_cache, rsc, metadata);
61c387
+            controld_cache_metadata(lrm_state->metadata_cache, rsc, metadata);
61c387
             free(metadata);
61c387
         }
61c387
     }
61c387
diff --git a/daemons/controld/controld_metadata.c b/daemons/controld/controld_metadata.c
61c387
index 8c6f195..ddcd5db 100644
61c387
--- a/daemons/controld/controld_metadata.c
61c387
+++ b/daemons/controld/controld_metadata.c
61c387
@@ -149,13 +149,11 @@ ra_param_from_xml(xmlNode *param_xml)
61c387
 
61c387
     p = calloc(1, sizeof(struct ra_param_s));
61c387
     if (p == NULL) {
61c387
-        crm_crit("Could not allocate memory for resource metadata");
61c387
         return NULL;
61c387
     }
61c387
 
61c387
     p->rap_name = strdup(param_name);
61c387
     if (p->rap_name == NULL) {
61c387
-        crm_crit("Could not allocate memory for resource metadata");
61c387
         free(p);
61c387
         return NULL;
61c387
     }
61c387
@@ -196,10 +194,11 @@ log_ra_ocf_version(const char *ra_key, const char *ra_ocf_version)
61c387
 }
61c387
 
61c387
 struct ra_metadata_s *
61c387
-metadata_cache_update(GHashTable *mdc, lrmd_rsc_info_t *rsc,
61c387
-                      const char *metadata_str)
61c387
+controld_cache_metadata(GHashTable *mdc, const lrmd_rsc_info_t *rsc,
61c387
+                        const char *metadata_str)
61c387
 {
61c387
     char *key = NULL;
61c387
+    const char *reason = NULL;
61c387
     xmlNode *metadata = NULL;
61c387
     xmlNode *match = NULL;
61c387
     struct ra_metadata_s *md = NULL;
61c387
@@ -210,20 +209,19 @@ metadata_cache_update(GHashTable *mdc, lrmd_rsc_info_t *rsc,
61c387
 
61c387
     key = crm_generate_ra_key(rsc->standard, rsc->provider, rsc->type);
61c387
     if (!key) {
61c387
-        crm_crit("Could not allocate memory for resource metadata");
61c387
+        reason = "Invalid resource agent standard or type";
61c387
         goto err;
61c387
     }
61c387
 
61c387
     metadata = string2xml(metadata_str);
61c387
     if (!metadata) {
61c387
-        crm_err("Metadata for %s:%s:%s is not valid XML",
61c387
-                rsc->standard, rsc->provider, rsc->type);
61c387
+        reason = "Metadata is not valid XML";
61c387
         goto err;
61c387
     }
61c387
 
61c387
     md = calloc(1, sizeof(struct ra_metadata_s));
61c387
     if (md == NULL) {
61c387
-        crm_crit("Could not allocate memory for resource metadata");
61c387
+        reason = "Could not allocate memory";
61c387
         goto err;
61c387
     }
61c387
 
61c387
@@ -281,6 +279,7 @@ metadata_cache_update(GHashTable *mdc, lrmd_rsc_info_t *rsc,
61c387
             struct ra_param_s *p = ra_param_from_xml(match);
61c387
 
61c387
             if (p == NULL) {
61c387
+                reason = "Could not allocate memory";
61c387
                 goto err;
61c387
             }
61c387
             if (pcmk_is_set(p->rap_flags, ra_param_private)) {
61c387
@@ -311,6 +310,9 @@ metadata_cache_update(GHashTable *mdc, lrmd_rsc_info_t *rsc,
61c387
     return md;
61c387
 
61c387
 err:
61c387
+    crm_warn("Unable to update metadata for %s (%s%s%s:%s): %s",
61c387
+             rsc->id, rsc->standard, ((rsc->provider == NULL)? "" : ":"),
61c387
+             ((rsc->provider == NULL)? "" : rsc->provider), rsc->type, reason);
61c387
     free(key);
61c387
     free_xml(metadata);
61c387
     metadata_free(md);
61c387
@@ -377,13 +379,8 @@ controld_get_rsc_metadata(lrm_state_t *lrm_state, lrmd_rsc_info_t *rsc,
61c387
         return NULL;
61c387
     }
61c387
 
61c387
-    metadata = metadata_cache_update(lrm_state->metadata_cache, rsc,
61c387
-                                     metadata_str);
61c387
+    metadata = controld_cache_metadata(lrm_state->metadata_cache, rsc,
61c387
+                                       metadata_str);
61c387
     free(metadata_str);
61c387
-    if (metadata == NULL) {
61c387
-        crm_warn("Failed to update metadata for %s (%s%s%s:%s)",
61c387
-                 rsc->id, rsc->standard, ((rsc->provider == NULL)? "" : ":"),
61c387
-                 ((rsc->provider == NULL)? "" : rsc->provider), rsc->type);
61c387
-    }
61c387
     return metadata;
61c387
 }
61c387
diff --git a/daemons/controld/controld_metadata.h b/daemons/controld/controld_metadata.h
61c387
index 7354f94..3903cce 100644
61c387
--- a/daemons/controld/controld_metadata.h
61c387
+++ b/daemons/controld/controld_metadata.h
61c387
@@ -73,9 +73,9 @@ void metadata_cache_free(GHashTable *mdc);
61c387
 void metadata_cache_reset(GHashTable *mdc);
61c387
 void metadata_cache_fini(void);
61c387
 
61c387
-struct ra_metadata_s *metadata_cache_update(GHashTable *mdc,
61c387
-                                            lrmd_rsc_info_t *rsc,
61c387
-                                            const char *metadata_str);
61c387
+struct ra_metadata_s *controld_cache_metadata(GHashTable *mdc,
61c387
+                                              const lrmd_rsc_info_t *rsc,
61c387
+                                              const char *metadata_str);
61c387
 struct ra_metadata_s *controld_get_rsc_metadata(lrm_state_t *lrm_state,
61c387
                                                 lrmd_rsc_info_t *rsc,
61c387
                                                 uint32_t source);
61c387
-- 
61c387
2.31.1
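
The rewrite above funnels every failure path through one label that logs a single warning with a recorded reason. Below is a compact, generic illustration of that shape, assuming stand-in names and fprintf() in place of crm_warn():

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static char *
cache_metadata(const char *agent, const char *metadata_str)
{
    const char *reason = NULL;
    char *copy = NULL;

    if ((metadata_str == NULL) || (*metadata_str == '\0')) {
        reason = "No metadata provided";
        goto err;
    }
    copy = strdup(metadata_str);
    if (copy == NULL) {
        reason = "Could not allocate memory";
        goto err;
    }
    return copy;        /* success path returns without logging */

err:
    fprintf(stderr, "Unable to update metadata for %s: %s\n", agent, reason);
    free(copy);
    return NULL;
}
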
61c387
61c387
From 6738e4d1aef78955c796f0afa431d656f25c8179 Mon Sep 17 00:00:00 2001
61c387
From: Ken Gaillot <kgaillot@redhat.com>
61c387
Date: Thu, 15 Sep 2022 13:33:36 -0500
61c387
Subject: [PATCH 17/24] Fix: controller: pre-load agent metadata asynchronously
61c387
61c387
The controller needs resource agent metadata to record digests with pending and
61c387
completed resource actions.
61c387
61c387
Previously, metadata was collected synchronously when needed. This caused
61c387
several problems, two of which are fixed here for most actions: synchronous
61c387
execution blocks the controller from doing anything else (and if the agent's
61c387
metadata action tries to contact the controller, that blocks everything until
61c387
the action times out), and the metadata action ate into the real action's
the action times out), and the metadata action eats into the real action's
61c387
timeout.
61c387
61c387
Now, if we're likely to need metadata for an action, attempt to get it
61c387
asynchronously before executing that action, so the metadata is available in
61c387
cache when needed.
61c387
61c387
This is not a complete solution, as there are other code paths that might
61c387
require metadata and still lead to synchronous execution, but it handles the
61c387
most important cases.
61c387
61c387
Fixes T554
61c387
---
61c387
 daemons/controld/controld_execd.c    | 105 +++++++++++++++++++++++----
61c387
 daemons/controld/controld_metadata.c |  22 +++---
61c387
 2 files changed, 102 insertions(+), 25 deletions(-)
61c387
61c387
diff --git a/daemons/controld/controld_execd.c b/daemons/controld/controld_execd.c
61c387
index 1ffcc64..6b65f52 100644
61c387
--- a/daemons/controld/controld_execd.c
61c387
+++ b/daemons/controld/controld_execd.c
61c387
@@ -670,7 +670,6 @@ build_operation_update(xmlNode * parent, lrmd_rsc_info_t * rsc, lrmd_event_data_
61c387
     struct ra_metadata_s *metadata = NULL;
61c387
     const char *caller_version = NULL;
61c387
     lrm_state_t *lrm_state = NULL;
61c387
-    uint32_t metadata_source = controld_metadata_from_agent;
61c387
 
61c387
     if (op == NULL) {
61c387
         return FALSE;
61c387
@@ -703,19 +702,14 @@ build_operation_update(xmlNode * parent, lrmd_rsc_info_t * rsc, lrmd_event_data_
61c387
         return TRUE;
61c387
     }
61c387
 
61c387
-    /* Getting meta-data from cache is OK unless this is a successful start
61c387
-     * action -- always refresh from the agent for those, in case the
61c387
-     * resource agent was updated.
61c387
+    /* Ideally the metadata is cached, and the agent is just a fallback.
61c387
      *
61c387
-     * @TODO Only refresh the meta-data after starts if the agent actually
61c387
-     * changed (using something like inotify, or a hash or modification time of
61c387
-     * the agent executable).
61c387
+     * @TODO Go through all callers and ensure they get metadata asynchronously
61c387
+     * first.
61c387
      */
61c387
-    if ((op->op_status != PCMK_EXEC_DONE) || (op->rc != target_rc)
61c387
-        || !pcmk__str_eq(op->op_type, CRMD_ACTION_START, pcmk__str_none)) {
61c387
-        metadata_source |= controld_metadata_from_cache;
61c387
-    }
61c387
-    metadata = controld_get_rsc_metadata(lrm_state, rsc, metadata_source);
61c387
+    metadata = controld_get_rsc_metadata(lrm_state, rsc,
61c387
+                                         controld_metadata_from_agent
61c387
+                                         |controld_metadata_from_cache);
61c387
     if (metadata == NULL) {
61c387
         return TRUE;
61c387
     }
61c387
@@ -1673,6 +1667,56 @@ do_lrm_delete(ha_msg_input_t *input, lrm_state_t *lrm_state,
61c387
                     user_name, input, unregister);
61c387
 }
61c387
 
61c387
+// User data for asynchronous metadata execution
61c387
+struct metadata_cb_data {
61c387
+    lrmd_rsc_info_t *rsc;   // Copy of resource information
61c387
+    xmlNode *input_xml;     // Copy of FSA input XML
61c387
+};
61c387
+
61c387
+static struct metadata_cb_data *
61c387
+new_metadata_cb_data(lrmd_rsc_info_t *rsc, xmlNode *input_xml)
61c387
+{
61c387
+    struct metadata_cb_data *data = NULL;
61c387
+
61c387
+    data = calloc(1, sizeof(struct metadata_cb_data));
61c387
+    CRM_ASSERT(data != NULL);
61c387
+    data->input_xml = copy_xml(input_xml);
61c387
+    data->rsc = lrmd_copy_rsc_info(rsc);
61c387
+    return data;
61c387
+}
61c387
+
61c387
+static void
61c387
+free_metadata_cb_data(struct metadata_cb_data *data)
61c387
+{
61c387
+    lrmd_free_rsc_info(data->rsc);
61c387
+    free_xml(data->input_xml);
61c387
+    free(data);
61c387
+}
61c387
+
61c387
+/*!
61c387
+ * \internal
61c387
+ * \brief Execute an action after metadata has been retrieved
61c387
+ *
61c387
+ * \param[in] pid        Ignored
61c387
+ * \param[in] result     Result of metadata action
61c387
+ * \param[in] user_data  Metadata callback data
61c387
+ */
61c387
+static void
61c387
+metadata_complete(int pid, const pcmk__action_result_t *result, void *user_data)
61c387
+{
61c387
+    struct metadata_cb_data *data = (struct metadata_cb_data *) user_data;
61c387
+
61c387
+    struct ra_metadata_s *md = NULL;
61c387
+    lrm_state_t *lrm_state = lrm_state_find(lrm_op_target(data->input_xml));
61c387
+
61c387
+    if ((lrm_state != NULL) && pcmk__result_ok(result)) {
61c387
+        md = controld_cache_metadata(lrm_state->metadata_cache, data->rsc,
61c387
+                                     result->action_stdout);
61c387
+    }
61c387
+    do_lrm_rsc_op(lrm_state, data->rsc, data->input_xml, md);
61c387
+    free_metadata_cb_data(data);
61c387
+}
61c387
+
61c387
 /*	 A_LRM_INVOKE	*/
61c387
 void
61c387
 do_lrm_invoke(long long action,
61c387
@@ -1811,9 +1855,40 @@ do_lrm_invoke(long long action,
61c387
         } else {
61c387
             struct ra_metadata_s *md = NULL;
61c387
 
61c387
-            md = controld_get_rsc_metadata(lrm_state, rsc,
61c387
-                                           controld_metadata_from_cache);
61c387
-            do_lrm_rsc_op(lrm_state, rsc, input->xml, md);
61c387
+            /* Getting metadata from cache is OK except for start actions --
61c387
+             * always refresh from the agent for those, in case the resource
61c387
+             * agent was updated.
61c387
+             *
61c387
+             * @TODO Only refresh metadata for starts if the agent actually
61c387
+             * changed (using something like inotify, or a hash or modification
61c387
+             * time of the agent executable).
61c387
+             */
61c387
+            if (strcmp(operation, CRMD_ACTION_START) != 0) {
61c387
+                md = controld_get_rsc_metadata(lrm_state, rsc,
61c387
+                                               controld_metadata_from_cache);
61c387
+            }
61c387
+
61c387
+            if ((md == NULL) && crm_op_needs_metadata(rsc->standard,
61c387
+                                                      operation)) {
61c387
+                /* Most likely, we'll need the agent metadata to record the
61c387
+                 * pending operation and the operation result. Get it now rather
61c387
+                 * than wait until then, so the metadata action doesn't eat into
61c387
+                 * the real action's timeout.
61c387
+                 *
61c387
+                 * @TODO Metadata is retrieved via direct execution of the
61c387
+                 * agent, which has a couple of related issues: the executor
61c387
+                 * should execute agents, not the controller; and metadata for
61c387
+                 * Pacemaker Remote nodes should be collected on those nodes,
61c387
+                 * not locally.
61c387
+                 */
61c387
+                struct metadata_cb_data *data = NULL;
61c387
+
61c387
+                data = new_metadata_cb_data(rsc, input->xml);
61c387
+                (void) lrmd__metadata_async(rsc, metadata_complete,
61c387
+                                            (void *) data);
61c387
+            } else {
61c387
+                do_lrm_rsc_op(lrm_state, rsc, input->xml, md);
61c387
+            }
61c387
         }
61c387
 
61c387
         lrmd_free_rsc_info(rsc);
61c387
diff --git a/daemons/controld/controld_metadata.c b/daemons/controld/controld_metadata.c
61c387
index ddcd5db..f4b7560 100644
61c387
--- a/daemons/controld/controld_metadata.c
61c387
+++ b/daemons/controld/controld_metadata.c
61c387
@@ -356,17 +356,19 @@ controld_get_rsc_metadata(lrm_state_t *lrm_state, lrmd_rsc_info_t *rsc,
61c387
         return NULL;
61c387
     }
61c387
 
61c387
-    /* For now, we always collect resource agent meta-data via a local,
61c387
-     * synchronous, direct execution of the agent. This has multiple issues:
61c387
-     * the executor should execute agents, not the controller; meta-data for
61c387
-     * Pacemaker Remote nodes should be collected on those nodes, not
61c387
-     * locally; and the meta-data call shouldn't eat into the timeout of the
61c387
-     * real action being performed.
61c387
+    /* For most actions, metadata was cached asynchronously before action
61c387
+     * execution (via metadata_complete()).
61c387
      *
61c387
-     * These issues are planned to be addressed by having the scheduler
61c387
-     * schedule a meta-data cache check at the beginning of each transition.
61c387
-     * Once that is working, this block will only be a fallback in case the
61c387
-     * initial collection fails.
61c387
+     * However if that failed, and for other actions, retrieve the metadata now
61c387
+     * via a local, synchronous, direct execution of the agent.
61c387
+     *
61c387
+     * This has multiple issues, which is why this is just a fallback: the
61c387
+     * executor should execute agents, not the controller; metadata for
61c387
+     * Pacemaker Remote nodes should be collected on those nodes, not locally;
61c387
+     * the metadata call shouldn't eat into the timeout of the real action being
61c387
+     * performed; and the synchronous call blocks the controller (which also
61c387
+     * means that if the metadata action tries to contact the controller,
61c387
+     * everything will hang until the timeout).
61c387
      */
61c387
     rc = lrm_state_get_metadata(lrm_state, rsc->standard, rsc->provider,
61c387
                                 rsc->type, &metadata_str, 0);
61c387
-- 
61c387
2.31.1
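
The patch above defers the real action until an asynchronous metadata call completes, carrying the needed context in heap-allocated user data that the completion callback frees. The sketch below shows that callback pattern in isolation; the types and run_async() helper are hypothetical stand-ins for lrmd__metadata_async() and the controller's event loop, which invoke the callback later rather than immediately.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct metadata_cb_data {
    char *rsc_id;       /* copy of the resource name */
    char *input;        /* copy of the request that triggered this */
};

static void
do_rsc_op(const char *rsc_id, const char *input, const char *metadata)
{
    printf("executing %s for %s (metadata %s)\n", input, rsc_id,
           (metadata != NULL) ? "available" : "unavailable");
}

static void
metadata_complete(const char *metadata, void *user_data)
{
    struct metadata_cb_data *data = user_data;

    /* metadata may be NULL if retrieval failed; the action proceeds either way */
    do_rsc_op(data->rsc_id, data->input, metadata);
    free(data->rsc_id);
    free(data->input);
    free(data);
}

/* Stand-in for an asynchronous metadata call: real code would queue this on
 * the event loop and fire the callback when the agent action finishes.
 */
static void
run_async(const char *rsc_id, void (*cb)(const char *, void *), void *user_data)
{
    cb("<resource-agent name=\"dummy\"/>", user_data);
}

int
main(void)
{
    struct metadata_cb_data *data = calloc(1, sizeof(*data));

    if (data == NULL) {
        return 1;
    }
    data->rsc_id = strdup("dummy-rsc");
    data->input = strdup("start");
    run_async(data->rsc_id, metadata_complete, data);
    return 0;
}
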
61c387
61c387
From 1fad9acf0c8544dbbe2a4092ed3debe99453712a Mon Sep 17 00:00:00 2001
61c387
From: Ken Gaillot <kgaillot@redhat.com>
61c387
Date: Mon, 19 Sep 2022 15:19:06 -0500
61c387
Subject: [PATCH 18/24] Low: libstonithd: return CRM_EX_NOSUCH for bad agent
61c387
 namespace
61c387
61c387
Callers can't rely on a particular exit code scheme at this point,
61c387
but it doesn't hurt
61c387
---
61c387
 lib/fencing/st_client.c | 2 +-
61c387
 1 file changed, 1 insertion(+), 1 deletion(-)
61c387
61c387
diff --git a/lib/fencing/st_client.c b/lib/fencing/st_client.c
61c387
index 91075bd..d41b066 100644
61c387
--- a/lib/fencing/st_client.c
61c387
+++ b/lib/fencing/st_client.c
61c387
@@ -2451,7 +2451,7 @@ stonith__metadata_async(const char *agent, int timeout_sec,
61c387
         default:
61c387
             {
61c387
                 pcmk__action_result_t result = {
61c387
-                    .exit_status = CRM_EX_ERROR,
61c387
+                    .exit_status = CRM_EX_NOSUCH,
61c387
                     .execution_status = PCMK_EXEC_ERROR_HARD,
61c387
                     .exit_reason = crm_strdup_printf("No such agent '%s'",
61c387
                                                      agent),
61c387
-- 
61c387
2.31.1
61c387
61c387
From f54b7807b8a642311258d407c95d49916ed09fa8 Mon Sep 17 00:00:00 2001
61c387
From: Ken Gaillot <kgaillot@redhat.com>
61c387
Date: Mon, 19 Sep 2022 15:23:43 -0500
61c387
Subject: [PATCH 19/24] Low: liblrmd: consider invalid agent specification a
61c387
 fatal error
61c387
61c387
---
61c387
 lib/lrmd/lrmd_client.c | 3 ++-
61c387
 1 file changed, 2 insertions(+), 1 deletion(-)
61c387
61c387
diff --git a/lib/lrmd/lrmd_client.c b/lib/lrmd/lrmd_client.c
61c387
index 4b16bf0..d691dce 100644
61c387
--- a/lib/lrmd/lrmd_client.c
61c387
+++ b/lib/lrmd/lrmd_client.c
61c387
@@ -2402,7 +2402,8 @@ lrmd__metadata_async(lrmd_rsc_info_t *rsc,
61c387
     CRM_CHECK(callback != NULL, return EINVAL);
61c387
 
61c387
     if ((rsc == NULL) || (rsc->standard == NULL) || (rsc->type == NULL)) {
61c387
-        pcmk__set_result(&result, PCMK_OCF_NOT_CONFIGURED, PCMK_EXEC_ERROR,
61c387
+        pcmk__set_result(&result, PCMK_OCF_NOT_CONFIGURED,
61c387
+                         PCMK_EXEC_ERROR_FATAL,
61c387
                          "Invalid resource specification");
61c387
         callback(0, &result, user_data);
61c387
         pcmk__reset_result(&result);
61c387
-- 
61c387
2.31.1
61c387
61c387
From 2c5eaf642eccb1a9960eca7db3158bf52b843385 Mon Sep 17 00:00:00 2001
61c387
From: Ken Gaillot <kgaillot@redhat.com>
61c387
Date: Mon, 19 Sep 2022 15:25:12 -0500
61c387
Subject: [PATCH 20/24] Low: liblrmd: use resource ID for metadata actions when
61c387
 available
61c387
61c387
---
61c387
 lib/lrmd/lrmd_client.c | 10 +++++-----
61c387
 1 file changed, 5 insertions(+), 5 deletions(-)
61c387
61c387
diff --git a/lib/lrmd/lrmd_client.c b/lib/lrmd/lrmd_client.c
61c387
index d691dce..a6a237c 100644
61c387
--- a/lib/lrmd/lrmd_client.c
61c387
+++ b/lib/lrmd/lrmd_client.c
61c387
@@ -2416,11 +2416,11 @@ lrmd__metadata_async(lrmd_rsc_info_t *rsc,
61c387
                                        callback, user_data);
61c387
     }
61c387
 
61c387
-    action = services__create_resource_action(rsc->type, rsc->standard,
61c387
-                                              rsc->provider, rsc->type,
61c387
-                                              CRMD_ACTION_METADATA, 0,
61c387
-                                              CRMD_METADATA_CALL_TIMEOUT, NULL,
61c387
-                                              0);
61c387
+    action = services__create_resource_action((rsc->id != NULL) ? rsc->id : rsc->type,
61c387
+                                              rsc->standard, rsc->provider,
61c387
+                                              rsc->type, CRMD_ACTION_METADATA,
61c387
+                                              0, CRMD_METADATA_CALL_TIMEOUT,
61c387
+                                              NULL, 0);
61c387
     if (action == NULL) {
61c387
         pcmk__set_result(&result, PCMK_OCF_UNKNOWN_ERROR, PCMK_EXEC_ERROR,
61c387
                          "Out of memory");
61c387
-- 
61c387
2.31.1
61c387
61c387
From d60727b2681bf222284f3cdac0cfa452cf7bfaf9 Mon Sep 17 00:00:00 2001
61c387
From: Ken Gaillot <kgaillot@redhat.com>
61c387
Date: Mon, 19 Sep 2022 15:27:11 -0500
61c387
Subject: [PATCH 21/24] Refactor: controller: executor query can assume local
61c387
 node
61c387
61c387
---
61c387
 daemons/controld/controld_execd.c       | 6 +++---
61c387
 daemons/controld/controld_fsa.h         | 4 ++--
61c387
 daemons/controld/controld_join_client.c | 2 +-
61c387
 daemons/controld/controld_join_dc.c     | 2 +-
61c387
 4 files changed, 7 insertions(+), 7 deletions(-)
61c387
61c387
diff --git a/daemons/controld/controld_execd.c b/daemons/controld/controld_execd.c
61c387
index 6b65f52..16c13ba 100644
61c387
--- a/daemons/controld/controld_execd.c
61c387
+++ b/daemons/controld/controld_execd.c
61c387
@@ -796,16 +796,16 @@ build_active_RAs(lrm_state_t * lrm_state, xmlNode * rsc_list)
61c387
 }
61c387
 
61c387
 xmlNode *
61c387
-controld_query_executor_state(const char *node_name)
61c387
+controld_query_executor_state(void)
61c387
 {
61c387
     xmlNode *xml_state = NULL;
61c387
     xmlNode *xml_data = NULL;
61c387
     xmlNode *rsc_list = NULL;
61c387
     crm_node_t *peer = NULL;
61c387
-    lrm_state_t *lrm_state = lrm_state_find(node_name);
61c387
+    lrm_state_t *lrm_state = lrm_state_find(fsa_our_uname);
61c387
 
61c387
     if (!lrm_state) {
61c387
-        crm_err("Could not find executor state for node %s", node_name);
61c387
+        crm_err("Could not find executor state for node %s", fsa_our_uname);
61c387
         return NULL;
61c387
     }
61c387
 
61c387
diff --git a/daemons/controld/controld_fsa.h b/daemons/controld/controld_fsa.h
61c387
index 296232f..d137310 100644
61c387
--- a/daemons/controld/controld_fsa.h
61c387
+++ b/daemons/controld/controld_fsa.h
61c387
@@ -1,5 +1,5 @@
61c387
 /*
61c387
- * Copyright 2004-2021 the Pacemaker project contributors
61c387
+ * Copyright 2004-2022 the Pacemaker project contributors
61c387
  *
61c387
  * The version control history for this file may have further details.
61c387
  *
61c387
@@ -518,7 +518,7 @@ extern gboolean ever_had_quorum;
61c387
 // These should be moved elsewhere
61c387
 void do_update_cib_nodes(gboolean overwrite, const char *caller);
61c387
 int crmd_cib_smart_opt(void);
61c387
-xmlNode *controld_query_executor_state(const char *node_name);
61c387
+xmlNode *controld_query_executor_state(void);
61c387
 
61c387
 const char *fsa_input2string(enum crmd_fsa_input input);
61c387
 const char *fsa_state2string(enum crmd_fsa_state state);
61c387
diff --git a/daemons/controld/controld_join_client.c b/daemons/controld/controld_join_client.c
61c387
index 6485856..bfec430 100644
61c387
--- a/daemons/controld/controld_join_client.c
61c387
+++ b/daemons/controld/controld_join_client.c
61c387
@@ -268,7 +268,7 @@ do_cl_join_finalize_respond(long long action,
61c387
     update_dc_expected(input->msg);
61c387
 
61c387
     /* send our status section to the DC */
61c387
-    tmp1 = controld_query_executor_state(fsa_our_uname);
61c387
+    tmp1 = controld_query_executor_state();
61c387
     if (tmp1 != NULL) {
61c387
         xmlNode *reply = create_request(CRM_OP_JOIN_CONFIRM, tmp1, fsa_our_dc,
61c387
                                         CRM_SYSTEM_DC, CRM_SYSTEM_CRMD, NULL);
61c387
diff --git a/daemons/controld/controld_join_dc.c b/daemons/controld/controld_join_dc.c
61c387
index 9386182..9a8ea3e 100644
61c387
--- a/daemons/controld/controld_join_dc.c
61c387
+++ b/daemons/controld/controld_join_dc.c
61c387
@@ -591,7 +591,7 @@ do_dc_join_ack(long long action,
61c387
     }
61c387
     controld_delete_node_state(join_from, section, cib_scope_local);
61c387
     if (pcmk__str_eq(join_from, fsa_our_uname, pcmk__str_casei)) {
61c387
-        xmlNode *now_dc_lrmd_state = controld_query_executor_state(fsa_our_uname);
61c387
+        xmlNode *now_dc_lrmd_state = controld_query_executor_state();
61c387
 
61c387
         if (now_dc_lrmd_state != NULL) {
61c387
             fsa_cib_update(XML_CIB_TAG_STATUS, now_dc_lrmd_state,
61c387
-- 
61c387
2.31.1
61c387
61c387
From 5069bbfeedd91e86b8faa3a641c6064900c664a9 Mon Sep 17 00:00:00 2001
61c387
From: Ken Gaillot <kgaillot@redhat.com>
61c387
Date: Tue, 20 Sep 2022 10:18:48 -0500
61c387
Subject: [PATCH 22/24] Log: controller: add messages when getting agent
61c387
 metadata
61c387
61c387
---
61c387
 daemons/controld/controld_execd.c    |  5 +++++
61c387
 daemons/controld/controld_metadata.c | 10 ++++++++++
61c387
 2 files changed, 15 insertions(+)
61c387
61c387
diff --git a/daemons/controld/controld_execd.c b/daemons/controld/controld_execd.c
61c387
index 16c13ba..aa1c607 100644
61c387
--- a/daemons/controld/controld_execd.c
61c387
+++ b/daemons/controld/controld_execd.c
61c387
@@ -1884,6 +1884,11 @@ do_lrm_invoke(long long action,
61c387
                 struct metadata_cb_data *data = NULL;
61c387
 
61c387
                 data = new_metadata_cb_data(rsc, input->xml);
61c387
+                crm_info("Retrieving metadata for %s (%s%s%s:%s) asynchronously",
61c387
+                         rsc->id, rsc->standard,
61c387
+                         ((rsc->provider == NULL)? "" : ":"),
61c387
+                         ((rsc->provider == NULL)? "" : rsc->provider),
61c387
+                         rsc->type);
61c387
                 (void) lrmd__metadata_async(rsc, metadata_complete,
61c387
                                             (void *) data);
61c387
             } else {
61c387
diff --git a/daemons/controld/controld_metadata.c b/daemons/controld/controld_metadata.c
61c387
index f4b7560..c756a79 100644
61c387
--- a/daemons/controld/controld_metadata.c
61c387
+++ b/daemons/controld/controld_metadata.c
61c387
@@ -348,6 +348,11 @@ controld_get_rsc_metadata(lrm_state_t *lrm_state, lrmd_rsc_info_t *rsc,
61c387
             free(key);
61c387
         }
61c387
         if (metadata != NULL) {
61c387
+            crm_debug("Retrieved metadata for %s (%s%s%s:%s) from cache",
61c387
+                      rsc->id, rsc->standard,
61c387
+                      ((rsc->provider == NULL)? "" : ":"),
61c387
+                      ((rsc->provider == NULL)? "" : rsc->provider),
61c387
+                      rsc->type);
61c387
             return metadata;
61c387
         }
61c387
     }
61c387
@@ -370,6 +375,11 @@ controld_get_rsc_metadata(lrm_state_t *lrm_state, lrmd_rsc_info_t *rsc,
61c387
      * means that if the metadata action tries to contact the controller,
61c387
      * everything will hang until the timeout).
61c387
      */
61c387
+    crm_debug("Retrieving metadata for %s (%s%s%s:%s) synchronously",
61c387
+              rsc->id, rsc->standard,
61c387
+              ((rsc->provider == NULL)? "" : ":"),
61c387
+              ((rsc->provider == NULL)? "" : rsc->provider),
61c387
+              rsc->type);
61c387
     rc = lrm_state_get_metadata(lrm_state, rsc->standard, rsc->provider,
61c387
                                 rsc->type, &metadata_str, 0);
61c387
     if (rc != pcmk_ok) {
61c387
-- 
61c387
2.31.1
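
The new log messages above print an agent as standard[:provider]:type while tolerating a NULL provider, using a single "%s%s%s:%s" format. A small standalone example of that idiom (print_agent() is a made-up helper):

#include <stdio.h>

static void
print_agent(const char *standard, const char *provider, const char *type)
{
    printf("%s%s%s:%s\n", standard,
           (provider == NULL) ? "" : ":",
           (provider == NULL) ? "" : provider,
           type);
}

int
main(void)
{
    print_agent("ocf", "heartbeat", "IPaddr2");   /* ocf:heartbeat:IPaddr2 */
    print_agent("systemd", NULL, "httpd");        /* systemd:httpd */
    return 0;
}
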
61c387
61c387
From 67d81f7d4bdcb6a888e13bcb29954165dce6cace Mon Sep 17 00:00:00 2001
61c387
From: Ken Gaillot <kgaillot@redhat.com>
61c387
Date: Wed, 14 Sep 2022 14:36:44 -0500
61c387
Subject: [PATCH 23/24] Test: cts-lab: allow any whitespace in "Recover"
61c387
 messages
61c387
61c387
This seems to have always been multiple spaces; not sure what happened
61c387
---
61c387
 cts/lab/CTStests.py | 12 ++++++------
61c387
 cts/lab/patterns.py |  4 ++--
61c387
 2 files changed, 8 insertions(+), 8 deletions(-)
61c387
61c387
diff --git a/cts/lab/CTStests.py b/cts/lab/CTStests.py
61c387
index 5535177..8b56758 100644
61c387
--- a/cts/lab/CTStests.py
61c387
+++ b/cts/lab/CTStests.py
61c387
@@ -1,7 +1,7 @@
61c387
 """ Test-specific classes for Pacemaker's Cluster Test Suite (CTS)
61c387
 """
61c387
 
61c387
-__copyright__ = "Copyright 2000-2021 the Pacemaker project contributors"
61c387
+__copyright__ = "Copyright 2000-2022 the Pacemaker project contributors"
61c387
 __license__ = "GNU General Public License version 2 or later (GPLv2+) WITHOUT ANY WARRANTY"
61c387
 
61c387
 #
61c387
@@ -1225,7 +1225,7 @@ class MaintenanceMode(CTSTest):
61c387
         '''Return list of errors which should be ignored'''
61c387
         return [
61c387
             r"Updating failcount for %s" % self.rid,
61c387
-            r"schedulerd.*: Recover %s\s*\(.*\)" % self.rid,
61c387
+            r"schedulerd.*: Recover\s+%s\s+\(.*\)" % self.rid,
61c387
             r"Unknown operation: fail",
61c387
             self.templates["Pat:RscOpOK"] % (self.action, self.rid),
61c387
             r"(ERROR|error).*: Action %s_%s_%d .* initiated outside of a transition" % (self.rid, self.action, self.interval),
61c387
@@ -1324,7 +1324,7 @@ class ResourceRecover(CTSTest):
61c387
         '''Return list of errors which should be ignored'''
61c387
         return [
61c387
             r"Updating failcount for %s" % self.rid,
61c387
-            r"schedulerd.*: Recover (%s|%s)\s*\(.*\)" % (self.rid, self.rid_alt),
61c387
+            r"schedulerd.*: Recover\s+(%s|%s)\s+\(.*\)" % (self.rid, self.rid_alt),
61c387
             r"Unknown operation: fail",
61c387
             self.templates["Pat:RscOpOK"] % (self.action, self.rid),
61c387
             r"(ERROR|error).*: Action %s_%s_%d .* initiated outside of a transition" % (self.rid, self.action, self.interval),
61c387
@@ -2559,7 +2559,7 @@ class RemoteLXC(CTSTest):
61c387
         '''Return list of errors which should be ignored'''
61c387
         return [
61c387
             r"Updating failcount for ping",
61c387
-            r"schedulerd.*: Recover (ping|lxc-ms|container)\s*\(.*\)",
61c387
+            r"schedulerd.*: Recover\s+(ping|lxc-ms|container)\s+\(.*\)",
61c387
             # The orphaned lxc-ms resource causes an expected transition error
61c387
             # that is a result of the scheduler not having knowledge that the
61c387
             # promotable resource used to be a clone. As a result, it looks like that 
61c387
@@ -3054,7 +3054,7 @@ class RemoteStonithd(RemoteDriver):
61c387
             r"Software caused connection abort",
61c387
             r"pacemaker-controld.*:\s+error.*: Operation remote-.*_monitor",
61c387
             r"pacemaker-controld.*:\s+error.*: Result of monitor operation for remote-.*",
61c387
-            r"schedulerd.*:\s+Recover remote-.*\s*\(.*\)",
61c387
+            r"schedulerd.*:\s+Recover\s+remote-.*\s+\(.*\)",
61c387
             r"error: Result of monitor operation for .* on remote-.*: Internal communication failure",
61c387
         ]
61c387
 
61c387
@@ -3120,7 +3120,7 @@ class RemoteRscFailure(RemoteDriver):
61c387
 
61c387
     def errorstoignore(self):
61c387
         ignore_pats = [
61c387
-            r"schedulerd.*: Recover remote-rsc\s*\(.*\)",
61c387
+            r"schedulerd.*: Recover\s+remote-rsc\s+\(.*\)",
61c387
             r"Dummy.*: No process state file found",
61c387
         ]
61c387
 
61c387
diff --git a/cts/lab/patterns.py b/cts/lab/patterns.py
61c387
index 90cac73..6e718f7 100644
61c387
--- a/cts/lab/patterns.py
61c387
+++ b/cts/lab/patterns.py
61c387
@@ -66,7 +66,7 @@ class BasePatterns(object):
61c387
 
61c387
             "Pat:Fencing_start"   : r"Requesting peer fencing .* targeting %s",
61c387
             "Pat:Fencing_ok"      : r"pacemaker-fenced.*:\s*Operation .* targeting %s by .* for .*@.*: OK",
61c387
-            "Pat:Fencing_recover" : r"pacemaker-schedulerd.*: Recover %s",
61c387
+            "Pat:Fencing_recover" : r"pacemaker-schedulerd.*: Recover\s+%s",
61c387
             "Pat:Fencing_active"  : r"stonith resource .* is active on 2 nodes (attempting recovery)",
61c387
             "Pat:Fencing_probe"   : r"pacemaker-controld.* Result of probe operation for %s on .*: Error",
61c387
 
61c387
@@ -180,7 +180,7 @@ class crm_corosync(BasePatterns):
61c387
             r"Parameters to .* action changed:",
61c387
             r"Parameters to .* changed",
61c387
             r"pacemakerd.*\[[0-9]+\] terminated( with signal| as IPC server|$)",
61c387
-            r"pacemaker-schedulerd.*Recover .*\(.* -\> .*\)",
61c387
+            r"pacemaker-schedulerd.*Recover\s+.*\(.* -\> .*\)",
61c387
             r"rsyslogd.* imuxsock lost .* messages from pid .* due to rate-limiting",
61c387
             r"Peer is not part of our cluster",
61c387
             r"We appear to be in an election loop",
61c387
-- 
61c387
2.31.1
61c387
61c387
From aefc2a846ad7400fdbc5185e5dd44bceb3f9fcb5 Mon Sep 17 00:00:00 2001
61c387
From: Ken Gaillot <kgaillot@redhat.com>
61c387
Date: Mon, 19 Sep 2022 15:55:42 -0500
61c387
Subject: [PATCH 24/24] Test: cts-lab: match parentheses correctly
61c387
61c387
---
61c387
 cts/lab/patterns.py | 3 ++-
61c387
 1 file changed, 2 insertions(+), 1 deletion(-)
61c387
61c387
diff --git a/cts/lab/patterns.py b/cts/lab/patterns.py
61c387
index 6e718f7..856fffb 100644
61c387
--- a/cts/lab/patterns.py
61c387
+++ b/cts/lab/patterns.py
61c387
@@ -271,6 +271,7 @@ class crm_corosync(BasePatterns):
61c387
         ]
61c387
         self.components["pacemaker-based-ignore"] = [
61c387
             r"pacemaker-execd.*Connection to (fencer|stonith-ng).* (closed|failed|lost)",
61c387
+            r"pacemaker-controld.*:\s+Result of .* operation for Fencing.*Error \(Lost connection to fencer\)",
61c387
             # This is overbroad, but we don't have a way to say that only
61c387
             # certain transition errors are acceptable (if the fencer respawns,
61c387
             # fence devices may appear multiply active). We have to rely on
61c387
@@ -328,7 +329,7 @@ class crm_corosync(BasePatterns):
61c387
             r"crit:.*Fencing daemon connection failed",
61c387
             r"error:.*Fencer connection failed \(will retry\)",
61c387
             r"Connection to (fencer|stonith-ng) failed, finalizing .* pending operations",
61c387
-            r"pacemaker-controld.*:\s+Result of .* operation for Fencing.*Error",
61c387
+            r"pacemaker-controld.*:\s+Result of .* operation for Fencing.*Error \(Lost connection to fencer\)",
61c387
             # This is overbroad, but we don't have a way to say that only
61c387
             # certain transition errors are acceptable (if the fencer respawns,
61c387
             # fence devices may appear multiply active). We have to rely on
61c387
-- 
61c387
2.31.1
61c387